Repository: cilium/ebpf Branch: main Commit: 729742be0e24 Files: 638 Total size: 2.1 MB Directory structure: gitextract_xp4d7it9/ ├── .clang-format ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ └── config.yml │ ├── dependabot.yml │ └── workflows/ │ ├── apidiff.yml │ ├── ci.yml │ └── trusted.yml ├── .gitignore ├── .golangci.yaml ├── .vimto.toml ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── Makefile ├── README.md ├── asm/ │ ├── alu.go │ ├── alu_string.go │ ├── doc.go │ ├── dsl_test.go │ ├── func.go │ ├── func_lin.go │ ├── func_string.go │ ├── func_win.go │ ├── instruction.go │ ├── instruction_test.go │ ├── jump.go │ ├── jump_string.go │ ├── load_store.go │ ├── load_store_string.go │ ├── metadata.go │ ├── metadata_test.go │ ├── opcode.go │ ├── opcode_string.go │ ├── opcode_test.go │ └── register.go ├── attachtype_string.go ├── btf/ │ ├── btf.go │ ├── btf_test.go │ ├── btf_types.go │ ├── btf_types_string.go │ ├── core.go │ ├── core_reloc_test.go │ ├── core_test.go │ ├── dedup.go │ ├── dedup_test.go │ ├── doc.go │ ├── ext_info.go │ ├── ext_info_test.go │ ├── feature.go │ ├── feature_test.go │ ├── format.go │ ├── format_test.go │ ├── fuzz_test.go │ ├── handle.go │ ├── handle_test.go │ ├── kernel.go │ ├── kernel_test.go │ ├── marshal.go │ ├── marshal_test.go │ ├── strings.go │ ├── strings_test.go │ ├── testdata/ │ │ ├── bpf_core_read.h │ │ ├── btf_testmod.btf │ │ ├── btf_testmod.btf.base │ │ ├── fuzz/ │ │ │ └── FuzzExtInfo/ │ │ │ ├── 50a33736610b4a0945179db4c8a88e8247b05fbb25f50ed81e5393baf29bc5bc │ │ │ ├── 72534f53bd90cb52a017013499b11511535c1295bf0e22f856148c02454c323e │ │ │ └── a87a26efa64ed50b598ae8e333301d57d5f234527730f042d68ccc736e90c9fa │ │ ├── relocs-eb.elf │ │ ├── relocs-el.elf │ │ ├── relocs.c │ │ ├── relocs_enum-eb.elf │ │ ├── relocs_enum-el.elf │ │ ├── relocs_enum.c │ │ ├── relocs_read-eb.elf │ │ ├── relocs_read-el.elf │ │ ├── relocs_read.c │ │ ├── relocs_read_tgt-eb.elf │ │ ├── 
relocs_read_tgt-el.elf │ │ ├── relocs_read_tgt.c │ │ ├── tags-eb.elf │ │ ├── tags-el.elf │ │ └── tags.c │ ├── traversal.go │ ├── traversal_test.go │ ├── types.go │ ├── types_test.go │ ├── unmarshal.go │ ├── unmarshal_test.go │ ├── workarounds.go │ └── workarounds_test.go ├── cmd/ │ └── bpf2go/ │ ├── README.md │ ├── doc.go │ ├── flags.go │ ├── gen/ │ │ ├── compile.go │ │ ├── compile_test.go │ │ ├── doc.go │ │ ├── output.go │ │ ├── output.tpl │ │ ├── output_test.go │ │ ├── target.go │ │ ├── target_test.go │ │ ├── types.go │ │ └── types_test.go │ ├── internal/ │ │ └── module.go │ ├── main.go │ ├── main_test.go │ ├── makedep.go │ ├── makedep_test.go │ ├── test/ │ │ ├── api_test.go │ │ ├── doc.go │ │ ├── test_bpfeb.go │ │ ├── test_bpfeb.o │ │ ├── test_bpfel.go │ │ └── test_bpfel.o │ ├── testdata/ │ │ ├── minimal-eb.elf │ │ ├── minimal-el.elf │ │ └── minimal.c │ ├── tools.go │ └── tools_test.go ├── collection.go ├── collection_other.go ├── collection_test.go ├── collection_windows.go ├── collection_windows_test.go ├── cpu.go ├── cpu_other.go ├── cpu_test.go ├── cpu_windows.go ├── doc.go ├── docs/ │ ├── .gitignore │ ├── Makefile │ ├── Pipfile │ ├── README.md │ ├── ebpf/ │ │ ├── about.md │ │ ├── concepts/ │ │ │ ├── features.md │ │ │ ├── global-variables.md │ │ │ ├── loader.md │ │ │ ├── object-lifecycle.md │ │ │ ├── rlimit.md │ │ │ └── section-naming.md │ │ ├── contributing/ │ │ │ ├── architecture.md │ │ │ ├── index.md │ │ │ ├── new-example.md │ │ │ ├── new-feature.md │ │ │ └── windows.md │ │ ├── guides/ │ │ │ ├── getting-started.md │ │ │ ├── portable-ebpf.md │ │ │ └── windows-support.md │ │ ├── index.md │ │ ├── stylesheets/ │ │ │ └── extra.css │ │ └── users.md │ ├── examples/ │ │ ├── docs.c │ │ ├── docs_test.go │ │ ├── features_test.go │ │ ├── getting_started/ │ │ │ ├── counter.c │ │ │ ├── counter_bpfeb.go │ │ │ ├── counter_bpfeb.o │ │ │ ├── counter_bpfel.go │ │ │ ├── counter_bpfel.o │ │ │ ├── gen.go │ │ │ └── main.go │ │ ├── rlimit_test.go │ │ └── variables/ │ │ ├── 
gen.go │ │ ├── main.go │ │ ├── variables.c │ │ ├── variables_bpfeb.go │ │ ├── variables_bpfeb.o │ │ ├── variables_bpfel.go │ │ └── variables_bpfel.o │ ├── includes/ │ │ └── glossary.md │ ├── macros.py │ ├── mkdocs.yml │ ├── overrides/ │ │ └── partials/ │ │ └── source-file.html │ └── vars.yml ├── elf_reader.go ├── elf_reader_test.go ├── elf_sections.go ├── example_sock_elf_test.go ├── example_sock_extract_dist_test.go ├── examples/ │ ├── README.md │ ├── cgroup_skb/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── cgroup_skb.c │ │ └── main.go │ ├── fentry/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── fentry.c │ │ └── main.go │ ├── headers/ │ │ ├── LICENSE.BSD-2-Clause │ │ ├── bpf_endian.h │ │ ├── bpf_helper_defs.h │ │ ├── bpf_helpers.h │ │ ├── bpf_tracing.h │ │ ├── common.h │ │ └── update.sh │ ├── kprobe/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── kprobe.c │ │ └── main.go │ ├── kprobe_percpu/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── kprobe_percpu.c │ │ └── main.go │ ├── kprobepin/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── kprobe_pin.c │ │ └── main.go │ ├── map_in_map/ │ │ └── main.go │ ├── ringbuffer/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── main.go │ │ └── ringbuffer.c │ ├── sched_ext/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── main.go │ │ └── sched_ext.c │ ├── tcprtt/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── main.go │ │ └── tcprtt.c │ ├── tcprtt_sockops/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── bpf_sockops.h │ │ ├── main.go │ │ └── tcprtt_sockops.c │ ├── tcx/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── 
main.go │ │ └── tcx.c │ ├── tracepoint_in_c/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── main.go │ │ └── tracepoint.c │ ├── tracepoint_in_go/ │ │ └── main.go │ ├── uretprobe/ │ │ ├── bpf_x86_bpfel.go │ │ ├── bpf_x86_bpfel.o │ │ ├── main.go │ │ └── uretprobe.c │ ├── xdp/ │ │ ├── bpf_bpfeb.go │ │ ├── bpf_bpfeb.o │ │ ├── bpf_bpfel.go │ │ ├── bpf_bpfel.o │ │ ├── main.go │ │ └── xdp.c │ └── xdp_live_frame/ │ ├── bpf_bpfeb.go │ ├── bpf_bpfeb.o │ ├── bpf_bpfel.go │ ├── bpf_bpfel.o │ ├── main.go │ └── xdp.c ├── features/ │ ├── doc.go │ ├── link.go │ ├── link_test.go │ ├── map.go │ ├── map_test.go │ ├── misc.go │ ├── misc_test.go │ ├── prog.go │ ├── prog_test.go │ └── version.go ├── fuzz_test.go ├── go.mod ├── go.sum ├── helpers_test.go ├── info.go ├── info_test.go ├── internal/ │ ├── cmd/ │ │ ├── genfunctions.awk │ │ ├── gensections.awk │ │ └── genwinfunctions.awk │ ├── deque.go │ ├── deque_test.go │ ├── efw/ │ │ ├── enums.go │ │ ├── error_reporting.go │ │ ├── error_reporting_test.go │ │ ├── fd.go │ │ ├── map.go │ │ ├── module.go │ │ ├── native.go │ │ ├── object.go │ │ ├── proc.go │ │ ├── proc_test.go │ │ ├── program.go │ │ ├── result.go │ │ ├── result_string_windows.go │ │ ├── result_test.go │ │ └── structs.go │ ├── elf.go │ ├── endian_be.go │ ├── endian_le.go │ ├── epoll/ │ │ ├── poller.go │ │ └── poller_test.go │ ├── errors.go │ ├── errors_test.go │ ├── feature.go │ ├── feature_test.go │ ├── io.go │ ├── io_test.go │ ├── kallsyms/ │ │ ├── cache.go │ │ ├── kallsyms.go │ │ ├── kallsyms_test.go │ │ ├── reader.go │ │ └── reader_test.go │ ├── kconfig/ │ │ ├── kconfig.go │ │ ├── kconfig_test.go │ │ └── testdata/ │ │ └── test.kconfig │ ├── linux/ │ │ ├── auxv.go │ │ ├── auxv_test.go │ │ ├── cpu.go │ │ ├── cpu_test.go │ │ ├── doc.go │ │ ├── helper_test.go │ │ ├── kconfig.go │ │ ├── platform.go │ │ ├── statfs.go │ │ ├── statfs_test.go │ │ ├── vdso.go │ │ ├── vdso_test.go │ │ ├── version.go │ │ └── version_test.go │ ├── math.go │ ├── 
math_test.go │ ├── nil.go │ ├── output.go │ ├── output_test.go │ ├── platform/ │ │ ├── constants.go │ │ ├── constants_test.go │ │ ├── platform.go │ │ ├── platform_linux.go │ │ ├── platform_other.go │ │ └── platform_windows.go │ ├── prog.go │ ├── sys/ │ │ ├── doc.go │ │ ├── fd.go │ │ ├── fd_linux_test.go │ │ ├── fd_other.go │ │ ├── fd_windows.go │ │ ├── pinning_other.go │ │ ├── pinning_windows.go │ │ ├── ptr.go │ │ ├── ptr_32_be.go │ │ ├── ptr_32_le.go │ │ ├── ptr_64.go │ │ ├── ptr_test.go │ │ ├── signals.go │ │ ├── signals_test.go │ │ ├── syscall.go │ │ ├── syscall_other.go │ │ ├── syscall_test.go │ │ ├── syscall_windows.go │ │ └── types.go │ ├── sysenc/ │ │ ├── buffer.go │ │ ├── buffer_test.go │ │ ├── doc.go │ │ ├── layout.go │ │ ├── layout_test.go │ │ ├── marshal.go │ │ └── marshal_test.go │ ├── testdata/ │ │ ├── errno524.log │ │ ├── invalid-R0.log │ │ ├── invalid-ctx-access.log │ │ ├── invalid-member.log │ │ └── issue-43.log │ ├── testutils/ │ │ ├── bpffs_other.go │ │ ├── bpffs_windows.go │ │ ├── bpffs_windows_test.go │ │ ├── cap.go │ │ ├── cgroup.go │ │ ├── chan.go │ │ ├── checkers.go │ │ ├── checkers_test.go │ │ ├── cpu.go │ │ ├── fd_other.go │ │ ├── fd_windows.go │ │ ├── feature.go │ │ ├── feature_other.go │ │ ├── feature_test.go │ │ ├── feature_windows.go │ │ ├── glob.go │ │ ├── netns_linux.go │ │ ├── netns_other.go │ │ ├── programs.go │ │ ├── rlimit.go │ │ ├── seed.go │ │ └── testmain/ │ │ ├── fd_trace.go │ │ ├── main.go │ │ ├── windows.go │ │ └── windows_test.go │ ├── tracefs/ │ │ ├── kprobe.go │ │ ├── kprobe_test.go │ │ ├── perf_event_test.go │ │ ├── probetype_string.go │ │ ├── uprobe.go │ │ └── uprobe_test.go │ ├── unix/ │ │ ├── doc.go │ │ ├── errno_linux.go │ │ ├── errno_linux_test.go │ │ ├── errno_other.go │ │ ├── errno_string_windows.go │ │ ├── errno_test.go │ │ ├── errno_windows.go │ │ ├── error.go │ │ ├── error_test.go │ │ ├── strings_other.go │ │ ├── strings_windows.go │ │ ├── types_linux.go │ │ └── types_other.go │ ├── version.go │ └── 
version_test.go ├── link/ │ ├── anchor.go │ ├── cgroup.go │ ├── cgroup_test.go │ ├── doc.go │ ├── helpers_windows_test.go │ ├── iter.go │ ├── iter_test.go │ ├── kprobe.go │ ├── kprobe_multi.go │ ├── kprobe_multi_test.go │ ├── kprobe_test.go │ ├── link.go │ ├── link_other.go │ ├── link_other_test.go │ ├── link_test.go │ ├── link_windows.go │ ├── link_windows_test.go │ ├── netfilter.go │ ├── netfilter_test.go │ ├── netkit.go │ ├── netkit_test.go │ ├── netns.go │ ├── netns_test.go │ ├── perf_event.go │ ├── perf_event_test.go │ ├── program.go │ ├── program_test.go │ ├── query.go │ ├── query_test.go │ ├── raw_tracepoint.go │ ├── raw_tracepoint_test.go │ ├── socket_filter.go │ ├── socket_filter_test.go │ ├── struct_ops.go │ ├── struct_ops_test.go │ ├── syscalls.go │ ├── syscalls_test.go │ ├── tcx.go │ ├── tcx_test.go │ ├── tracepoint.go │ ├── tracepoint_test.go │ ├── tracing.go │ ├── tracing_test.go │ ├── uprobe.go │ ├── uprobe_multi.go │ ├── uprobe_multi_test.go │ ├── uprobe_test.go │ ├── xdp.go │ └── xdp_test.go ├── linker.go ├── linker_test.go ├── map.go ├── map_test.go ├── marshaler_example_test.go ├── marshalers.go ├── marshalers_test.go ├── memory.go ├── memory_test.go ├── memory_unsafe.go ├── memory_unsafe_tag.go ├── memory_unsafe_test.go ├── netlify.toml ├── perf/ │ ├── doc.go │ ├── reader.go │ ├── reader_test.go │ ├── ring.go │ └── ring_test.go ├── pin/ │ ├── doc.go │ ├── load.go │ ├── load_test.go │ ├── pin.go │ ├── walk_other.go │ ├── walk_test.go │ └── walk_windows.go ├── prog.go ├── prog_linux_test.go ├── prog_test.go ├── ringbuf/ │ ├── doc.go │ ├── helper_other_test.go │ ├── helper_test.go │ ├── helper_windows_test.go │ ├── reader.go │ ├── reader_other.go │ ├── reader_test.go │ ├── reader_windows.go │ ├── ring.go │ ├── ring_other.go │ └── ring_windows.go ├── rlimit/ │ ├── doc.go │ ├── rlimit_linux.go │ ├── rlimit_linux_test.go │ └── rlimit_other.go ├── scripts/ │ ├── update-efw-deps.sh │ ├── update-kernel-deps.sh │ └── windows/ │ ├── README.md │ ├── 
Setup.ps1 │ ├── autounattend.xml │ ├── setup-efw.sh │ └── setup.sh ├── struct_ops.go ├── struct_ops_test.go ├── syscalls.go ├── syscalls_test.go ├── testdata/ │ ├── arena-eb.elf │ ├── arena-el.elf │ ├── arena.c │ ├── btf_map_init-eb.elf │ ├── btf_map_init-el.elf │ ├── btf_map_init.c │ ├── common.h │ ├── constants-eb.elf │ ├── constants-el.elf │ ├── constants.c │ ├── docker/ │ │ ├── Dockerfile │ │ ├── IMAGE │ │ ├── Makefile │ │ ├── README.md │ │ ├── VERSION │ │ ├── llvm-snapshot.gpg.key │ │ └── llvm.list │ ├── errors-eb.elf │ ├── errors-el.elf │ ├── errors.c │ ├── fentry_fexit-eb.elf │ ├── fentry_fexit-el.elf │ ├── fentry_fexit.c │ ├── freplace-eb.elf │ ├── freplace-el.elf │ ├── freplace.c │ ├── fwd_decl-eb.elf │ ├── fwd_decl-el.elf │ ├── fwd_decl.c │ ├── invalid-kfunc-eb.elf │ ├── invalid-kfunc-el.elf │ ├── invalid-kfunc.c │ ├── invalid_btf_map_init-eb.elf │ ├── invalid_btf_map_init-el.elf │ ├── invalid_btf_map_init.c │ ├── invalid_map-eb.elf │ ├── invalid_map-el.elf │ ├── invalid_map.c │ ├── invalid_map_static-eb.elf │ ├── invalid_map_static-el.elf │ ├── invalid_map_static.c │ ├── iproute2_map_compat-eb.elf │ ├── iproute2_map_compat-el.elf │ ├── iproute2_map_compat.c │ ├── kconfig-eb.elf │ ├── kconfig-el.elf │ ├── kconfig.c │ ├── kfunc-eb.elf │ ├── kfunc-el.elf │ ├── kfunc-kmod-eb.elf │ ├── kfunc-kmod-el.elf │ ├── kfunc-kmod.c │ ├── kfunc.c │ ├── ksym-eb.elf │ ├── ksym-el.elf │ ├── ksym.c │ ├── linked-el.elf │ ├── linked.h │ ├── linked1-el.elf │ ├── linked1.c │ ├── linked2-el.elf │ ├── linked2.c │ ├── loader-clang-14-eb.elf │ ├── loader-clang-14-el.elf │ ├── loader-clang-17-eb.elf │ ├── loader-clang-17-el.elf │ ├── loader-clang-20-eb.elf │ ├── loader-clang-20-el.elf │ ├── loader.c │ ├── loader.h │ ├── loader_nobtf-eb.elf │ ├── loader_nobtf-el.elf │ ├── loader_nobtf.h │ ├── manyprogs-eb.elf │ ├── manyprogs-el.elf │ ├── manyprogs.c │ ├── map_spin_lock-eb.elf │ ├── map_spin_lock-el.elf │ ├── map_spin_lock.c │ ├── raw_tracepoint-eb.elf │ ├── raw_tracepoint-el.elf │ 
├── raw_tracepoint.c │ ├── strings-eb.elf │ ├── strings-el.elf │ ├── strings.c │ ├── struct_ops-eb.elf │ ├── struct_ops-el.elf │ ├── struct_ops.c │ ├── subprog_reloc-eb.elf │ ├── subprog_reloc-el.elf │ ├── subprog_reloc.c │ ├── variables-eb.elf │ ├── variables-el.elf │ ├── variables.c │ └── windows/ │ ├── LICENSE │ └── cgroup_sock_addr.sys ├── types.go ├── types_string.go ├── types_windows.go ├── types_windows_test.go ├── variable.go └── variable_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ --- Language: Cpp BasedOnStyle: LLVM AlignAfterOpenBracket: DontAlign AlignConsecutiveAssignments: true AlignEscapedNewlines: DontAlign # mkdocs annotations in source code are written as trailing comments # and alignment pushes these really far away from the content. AlignTrailingComments: false AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: false AllowAllParametersOfDeclarationOnNextLine: false AllowShortFunctionsOnASingleLine: false BreakBeforeBraces: Attach IndentWidth: 4 KeepEmptyLinesAtTheStartOfBlocks: false TabWidth: 4 UseTab: ForContinuationAndIndentation ColumnLimit: 1000 # Go compiler comments need to stay unindented. CommentPragmas: '^go:.*' # linux/bpf.h needs to be included before bpf/bpf_helpers.h for types like __u64 # and sorting makes this impossible. SortIncludes: false ... 
================================================ FILE: .gitattributes ================================================ # Force line ending normalisation * text=auto # Show types.go in the PR diff view by default internal/sys/types.go linguist-generated=false ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug report description: Create a report to help us improve labels: ["bug"] assignees: [] body: - type: markdown attributes: value: "Thank you for reporting a bug. Please fill out the fields below." - type: textarea attributes: label: Describe the bug description: | A clear and concise description of what the bug is. Include what you expected to happen instead. validations: required: true - type: textarea attributes: label: How to reproduce description: "Steps to reproduce the behavior." validations: required: true - type: input id: version attributes: label: Version information description: The output of `go list -m github.com/cilium/ebpf`. placeholder: github.com/cilium/ebpf vX.Y.Z validations: required: true ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ contact_links: - name: Questions url: https://github.com/cilium/ebpf/discussions/categories/q-a about: Please ask and answer questions here. - name: Slack url: https://cilium.slack.com/messages/ebpf-go about: Join our Slack. ================================================ FILE: .github/dependabot.yml ================================================ --- version: 2 updates: - package-ecosystem: "pip" directory: "/docs" schedule: interval: "monthly" allow: # Only manage direct dependencies in Pipfile, ignore transitive # dependencies only appearing in Pipfile.lock. 
- dependency-name: "*" dependency-type: "direct" groups: docs: dependency-type: production applies-to: version-updates - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" ================================================ FILE: .github/workflows/apidiff.yml ================================================ name: apidiff on: push: branches: [ "main" ] pull_request: branches: [ "main" ] jobs: go-apidiff: name: go-apidiff runs-on: ubuntu-latest if: github.event_name == 'pull_request' steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-go@v6 with: go-version-file: go.mod - name: Run go-apidiff id: apidiff continue-on-error: true uses: joelanford/go-apidiff@main - name: Create apidiff.json run: | echo '{"id": ${{ github.event.pull_request.number }}, "semver-type": "${{ steps.apidiff.outputs.semver-type }}"}' > apidiff.json - name: Upload apidiff.json uses: actions/upload-artifact@v7 with: name: apidiff path: apidiff.json ================================================ FILE: .github/workflows/ci.yml ================================================ name: ci on: push: branches: [ "main" ] pull_request: branches: [ "main" ] env: TMPDIR: /tmp CI_MAX_KERNEL_VERSION: '6.18' CI_MAX_EFW_VERSION: '1.0.0-rc2' CI_MIN_CLANG_VERSION: '13' go_version: '~1.25' prev_go_version: '~1.24' CGO_ENABLED: '0' # Sync with Pipfile and netlify.toml. python_version: '~3.13' concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: build-and-lint: name: Build and Lint runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.go_version }}' - name: Run staticcheck uses: dominikh/staticcheck-action@v1 with: version: "master" install-go: false - name: Run golangci-lint uses: golangci/golangci-lint-action@v9.2.0 - name: Generate and format code run: | make clean && make container-all if ! 
git diff --exit-code; then echo "found unformatted source files, or generated files are not up to date, run 'make'" >&2 exit 1 fi - name: Test bpf2go run: | go test -v ./cmd/bpf2go - name: Build run: go build -v ./... cross-build: name: Cross build runs-on: ubuntu-22.04 needs: build-and-lint timeout-minutes: 10 steps: - uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.go_version }}' - name: Cross build darwin env: GOOS: darwin run: | go build -v ./... go test -c -o /dev/null ./... >/dev/null - name: Cross build arm32 env: GOARCH: arm GOARM: 6 run: | go build -v ./... go test -c -o /dev/null ./... >/dev/null - name: Cross build wasm env: GOOS: js GOARCH: wasm run: | go build -v ./... go test -c -o /dev/null ./... >/dev/null build-docs: name: Build Documentation runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v6 with: # The mkdocs git-authors plugin needs access to the full revision # history to correctly generate its statistics. 
fetch-depth: 0 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.go_version }}' - name: Set up Python uses: actions/setup-python@v6 with: python-version: '${{ env.python_version }}' cache: 'pipenv' - name: Install pipenv run: pip3 install pipenv - name: Install Dependencies run: pipenv install working-directory: ./docs - name: Build Documentation run: make build working-directory: ./docs test-on-prev-go: name: Run tests on previous stable Go runs-on: ubuntu-latest needs: build-and-lint timeout-minutes: 15 env: CI_KERNEL_SELFTESTS: '/usr/src/linux/tools/testing/selftests/bpf' steps: - uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.prev_go_version }}' - run: go install lmb.io/vimto@latest - run: go install gotest.tools/gotestsum@v1.12.3 - run: sudo apt-get update && sudo apt-get install -y --no-install-recommends qemu-system-x86 - run: sudo chmod 0666 /dev/kvm - name: Test env: GOTRACEBACK: crash CGO_ENABLED: 1 # CGo is required by `-race` run: | gotestsum --raw-command --ignore-non-json-output-lines --junitfile junit.xml -- vimto -kernel :stable-selftests -- go test -race -timeout 5m -short -count 1 -json ./... - name: Benchmark run: vimto -kernel :stable-selftests -- go test -short -run '^$' -bench . -benchtime=1x ./... 
- name: Upload coredumps uses: actions/upload-artifact@v7 if: ${{ failure() }} with: name: cores if-no-files-found: ignore path: | **/core-* **/*.test - name: Upload Test Results if: always() uses: actions/upload-artifact@v7 with: name: Test Results (previous stable Go) path: junit.xml test-on-arm64: name: Run tests on arm64 runs-on: ubuntu-24.04-arm64 needs: build-and-lint timeout-minutes: 15 env: EBPF_TEST_IGNORE_VERSION: 'TestKprobeMulti,TestKprobeMultiErrors,TestKprobeMultiCookie,TestKprobeMultiProgramCall,TestHaveBPFLinkKprobeMulti,TestKprobeSession,TestHaveBPFLinkKprobeSession,TestHaveProgramType/LircMode2' steps: - uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.go_version }}' - run: go install gotest.tools/gotestsum@v1.12.3 - name: Test # Skip TestGoarches/loong64 because the GH arm64 Go toolchain seems to be weird. # Ubuntu 24.04 crashes when executing TestKfunc. run: gotestsum --ignore-non-json-output-lines --junitfile junit.xml -- -exec 'sudo -E' -short -count 1 -skip '^TestGoarches/loong64$' -skip '^TestKfunc$' -json ./... - name: Benchmark run: go test -exec sudo -short -run '^$' -bench . -benchtime=1x ./... 
- name: Upload Test Results if: always() uses: actions/upload-artifact@v7 with: name: Test Results (arm64) path: junit.xml - name: Show dmesg if: failure() run: | sudo dmesg linux-test: name: Run tests (Linux) runs-on: ubuntu-latest needs: build-and-lint timeout-minutes: 15 strategy: matrix: tag: - "mainline" - "stable" - "6.12" - "6.6" - "6.1" - "5.15" - "5.10" - "5.4" steps: - uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.go_version }}' - run: go install gotest.tools/gotestsum@v1.12.3 - run: go install lmb.io/vimto@latest - run: sudo apt-get update && sudo apt-get install -y --no-install-recommends qemu-system-x86 - run: sudo chmod 0666 /dev/kvm - name: Test run: gotestsum --raw-command --ignore-non-json-output-lines --junitfile junit.xml -- vimto -kernel :${{ matrix.tag }} -- go test -short -count 1 -json ./... - name: Upload Test Results if: always() uses: actions/upload-artifact@v7 with: name: Test Results (linux ${{ matrix.tag }}) path: junit.xml windows-test: name: Run tests (Windows) runs-on: windows-2022 needs: build-and-lint timeout-minutes: 15 strategy: matrix: version: - "main" - "1.0.0-rc1" env: # Fix slow Go compile and cache restore # See https://github.com/actions/setup-go/pull/515 GOCACHE: D:\gocache GOMODCACHE: D:\gomodcache # Avoid putting temp on slow C: TEMP: D:\temp CI_EFW_VERSION: "0.21.0" steps: - run: mkdir D:\temp shell: pwsh - name: Get eBPF for Windows download URL id: determine-url uses: actions/github-script@v8 with: script: | if ("${{ matrix.version }}" != "main") { // Use version from matrix to fetch from release const version = "${{ matrix.version }}"; const releaseTag = `Release-v${version}`; console.log(`Fetching release: ${releaseTag}`); // Get the release by tag const release = await github.rest.repos.getReleaseByTag({ owner: 'microsoft', repo: 'ebpf-for-windows', tag: releaseTag }); if (!release.data) { core.setFailed(`Release ${releaseTag} not found`); return; } 
console.log(`Found release: ${release.data.name}`); // Find the Build.Debug.x64.zip asset const assetName = 'Build.Debug.x64.zip'; const asset = release.data.assets.find(a => a.name === assetName); if (!asset) { console.log('Available assets:', release.data.assets.map(a => a.name)); core.setFailed(`${assetName} asset not found in release ${releaseTag}`); return; } const download_url = asset.browser_download_url; console.log(`Download URL: ${download_url}`); core.setOutput('download_url', download_url); return } // Get the most recent completed scheduled run on main const workflow_runs = await github.rest.actions.listWorkflowRuns({ owner: 'microsoft', repo: 'ebpf-for-windows', workflow_id: 'cicd.yml', event: 'schedule', branch: 'main', status: 'completed', per_page: 1 }); if (workflow_runs.data.workflow_runs.length === 0) { core.setFailed('No successful merge_group workflow runs found'); return; } // Get artifacts from this run const run_id = workflow_runs.data.workflow_runs[0].id; const run_url = workflow_runs.data.workflow_runs[0].html_url; console.log(`Using workflow run: ${run_url}`); // Paginate through all artifacts let allArtifacts = []; let page = 1; while (true) { const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ owner: 'microsoft', repo: 'ebpf-for-windows', run_id: run_id, per_page: 100, page: page }); allArtifacts = allArtifacts.concat(artifacts.data.artifacts); // If we got fewer than 100, we've reached the last page if (artifacts.data.artifacts.length < 100) break; page++; } // Find the specific artifact const artifact = allArtifacts.find(a => a.name === 'Build-x64-Debug'); if (!artifact) { console.log('Available artifacts:', artifacts.data.artifacts.map(a => a.name)); core.setFailed('Build-x64-Debug artifact not found in the workflow run'); return; } // Get the download URL via redirect const response = await github.rest.actions.downloadArtifact({ owner: 'microsoft', repo: 'ebpf-for-windows', artifact_id: artifact.id, archive_format: 'zip', 
request: { redirect: 'manual' } }); // Extract the location header which contains the actual download URL const download_url = response.url; if (!download_url) { core.setFailed('Failed to get redirect URL from headers'); return; } core.setOutput('download_url', download_url); - name: Download and Install eBPF for Windows shell: pwsh run: | Invoke-WebRequest -Uri "${{ steps.determine-url.outputs.download_url }}" -OutFile "$env:TEMP\efw.zip" if ("${{ matrix.version }}" -eq "main") { # Workflow artifact has nested structure: outer zip contains build-Debug.zip Expand-Archive -Path "$env:TEMP\efw.zip" -DestinationPath "$env:TEMP" Expand-Archive -Path "$env:TEMP\build-Debug.zip" -DestinationPath "$env:TEMP\ebpf" } else { # Release asset is the final zip, extract directly Expand-Archive -Path "$env:TEMP\efw.zip" -DestinationPath "$env:TEMP\ebpf" } $setupScript = Get-ChildItem -Path "$env:TEMP\ebpf" -Filter "install_ebpf.psm1" -Recurse | Select-Object -First 1 if ($setupScript) { Write-Host "Found setup script: $($setupScript.FullName)" $releasePath = "$env:TEMP\ebpf\release" Rename-Item -Path $setupScript.DirectoryName -NewName $releasePath Write-Host "Renamed directory to: $releasePath" Set-Location -Path $releasePath Write-Host "Changed directory to: $(Get-Location)" Import-Module .\\install_ebpf.psm1 -ArgumentList ($pwd, "install.log") -Force Get-PSExec Install-eBPFComponents -KmTracing $false -KmTraceType "file" -TestMode "Normal" } else { Write-Error "Setup script not found in the extracted package" exit 1 } - name: Add eBPF for Windows to PATH shell: pwsh run: echo "C:\Program Files\ebpf-for-windows\" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: '${{ env.go_version }}' - run: go install gotest.tools/gotestsum@v1.12.3 - name: Test run: > gotestsum --raw-command --ignore-non-json-output-lines --junitfile junit.xml -- go test -short -count 1 -json ./... 
- name: Upload Test Results if: always() uses: actions/upload-artifact@v7 with: name: Test Results (windows ${{ matrix.version }}) path: junit.xml results: name: Results runs-on: ubuntu-latest needs: - build-and-lint - cross-build - build-docs - test-on-prev-go - test-on-arm64 - linux-test - windows-test if: always() steps: - name: Check Results run: | if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then echo "Some checks failed" exit 1 else echo "All checks passed successfully" fi ================================================ FILE: .github/workflows/trusted.yml ================================================ on: workflow_run: workflows: ["apidiff"] types: - completed permissions: pull-requests: write jobs: tag-breaking-change: name: Tag breaking changes runs-on: ubuntu-latest if: github.event.workflow_run.event == 'pull_request' steps: - name: 'Download artifact' uses: actions/github-script@v8 with: script: | var artifacts = await github.rest.actions.listWorkflowRunArtifacts({ owner: context.repo.owner, repo: context.repo.repo, run_id: ${{github.event.workflow_run.id }}, }); var matchArtifact = artifacts.data.artifacts.filter((artifact) => { return artifact.name == "apidiff" })[0]; var download = await github.rest.actions.downloadArtifact({ owner: context.repo.owner, repo: context.repo.repo, artifact_id: matchArtifact.id, archive_format: 'zip', }); var fs = require('fs'); fs.writeFileSync('${{github.workspace}}/apidiff.zip', Buffer.from(download.data)); - run: unzip apidiff.zip - name: 'Add or remove label' uses: actions/github-script@v8 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | var fs = require('fs'); var jsonData = JSON.parse(fs.readFileSync('apidiff.json', 'utf8')); var issueNumber = jsonData.id; var semverType = jsonData["semver-type"]; if (semverType === 'major') { // Add 'breaking-change' label await github.rest.issues.addLabels({ owner: context.repo.owner, repo: context.repo.repo, issue_number: issueNumber, labels: 
['breaking-change'] }); } else { // Remove 'breaking-change' label if it exists try { await github.rest.issues.removeLabel({ owner: context.repo.owner, repo: context.repo.repo, issue_number: issueNumber, name: 'breaking-change' }); } catch (error) { console.log('Label breaking-change not found or already removed'); } } ================================================ FILE: .gitignore ================================================ # Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib *.o !*_bpf*.o # Test binary, built with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out ================================================ FILE: .golangci.yaml ================================================ version: "2" linters: default: none enable: - depguard - govet - ineffassign - misspell - unused settings: depguard: rules: no-x-sys-unix: files: - '!**/internal/unix/*.go' - '!**/examples/**/*.go' - '!**/docs/**/*.go' deny: - pkg: golang.org/x/sys/unix desc: use internal/unix instead formatters: enable: - gofmt - goimports settings: goimports: local-prefixes: - github.com/cilium/ebpf ================================================ FILE: .vimto.toml ================================================ kernel="ghcr.io/cilium/ci-kernels:stable" smp="cpus=2" memory="1G" user="root" setup=[ "mount -t cgroup2 -o nosuid,noexec,nodev cgroup2 /sys/fs/cgroup", "/bin/sh -c 'modprobe bpf_testmod || true'", "dmesg --clear", ] teardown=[ "dmesg --read-clear", ] ================================================ FILE: CODEOWNERS ================================================ * @cilium/ebpf-lib-maintainers /features/ @rgo3 /link/ @mmat11 /perf/ @florianl /ringbuf/ @florianl /btf/ @dylandreimerink /docs/ @ti-mo # Windows specific code. 
/docs/**/windows*.md @cilium/ebpf-go-windows-reviewers /internal/efw @cilium/ebpf-go-windows-reviewers windows/ @cilium/ebpf-go-windows-reviewers # Folders *windows*.go @cilium/ebpf-go-windows-reviewers # Go code ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at nathanjsweet at gmail dot com or i at lmb dot io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/4/ ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to ebpf-go Want to contribute to ebpf-go? There are a few things you need to know. 
We wrote a [contribution guide](https://ebpf-go.dev/contributing/) to help you get started. ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2017 Nathan Sweet Copyright (c) 2018, 2019 Cloudflare Copyright (c) 2019 Authors of Cilium Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: MAINTAINERS.md ================================================ # Maintainers Maintainers can be found in the [Cilium Maintainers file](https://github.com/cilium/community/blob/main/roles/Maintainers.md) ================================================ FILE: Makefile ================================================ # The development version of clang is distributed as the 'clang' binary, # while stable/released versions have a version number attached. # Pin the default clang to a stable version. 
CLANG ?= clang-20 STRIP ?= llvm-strip-20 OBJCOPY ?= llvm-objcopy-20 CFLAGS := -O2 -g -Wall -Werror -mcpu=v2 $(CFLAGS) CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/ # Obtain an absolute path to the directory of the Makefile. # Assume the Makefile is in the root of the repository. REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) # Prefer podman if installed, otherwise use docker. # Note: Setting the var at runtime will always override. CONTAINER_ENGINE ?= $(if $(shell command -v podman),podman,docker) # Configure container runtime arguments based on the container engine. CONTAINER_RUN_ARGS := \ --env MAKEFLAGS \ --env BPF2GO_CC="$(CLANG)" \ --env BPF2GO_CFLAGS="$(CFLAGS)" \ --env HOME=/tmp \ -v "${REPODIR}":/ebpf -w /ebpf \ -v "$(shell go env GOCACHE)":/tmp/.cache/go-build \ -v "$(shell go env GOPATH)":/go \ -v "$(shell go env GOMODCACHE)":/go/pkg/mod ifeq ($(CONTAINER_ENGINE), podman) CONTAINER_RUN_ARGS += --log-driver=none --security-opt label=disable else CONTAINER_RUN_ARGS += --user "$(shell stat -c '%u:%g' ${REPODIR})" endif IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE) VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION) TARGETS_EL := \ testdata/linked1 \ testdata/linked2 \ testdata/linked TARGETS := \ testdata/loader-clang-14 \ testdata/loader-clang-17 \ testdata/loader-$(CLANG) \ testdata/loader_nobtf \ testdata/manyprogs \ testdata/btf_map_init \ testdata/invalid_map \ testdata/raw_tracepoint \ testdata/invalid_map_static \ testdata/invalid_btf_map_init \ testdata/strings \ testdata/freplace \ testdata/fentry_fexit \ testdata/iproute2_map_compat \ testdata/map_spin_lock \ testdata/subprog_reloc \ testdata/fwd_decl \ testdata/kconfig \ testdata/ksym \ testdata/kfunc \ testdata/invalid-kfunc \ testdata/kfunc-kmod \ testdata/constants \ testdata/errors \ testdata/variables \ testdata/arena \ testdata/struct_ops \ btf/testdata/relocs \ btf/testdata/relocs_read \ btf/testdata/relocs_read_tgt \ 
btf/testdata/relocs_enum \
	btf/testdata/tags \
	cmd/bpf2go/testdata/minimal

# Headers shared by the testdata programs; every ELF target depends on them.
HEADERS := $(wildcard testdata/*.h)

# Command targets that do not produce a file of the same name.
.PHONY: all clean container-all container-shell format generate

# GNU Make reads the default goal from .DEFAULT_GOAL. Set it explicitly so that
# a plain `make` keeps running container-all even if rules are reordered.
.DEFAULT_GOAL := container-all

# Build all ELF binaries using a containerized LLVM toolchain.
container-all:
	+${CONTAINER_ENGINE} run --rm -ti ${CONTAINER_RUN_ARGS} \
		"${IMAGE}:${VERSION}" \
		$(MAKE) all

# (debug) Drop the user into a shell inside the container as root.
# Set BPF2GO_ envs to make 'make generate' just work.
container-shell:
	${CONTAINER_ENGINE} run --rm -ti ${CONTAINER_RUN_ARGS} \
		"${IMAGE}:${VERSION}"

# Remove every compiled object below the current directory.
clean:
	find "$(CURDIR)" -name "*.elf" -delete
	find "$(CURDIR)" -name "*.o" -delete

# Reformat all C sources in place with clang-format.
format:
	find . -type f -name "*.c" | xargs clang-format -i

# Rebuild the testdata, point the unversioned loader ELFs at the ones built
# with the pinned $(CLANG), then regenerate Go code.
all: format testdata update-external-deps
	ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
	ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf
	$(MAKE) generate

# The generators run in three passes: stringer, then gentypes, then the rest.
generate:
	go generate -run "stringer" ./...
	go generate -run "gentypes" ./...
	go generate -skip "(gentypes|stringer)" ./...
testdata: $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) $(addsuffix -el.elf,$(TARGETS_EL)) testdata/loader-%-el.elf: testdata/loader.c $(HEADERS) $* $(CFLAGS) -target bpfel -c $< -o $@ $(STRIP) -g $@ testdata/loader-%-eb.elf: testdata/loader.c $(HEADERS) $* $(CFLAGS) -target bpfeb -c $< -o $@ $(STRIP) -g $@ testdata/loader_nobtf-el.elf: testdata/loader.c $(HEADERS) $(CLANG) $(CFLAGS) -g0 -D__NOBTF__ -target bpfel -c $< -o $@ testdata/loader_nobtf-eb.elf: testdata/loader.c $(HEADERS) $(CLANG) $(CFLAGS) -g0 -D__NOBTF__ -target bpfeb -c $< -o $@ %-el.elf: %.c $(HEADERS) $(CLANG) $(CFLAGS) -target bpfel -c $< -o $@ $(STRIP) -g $@ %-eb.elf: %.c $(HEADERS) $(CLANG) $(CFLAGS) -target bpfeb -c $< -o $@ $(STRIP) -g $@ testdata/linked-el.elf: testdata/linked1-el.elf testdata/linked2-el.elf bpftool gen object $@ $^ .PHONY: update-external-deps update-external-deps: ./scripts/update-kernel-deps.sh ./scripts/update-efw-deps.sh ================================================ FILE: README.md ================================================ # eBPF [![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf) ![HoneyGopher](docs/ebpf/ebpf-go.png) ebpf-go is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes. See [ebpf.io](https://ebpf.io) for complementary projects from the wider eBPF ecosystem. ## Getting Started Please take a look at our [Getting Started] guide. [Contributions](https://ebpf-go.dev/contributing) are highly encouraged, as they highlight certain use cases of eBPF and the library, and help shape the future of the project. ## Getting Help The community actively monitors our [GitHub Discussions](https://github.com/cilium/ebpf/discussions) page. Please search for existing threads before starting a new one. 
Refrain from opening issues on the bug tracker if you're just starting out or if you're not sure whether something is a bug in the library code.

Alternatively, [join](https://ebpf.io/slack) the [#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack if you have other questions regarding the project. Note that this channel is ephemeral and has its history erased past a certain point, which is less helpful for others running into the same problem later.

## Packages

This library includes the following packages:

* [asm](https://pkg.go.dev/github.com/cilium/ebpf/asm) contains a basic assembler, allowing you to write eBPF assembly instructions directly within your Go code. (You don't need to use this if you prefer to write your eBPF program in C.)
* [cmd/bpf2go](https://pkg.go.dev/github.com/cilium/ebpf/cmd/bpf2go) allows compiling and embedding eBPF programs written in C within Go code. As well as compiling the C code, it auto-generates Go code for loading and manipulating the eBPF program and map objects.
* [link](https://pkg.go.dev/github.com/cilium/ebpf/link) allows attaching eBPF programs to various hooks.
* [perf](https://pkg.go.dev/github.com/cilium/ebpf/perf) allows reading from a `BPF_MAP_TYPE_PERF_EVENT_ARRAY` map.
* [ringbuf](https://pkg.go.dev/github.com/cilium/ebpf/ringbuf) allows reading from a `BPF_MAP_TYPE_RINGBUF` map.
* [features](https://pkg.go.dev/github.com/cilium/ebpf/features) implements the equivalent of `bpftool feature probe` for discovering BPF-related kernel features using native Go.
* [rlimit](https://pkg.go.dev/github.com/cilium/ebpf/rlimit) provides a convenient API to lift the `RLIMIT_MEMLOCK` constraint on kernels before 5.11.
* [btf](https://pkg.go.dev/github.com/cilium/ebpf/btf) allows reading the BPF Type Format.
* [pin](https://pkg.go.dev/github.com/cilium/ebpf/pin) provides APIs for working with pinned objects on bpffs.
## Requirements * A version of Go that is [supported by upstream](https://golang.org/doc/devel/release.html#policy) * Linux (amd64, arm64): CI is run against kernel.org LTS releases. >= 4.4 should work but EOL'ed versions are not supported. * Windows (amd64): CI is run against Windows Server 2022. Only the latest eBPF for Windows release is supported. * Other architectures are best effort. 32bit arches are not supported. ## License MIT ### eBPF Gopher The eBPF honeygopher is based on the Go gopher designed by Renee French. [Getting Started]: https://ebpf-go.dev/guides/getting-started/ ================================================ FILE: asm/alu.go ================================================ package asm //go:generate go tool stringer -output alu_string.go -type=Source,Endianness,ALUOp // Source of ALU / ALU64 / Branch operations // // msb lsb // +------------+-+---+ // | op |S|cls| // +------------+-+---+ type Source uint16 const sourceMask OpCode = 0x0008 // Source bitmask const ( // InvalidSource is returned by getters when invoked // on non ALU / branch OpCodes. InvalidSource Source = 0xffff // ImmSource src is from constant ImmSource Source = 0x0000 // RegSource src is from register RegSource Source = 0x0008 ) // The Endianness of a byte swap instruction. 
type Endianness uint8 const endianMask = sourceMask // Endian flags const ( InvalidEndian Endianness = 0xff // Convert to little endian LE Endianness = 0x00 // Convert to big endian BE Endianness = 0x08 ) // ALUOp are ALU / ALU64 operations // // msb lsb // +-------+----+-+---+ // | EXT | OP |s|cls| // +-------+----+-+---+ type ALUOp uint16 const aluMask OpCode = 0x3ff0 const ( // InvalidALUOp is returned by getters when invoked // on non ALU OpCodes InvalidALUOp ALUOp = 0xffff // Add - addition Add ALUOp = 0x0000 // Sub - subtraction Sub ALUOp = 0x0010 // Mul - multiplication Mul ALUOp = 0x0020 // Div - division Div ALUOp = 0x0030 // SDiv - signed division SDiv ALUOp = Div + 0x0100 // Or - bitwise or Or ALUOp = 0x0040 // And - bitwise and And ALUOp = 0x0050 // LSh - bitwise shift left LSh ALUOp = 0x0060 // RSh - bitwise shift right RSh ALUOp = 0x0070 // Neg - sign/unsign signing bit Neg ALUOp = 0x0080 // Mod - modulo Mod ALUOp = 0x0090 // SMod - signed modulo SMod ALUOp = Mod + 0x0100 // Xor - bitwise xor Xor ALUOp = 0x00a0 // Mov - move value from one place to another Mov ALUOp = 0x00b0 // MovSX8 - move lower 8 bits, sign extended upper bits of target MovSX8 ALUOp = Mov + 0x0100 // MovSX16 - move lower 16 bits, sign extended upper bits of target MovSX16 ALUOp = Mov + 0x0200 // MovSX32 - move lower 32 bits, sign extended upper bits of target MovSX32 ALUOp = Mov + 0x0300 // ArSh - arithmetic shift ArSh ALUOp = 0x00c0 // Swap - endian conversions Swap ALUOp = 0x00d0 ) // HostTo converts from host to another endianness. func HostTo(endian Endianness, dst Register, size Size) Instruction { var imm int64 switch size { case Half: imm = 16 case Word: imm = 32 case DWord: imm = 64 default: return Instruction{OpCode: InvalidOpCode} } return Instruction{ OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)), Dst: dst, Constant: imm, } } // BSwap unconditionally reverses the order of bytes in a register. 
func BSwap(dst Register, size Size) Instruction { var imm int64 switch size { case Half: imm = 16 case Word: imm = 32 case DWord: imm = 64 default: return Instruction{OpCode: InvalidOpCode} } return Instruction{ OpCode: OpCode(ALU64Class).SetALUOp(Swap), Dst: dst, Constant: imm, } } // Op returns the OpCode for an ALU operation with a given source. func (op ALUOp) Op(source Source) OpCode { return OpCode(ALU64Class).SetALUOp(op).SetSource(source) } // Reg emits `dst (op) src`. func (op ALUOp) Reg(dst, src Register) Instruction { return Instruction{ OpCode: op.Op(RegSource), Dst: dst, Src: src, } } // Imm emits `dst (op) value`. func (op ALUOp) Imm(dst Register, value int32) Instruction { return Instruction{ OpCode: op.Op(ImmSource), Dst: dst, Constant: int64(value), } } // Op32 returns the OpCode for a 32-bit ALU operation with a given source. func (op ALUOp) Op32(source Source) OpCode { return OpCode(ALUClass).SetALUOp(op).SetSource(source) } // Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst. func (op ALUOp) Reg32(dst, src Register) Instruction { return Instruction{ OpCode: op.Op32(RegSource), Dst: dst, Src: src, } } // Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst. func (op ALUOp) Imm32(dst Register, value int32) Instruction { return Instruction{ OpCode: op.Op32(ImmSource), Dst: dst, Constant: int64(value), } } ================================================ FILE: asm/alu_string.go ================================================ // Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT. package asm import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} _ = x[InvalidSource-65535] _ = x[ImmSource-0] _ = x[RegSource-8] } const ( _Source_name_0 = "ImmSource" _Source_name_1 = "RegSource" _Source_name_2 = "InvalidSource" ) func (i Source) String() string { switch { case i == 0: return _Source_name_0 case i == 8: return _Source_name_1 case i == 65535: return _Source_name_2 default: return "Source(" + strconv.FormatInt(int64(i), 10) + ")" } } func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[InvalidEndian-255] _ = x[LE-0] _ = x[BE-8] } const ( _Endianness_name_0 = "LE" _Endianness_name_1 = "BE" _Endianness_name_2 = "InvalidEndian" ) func (i Endianness) String() string { switch { case i == 0: return _Endianness_name_0 case i == 8: return _Endianness_name_1 case i == 255: return _Endianness_name_2 default: return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")" } } func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} _ = x[InvalidALUOp-65535] _ = x[Add-0] _ = x[Sub-16] _ = x[Mul-32] _ = x[Div-48] _ = x[SDiv-304] _ = x[Or-64] _ = x[And-80] _ = x[LSh-96] _ = x[RSh-112] _ = x[Neg-128] _ = x[Mod-144] _ = x[SMod-400] _ = x[Xor-160] _ = x[Mov-176] _ = x[MovSX8-432] _ = x[MovSX16-688] _ = x[MovSX32-944] _ = x[ArSh-192] _ = x[Swap-208] } const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapSDivSModMovSX8MovSX16MovSX32InvalidALUOp" var _ALUOp_map = map[ALUOp]string{ 0: _ALUOp_name[0:3], 16: _ALUOp_name[3:6], 32: _ALUOp_name[6:9], 48: _ALUOp_name[9:12], 64: _ALUOp_name[12:14], 80: _ALUOp_name[14:17], 96: _ALUOp_name[17:20], 112: _ALUOp_name[20:23], 128: _ALUOp_name[23:26], 144: _ALUOp_name[26:29], 160: _ALUOp_name[29:32], 176: _ALUOp_name[32:35], 192: _ALUOp_name[35:39], 208: _ALUOp_name[39:43], 304: _ALUOp_name[43:47], 400: _ALUOp_name[47:51], 432: _ALUOp_name[51:57], 688: _ALUOp_name[57:64], 944: _ALUOp_name[64:71], 65535: _ALUOp_name[71:83], } func (i ALUOp) String() string { if str, ok := _ALUOp_map[i]; ok { return str } return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")" } ================================================ FILE: asm/doc.go ================================================ // Package asm is an assembler for eBPF bytecode. 
package asm

================================================
FILE: asm/dsl_test.go
================================================
package asm

import (
	"testing"
)

// TestDSL checks that the instruction builders of the DSL produce the expected
// encoding. Every case compares the Instruction returned by a builder against
// a hand-written Instruction literal.
func TestDSL(t *testing.T) {
	testcases := []struct {
		name string
		have Instruction
		want Instruction
	}{
		{"Call", FnMapLookupElem.Call(), Instruction{OpCode: 0x85, Constant: 1}},
		{"Exit", Return(), Instruction{OpCode: 0x95}},
		{"LoadAbs", LoadAbs(2, Byte), Instruction{OpCode: 0x30, Constant: 2}},
		{"Store", StoreMem(RFP, -4, R0, Word), Instruction{
			OpCode: 0x63,
			Dst:    RFP,
			Src:    R0,
			Offset: -4,
		}},
		{"Add.Imm", Add.Imm(R1, 22), Instruction{OpCode: 0x07, Dst: R1, Constant: 22}},
		{"Add.Reg", Add.Reg(R1, R2), Instruction{OpCode: 0x0f, Dst: R1, Src: R2}},
		{"Add.Imm32", Add.Imm32(R1, 22), Instruction{
			OpCode: 0x04, Dst: R1, Constant: 22,
		}},
		{"JSGT.Imm", JSGT.Imm(R1, 4, "foo"), Instruction{
			OpCode: 0x65, Dst: R1, Constant: 4, Offset: -1,
		}.WithReference("foo")},
		{"JSGT.Imm32", JSGT.Imm32(R1, -2, "foo"), Instruction{
			OpCode: 0x66, Dst: R1, Constant: -2, Offset: -1,
		}.WithReference("foo")},
		{"JSLT.Reg", JSLT.Reg(R1, R2, "foo"), Instruction{
			OpCode: 0xcd, Dst: R1, Src: R2, Offset: -1,
		}.WithReference("foo")},
		{"JSLT.Reg32", JSLT.Reg32(R1, R3, "foo"), Instruction{
			OpCode: 0xce, Dst: R1, Src: R3, Offset: -1,
		}.WithReference("foo")},
	}

	for _, tc := range testcases {
		// Run each case as a named subtest so that failures are reported per
		// case and a single case can be selected with -run.
		t.Run(tc.name, func(t *testing.T) {
			if !tc.have.equal(tc.want) {
				t.Errorf("have %v, want %v", tc.have, tc.want)
			}
		})
	}
}

================================================
FILE: asm/func.go
================================================
package asm

import "github.com/cilium/ebpf/internal/platform"

//go:generate go tool stringer -output func_string.go -type=BuiltinFunc

// BuiltinFunc is a built-in eBPF function.
type BuiltinFunc uint32

// BuiltinFuncForPlatform returns a platform specific function constant.
//
// Use this if the library doesn't provide a constant yet.
func BuiltinFuncForPlatform(plat string, value uint32) (BuiltinFunc, error) { return platform.EncodeConstant[BuiltinFunc](plat, value) } // Call emits a function call. func (fn BuiltinFunc) Call() Instruction { return Instruction{ OpCode: OpCode(JumpClass).SetJumpOp(Call), Constant: int64(fn), } } ================================================ FILE: asm/func_lin.go ================================================ // Code generated by internal/cmd/genfunctions.awk; DO NOT EDIT. package asm // Code in this file is derived from Linux, available under the GPL-2.0 WITH Linux-syscall-note. import "github.com/cilium/ebpf/internal/platform" // Built-in functions (Linux). const ( FnUnspec = BuiltinFunc(platform.LinuxTag | 0) //lint:ignore SA4016 consistency FnMapLookupElem = BuiltinFunc(platform.LinuxTag | 1) FnMapUpdateElem = BuiltinFunc(platform.LinuxTag | 2) FnMapDeleteElem = BuiltinFunc(platform.LinuxTag | 3) FnProbeRead = BuiltinFunc(platform.LinuxTag | 4) FnKtimeGetNs = BuiltinFunc(platform.LinuxTag | 5) FnTracePrintk = BuiltinFunc(platform.LinuxTag | 6) FnGetPrandomU32 = BuiltinFunc(platform.LinuxTag | 7) FnGetSmpProcessorId = BuiltinFunc(platform.LinuxTag | 8) FnSkbStoreBytes = BuiltinFunc(platform.LinuxTag | 9) FnL3CsumReplace = BuiltinFunc(platform.LinuxTag | 10) FnL4CsumReplace = BuiltinFunc(platform.LinuxTag | 11) FnTailCall = BuiltinFunc(platform.LinuxTag | 12) FnCloneRedirect = BuiltinFunc(platform.LinuxTag | 13) FnGetCurrentPidTgid = BuiltinFunc(platform.LinuxTag | 14) FnGetCurrentUidGid = BuiltinFunc(platform.LinuxTag | 15) FnGetCurrentComm = BuiltinFunc(platform.LinuxTag | 16) FnGetCgroupClassid = BuiltinFunc(platform.LinuxTag | 17) FnSkbVlanPush = BuiltinFunc(platform.LinuxTag | 18) FnSkbVlanPop = BuiltinFunc(platform.LinuxTag | 19) FnSkbGetTunnelKey = BuiltinFunc(platform.LinuxTag | 20) FnSkbSetTunnelKey = BuiltinFunc(platform.LinuxTag | 21) FnPerfEventRead = BuiltinFunc(platform.LinuxTag | 22) FnRedirect = BuiltinFunc(platform.LinuxTag | 23) 
FnGetRouteRealm = BuiltinFunc(platform.LinuxTag | 24) FnPerfEventOutput = BuiltinFunc(platform.LinuxTag | 25) FnSkbLoadBytes = BuiltinFunc(platform.LinuxTag | 26) FnGetStackid = BuiltinFunc(platform.LinuxTag | 27) FnCsumDiff = BuiltinFunc(platform.LinuxTag | 28) FnSkbGetTunnelOpt = BuiltinFunc(platform.LinuxTag | 29) FnSkbSetTunnelOpt = BuiltinFunc(platform.LinuxTag | 30) FnSkbChangeProto = BuiltinFunc(platform.LinuxTag | 31) FnSkbChangeType = BuiltinFunc(platform.LinuxTag | 32) FnSkbUnderCgroup = BuiltinFunc(platform.LinuxTag | 33) FnGetHashRecalc = BuiltinFunc(platform.LinuxTag | 34) FnGetCurrentTask = BuiltinFunc(platform.LinuxTag | 35) FnProbeWriteUser = BuiltinFunc(platform.LinuxTag | 36) FnCurrentTaskUnderCgroup = BuiltinFunc(platform.LinuxTag | 37) FnSkbChangeTail = BuiltinFunc(platform.LinuxTag | 38) FnSkbPullData = BuiltinFunc(platform.LinuxTag | 39) FnCsumUpdate = BuiltinFunc(platform.LinuxTag | 40) FnSetHashInvalid = BuiltinFunc(platform.LinuxTag | 41) FnGetNumaNodeId = BuiltinFunc(platform.LinuxTag | 42) FnSkbChangeHead = BuiltinFunc(platform.LinuxTag | 43) FnXdpAdjustHead = BuiltinFunc(platform.LinuxTag | 44) FnProbeReadStr = BuiltinFunc(platform.LinuxTag | 45) FnGetSocketCookie = BuiltinFunc(platform.LinuxTag | 46) FnGetSocketUid = BuiltinFunc(platform.LinuxTag | 47) FnSetHash = BuiltinFunc(platform.LinuxTag | 48) FnSetsockopt = BuiltinFunc(platform.LinuxTag | 49) FnSkbAdjustRoom = BuiltinFunc(platform.LinuxTag | 50) FnRedirectMap = BuiltinFunc(platform.LinuxTag | 51) FnSkRedirectMap = BuiltinFunc(platform.LinuxTag | 52) FnSockMapUpdate = BuiltinFunc(platform.LinuxTag | 53) FnXdpAdjustMeta = BuiltinFunc(platform.LinuxTag | 54) FnPerfEventReadValue = BuiltinFunc(platform.LinuxTag | 55) FnPerfProgReadValue = BuiltinFunc(platform.LinuxTag | 56) FnGetsockopt = BuiltinFunc(platform.LinuxTag | 57) FnOverrideReturn = BuiltinFunc(platform.LinuxTag | 58) FnSockOpsCbFlagsSet = BuiltinFunc(platform.LinuxTag | 59) FnMsgRedirectMap = BuiltinFunc(platform.LinuxTag 
| 60) FnMsgApplyBytes = BuiltinFunc(platform.LinuxTag | 61) FnMsgCorkBytes = BuiltinFunc(platform.LinuxTag | 62) FnMsgPullData = BuiltinFunc(platform.LinuxTag | 63) FnBind = BuiltinFunc(platform.LinuxTag | 64) FnXdpAdjustTail = BuiltinFunc(platform.LinuxTag | 65) FnSkbGetXfrmState = BuiltinFunc(platform.LinuxTag | 66) FnGetStack = BuiltinFunc(platform.LinuxTag | 67) FnSkbLoadBytesRelative = BuiltinFunc(platform.LinuxTag | 68) FnFibLookup = BuiltinFunc(platform.LinuxTag | 69) FnSockHashUpdate = BuiltinFunc(platform.LinuxTag | 70) FnMsgRedirectHash = BuiltinFunc(platform.LinuxTag | 71) FnSkRedirectHash = BuiltinFunc(platform.LinuxTag | 72) FnLwtPushEncap = BuiltinFunc(platform.LinuxTag | 73) FnLwtSeg6StoreBytes = BuiltinFunc(platform.LinuxTag | 74) FnLwtSeg6AdjustSrh = BuiltinFunc(platform.LinuxTag | 75) FnLwtSeg6Action = BuiltinFunc(platform.LinuxTag | 76) FnRcRepeat = BuiltinFunc(platform.LinuxTag | 77) FnRcKeydown = BuiltinFunc(platform.LinuxTag | 78) FnSkbCgroupId = BuiltinFunc(platform.LinuxTag | 79) FnGetCurrentCgroupId = BuiltinFunc(platform.LinuxTag | 80) FnGetLocalStorage = BuiltinFunc(platform.LinuxTag | 81) FnSkSelectReuseport = BuiltinFunc(platform.LinuxTag | 82) FnSkbAncestorCgroupId = BuiltinFunc(platform.LinuxTag | 83) FnSkLookupTcp = BuiltinFunc(platform.LinuxTag | 84) FnSkLookupUdp = BuiltinFunc(platform.LinuxTag | 85) FnSkRelease = BuiltinFunc(platform.LinuxTag | 86) FnMapPushElem = BuiltinFunc(platform.LinuxTag | 87) FnMapPopElem = BuiltinFunc(platform.LinuxTag | 88) FnMapPeekElem = BuiltinFunc(platform.LinuxTag | 89) FnMsgPushData = BuiltinFunc(platform.LinuxTag | 90) FnMsgPopData = BuiltinFunc(platform.LinuxTag | 91) FnRcPointerRel = BuiltinFunc(platform.LinuxTag | 92) FnSpinLock = BuiltinFunc(platform.LinuxTag | 93) FnSpinUnlock = BuiltinFunc(platform.LinuxTag | 94) FnSkFullsock = BuiltinFunc(platform.LinuxTag | 95) FnTcpSock = BuiltinFunc(platform.LinuxTag | 96) FnSkbEcnSetCe = BuiltinFunc(platform.LinuxTag | 97) FnGetListenerSock = 
BuiltinFunc(platform.LinuxTag | 98) FnSkcLookupTcp = BuiltinFunc(platform.LinuxTag | 99) FnTcpCheckSyncookie = BuiltinFunc(platform.LinuxTag | 100) FnSysctlGetName = BuiltinFunc(platform.LinuxTag | 101) FnSysctlGetCurrentValue = BuiltinFunc(platform.LinuxTag | 102) FnSysctlGetNewValue = BuiltinFunc(platform.LinuxTag | 103) FnSysctlSetNewValue = BuiltinFunc(platform.LinuxTag | 104) FnStrtol = BuiltinFunc(platform.LinuxTag | 105) FnStrtoul = BuiltinFunc(platform.LinuxTag | 106) FnSkStorageGet = BuiltinFunc(platform.LinuxTag | 107) FnSkStorageDelete = BuiltinFunc(platform.LinuxTag | 108) FnSendSignal = BuiltinFunc(platform.LinuxTag | 109) FnTcpGenSyncookie = BuiltinFunc(platform.LinuxTag | 110) FnSkbOutput = BuiltinFunc(platform.LinuxTag | 111) FnProbeReadUser = BuiltinFunc(platform.LinuxTag | 112) FnProbeReadKernel = BuiltinFunc(platform.LinuxTag | 113) FnProbeReadUserStr = BuiltinFunc(platform.LinuxTag | 114) FnProbeReadKernelStr = BuiltinFunc(platform.LinuxTag | 115) FnTcpSendAck = BuiltinFunc(platform.LinuxTag | 116) FnSendSignalThread = BuiltinFunc(platform.LinuxTag | 117) FnJiffies64 = BuiltinFunc(platform.LinuxTag | 118) FnReadBranchRecords = BuiltinFunc(platform.LinuxTag | 119) FnGetNsCurrentPidTgid = BuiltinFunc(platform.LinuxTag | 120) FnXdpOutput = BuiltinFunc(platform.LinuxTag | 121) FnGetNetnsCookie = BuiltinFunc(platform.LinuxTag | 122) FnGetCurrentAncestorCgroupId = BuiltinFunc(platform.LinuxTag | 123) FnSkAssign = BuiltinFunc(platform.LinuxTag | 124) FnKtimeGetBootNs = BuiltinFunc(platform.LinuxTag | 125) FnSeqPrintf = BuiltinFunc(platform.LinuxTag | 126) FnSeqWrite = BuiltinFunc(platform.LinuxTag | 127) FnSkCgroupId = BuiltinFunc(platform.LinuxTag | 128) FnSkAncestorCgroupId = BuiltinFunc(platform.LinuxTag | 129) FnRingbufOutput = BuiltinFunc(platform.LinuxTag | 130) FnRingbufReserve = BuiltinFunc(platform.LinuxTag | 131) FnRingbufSubmit = BuiltinFunc(platform.LinuxTag | 132) FnRingbufDiscard = BuiltinFunc(platform.LinuxTag | 133) FnRingbufQuery = 
BuiltinFunc(platform.LinuxTag | 134) FnCsumLevel = BuiltinFunc(platform.LinuxTag | 135) FnSkcToTcp6Sock = BuiltinFunc(platform.LinuxTag | 136) FnSkcToTcpSock = BuiltinFunc(platform.LinuxTag | 137) FnSkcToTcpTimewaitSock = BuiltinFunc(platform.LinuxTag | 138) FnSkcToTcpRequestSock = BuiltinFunc(platform.LinuxTag | 139) FnSkcToUdp6Sock = BuiltinFunc(platform.LinuxTag | 140) FnGetTaskStack = BuiltinFunc(platform.LinuxTag | 141) FnLoadHdrOpt = BuiltinFunc(platform.LinuxTag | 142) FnStoreHdrOpt = BuiltinFunc(platform.LinuxTag | 143) FnReserveHdrOpt = BuiltinFunc(platform.LinuxTag | 144) FnInodeStorageGet = BuiltinFunc(platform.LinuxTag | 145) FnInodeStorageDelete = BuiltinFunc(platform.LinuxTag | 146) FnDPath = BuiltinFunc(platform.LinuxTag | 147) FnCopyFromUser = BuiltinFunc(platform.LinuxTag | 148) FnSnprintfBtf = BuiltinFunc(platform.LinuxTag | 149) FnSeqPrintfBtf = BuiltinFunc(platform.LinuxTag | 150) FnSkbCgroupClassid = BuiltinFunc(platform.LinuxTag | 151) FnRedirectNeigh = BuiltinFunc(platform.LinuxTag | 152) FnPerCpuPtr = BuiltinFunc(platform.LinuxTag | 153) FnThisCpuPtr = BuiltinFunc(platform.LinuxTag | 154) FnRedirectPeer = BuiltinFunc(platform.LinuxTag | 155) FnTaskStorageGet = BuiltinFunc(platform.LinuxTag | 156) FnTaskStorageDelete = BuiltinFunc(platform.LinuxTag | 157) FnGetCurrentTaskBtf = BuiltinFunc(platform.LinuxTag | 158) FnBprmOptsSet = BuiltinFunc(platform.LinuxTag | 159) FnKtimeGetCoarseNs = BuiltinFunc(platform.LinuxTag | 160) FnImaInodeHash = BuiltinFunc(platform.LinuxTag | 161) FnSockFromFile = BuiltinFunc(platform.LinuxTag | 162) FnCheckMtu = BuiltinFunc(platform.LinuxTag | 163) FnForEachMapElem = BuiltinFunc(platform.LinuxTag | 164) FnSnprintf = BuiltinFunc(platform.LinuxTag | 165) FnSysBpf = BuiltinFunc(platform.LinuxTag | 166) FnBtfFindByNameKind = BuiltinFunc(platform.LinuxTag | 167) FnSysClose = BuiltinFunc(platform.LinuxTag | 168) FnTimerInit = BuiltinFunc(platform.LinuxTag | 169) FnTimerSetCallback = BuiltinFunc(platform.LinuxTag | 170) 
FnTimerStart = BuiltinFunc(platform.LinuxTag | 171) FnTimerCancel = BuiltinFunc(platform.LinuxTag | 172) FnGetFuncIp = BuiltinFunc(platform.LinuxTag | 173) FnGetAttachCookie = BuiltinFunc(platform.LinuxTag | 174) FnTaskPtRegs = BuiltinFunc(platform.LinuxTag | 175) FnGetBranchSnapshot = BuiltinFunc(platform.LinuxTag | 176) FnTraceVprintk = BuiltinFunc(platform.LinuxTag | 177) FnSkcToUnixSock = BuiltinFunc(platform.LinuxTag | 178) FnKallsymsLookupName = BuiltinFunc(platform.LinuxTag | 179) FnFindVma = BuiltinFunc(platform.LinuxTag | 180) FnLoop = BuiltinFunc(platform.LinuxTag | 181) FnStrncmp = BuiltinFunc(platform.LinuxTag | 182) FnGetFuncArg = BuiltinFunc(platform.LinuxTag | 183) FnGetFuncRet = BuiltinFunc(platform.LinuxTag | 184) FnGetFuncArgCnt = BuiltinFunc(platform.LinuxTag | 185) FnGetRetval = BuiltinFunc(platform.LinuxTag | 186) FnSetRetval = BuiltinFunc(platform.LinuxTag | 187) FnXdpGetBuffLen = BuiltinFunc(platform.LinuxTag | 188) FnXdpLoadBytes = BuiltinFunc(platform.LinuxTag | 189) FnXdpStoreBytes = BuiltinFunc(platform.LinuxTag | 190) FnCopyFromUserTask = BuiltinFunc(platform.LinuxTag | 191) FnSkbSetTstamp = BuiltinFunc(platform.LinuxTag | 192) FnImaFileHash = BuiltinFunc(platform.LinuxTag | 193) FnKptrXchg = BuiltinFunc(platform.LinuxTag | 194) FnMapLookupPercpuElem = BuiltinFunc(platform.LinuxTag | 195) FnSkcToMptcpSock = BuiltinFunc(platform.LinuxTag | 196) FnDynptrFromMem = BuiltinFunc(platform.LinuxTag | 197) FnRingbufReserveDynptr = BuiltinFunc(platform.LinuxTag | 198) FnRingbufSubmitDynptr = BuiltinFunc(platform.LinuxTag | 199) FnRingbufDiscardDynptr = BuiltinFunc(platform.LinuxTag | 200) FnDynptrRead = BuiltinFunc(platform.LinuxTag | 201) FnDynptrWrite = BuiltinFunc(platform.LinuxTag | 202) FnDynptrData = BuiltinFunc(platform.LinuxTag | 203) FnTcpRawGenSyncookieIpv4 = BuiltinFunc(platform.LinuxTag | 204) FnTcpRawGenSyncookieIpv6 = BuiltinFunc(platform.LinuxTag | 205) FnTcpRawCheckSyncookieIpv4 = BuiltinFunc(platform.LinuxTag | 206) 
FnTcpRawCheckSyncookieIpv6 = BuiltinFunc(platform.LinuxTag | 207) FnKtimeGetTaiNs = BuiltinFunc(platform.LinuxTag | 208) FnUserRingbufDrain = BuiltinFunc(platform.LinuxTag | 209) FnCgrpStorageGet = BuiltinFunc(platform.LinuxTag | 210) FnCgrpStorageDelete = BuiltinFunc(platform.LinuxTag | 211) ) ================================================ FILE: asm/func_string.go ================================================ // Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT. package asm import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[FnUnspec-0] _ = x[FnMapLookupElem-1] _ = x[FnMapUpdateElem-2] _ = x[FnMapDeleteElem-3] _ = x[FnProbeRead-4] _ = x[FnKtimeGetNs-5] _ = x[FnTracePrintk-6] _ = x[FnGetPrandomU32-7] _ = x[FnGetSmpProcessorId-8] _ = x[FnSkbStoreBytes-9] _ = x[FnL3CsumReplace-10] _ = x[FnL4CsumReplace-11] _ = x[FnTailCall-12] _ = x[FnCloneRedirect-13] _ = x[FnGetCurrentPidTgid-14] _ = x[FnGetCurrentUidGid-15] _ = x[FnGetCurrentComm-16] _ = x[FnGetCgroupClassid-17] _ = x[FnSkbVlanPush-18] _ = x[FnSkbVlanPop-19] _ = x[FnSkbGetTunnelKey-20] _ = x[FnSkbSetTunnelKey-21] _ = x[FnPerfEventRead-22] _ = x[FnRedirect-23] _ = x[FnGetRouteRealm-24] _ = x[FnPerfEventOutput-25] _ = x[FnSkbLoadBytes-26] _ = x[FnGetStackid-27] _ = x[FnCsumDiff-28] _ = x[FnSkbGetTunnelOpt-29] _ = x[FnSkbSetTunnelOpt-30] _ = x[FnSkbChangeProto-31] _ = x[FnSkbChangeType-32] _ = x[FnSkbUnderCgroup-33] _ = x[FnGetHashRecalc-34] _ = x[FnGetCurrentTask-35] _ = x[FnProbeWriteUser-36] _ = x[FnCurrentTaskUnderCgroup-37] _ = x[FnSkbChangeTail-38] _ = x[FnSkbPullData-39] _ = x[FnCsumUpdate-40] _ = x[FnSetHashInvalid-41] _ = x[FnGetNumaNodeId-42] _ = x[FnSkbChangeHead-43] _ = x[FnXdpAdjustHead-44] _ = x[FnProbeReadStr-45] _ = x[FnGetSocketCookie-46] _ = x[FnGetSocketUid-47] _ = x[FnSetHash-48] _ = x[FnSetsockopt-49] _ = 
x[FnSkbAdjustRoom-50] _ = x[FnRedirectMap-51] _ = x[FnSkRedirectMap-52] _ = x[FnSockMapUpdate-53] _ = x[FnXdpAdjustMeta-54] _ = x[FnPerfEventReadValue-55] _ = x[FnPerfProgReadValue-56] _ = x[FnGetsockopt-57] _ = x[FnOverrideReturn-58] _ = x[FnSockOpsCbFlagsSet-59] _ = x[FnMsgRedirectMap-60] _ = x[FnMsgApplyBytes-61] _ = x[FnMsgCorkBytes-62] _ = x[FnMsgPullData-63] _ = x[FnBind-64] _ = x[FnXdpAdjustTail-65] _ = x[FnSkbGetXfrmState-66] _ = x[FnGetStack-67] _ = x[FnSkbLoadBytesRelative-68] _ = x[FnFibLookup-69] _ = x[FnSockHashUpdate-70] _ = x[FnMsgRedirectHash-71] _ = x[FnSkRedirectHash-72] _ = x[FnLwtPushEncap-73] _ = x[FnLwtSeg6StoreBytes-74] _ = x[FnLwtSeg6AdjustSrh-75] _ = x[FnLwtSeg6Action-76] _ = x[FnRcRepeat-77] _ = x[FnRcKeydown-78] _ = x[FnSkbCgroupId-79] _ = x[FnGetCurrentCgroupId-80] _ = x[FnGetLocalStorage-81] _ = x[FnSkSelectReuseport-82] _ = x[FnSkbAncestorCgroupId-83] _ = x[FnSkLookupTcp-84] _ = x[FnSkLookupUdp-85] _ = x[FnSkRelease-86] _ = x[FnMapPushElem-87] _ = x[FnMapPopElem-88] _ = x[FnMapPeekElem-89] _ = x[FnMsgPushData-90] _ = x[FnMsgPopData-91] _ = x[FnRcPointerRel-92] _ = x[FnSpinLock-93] _ = x[FnSpinUnlock-94] _ = x[FnSkFullsock-95] _ = x[FnTcpSock-96] _ = x[FnSkbEcnSetCe-97] _ = x[FnGetListenerSock-98] _ = x[FnSkcLookupTcp-99] _ = x[FnTcpCheckSyncookie-100] _ = x[FnSysctlGetName-101] _ = x[FnSysctlGetCurrentValue-102] _ = x[FnSysctlGetNewValue-103] _ = x[FnSysctlSetNewValue-104] _ = x[FnStrtol-105] _ = x[FnStrtoul-106] _ = x[FnSkStorageGet-107] _ = x[FnSkStorageDelete-108] _ = x[FnSendSignal-109] _ = x[FnTcpGenSyncookie-110] _ = x[FnSkbOutput-111] _ = x[FnProbeReadUser-112] _ = x[FnProbeReadKernel-113] _ = x[FnProbeReadUserStr-114] _ = x[FnProbeReadKernelStr-115] _ = x[FnTcpSendAck-116] _ = x[FnSendSignalThread-117] _ = x[FnJiffies64-118] _ = x[FnReadBranchRecords-119] _ = x[FnGetNsCurrentPidTgid-120] _ = x[FnXdpOutput-121] _ = x[FnGetNetnsCookie-122] _ = x[FnGetCurrentAncestorCgroupId-123] _ = x[FnSkAssign-124] _ = x[FnKtimeGetBootNs-125] _ 
= x[FnSeqPrintf-126] _ = x[FnSeqWrite-127] _ = x[FnSkCgroupId-128] _ = x[FnSkAncestorCgroupId-129] _ = x[FnRingbufOutput-130] _ = x[FnRingbufReserve-131] _ = x[FnRingbufSubmit-132] _ = x[FnRingbufDiscard-133] _ = x[FnRingbufQuery-134] _ = x[FnCsumLevel-135] _ = x[FnSkcToTcp6Sock-136] _ = x[FnSkcToTcpSock-137] _ = x[FnSkcToTcpTimewaitSock-138] _ = x[FnSkcToTcpRequestSock-139] _ = x[FnSkcToUdp6Sock-140] _ = x[FnGetTaskStack-141] _ = x[FnLoadHdrOpt-142] _ = x[FnStoreHdrOpt-143] _ = x[FnReserveHdrOpt-144] _ = x[FnInodeStorageGet-145] _ = x[FnInodeStorageDelete-146] _ = x[FnDPath-147] _ = x[FnCopyFromUser-148] _ = x[FnSnprintfBtf-149] _ = x[FnSeqPrintfBtf-150] _ = x[FnSkbCgroupClassid-151] _ = x[FnRedirectNeigh-152] _ = x[FnPerCpuPtr-153] _ = x[FnThisCpuPtr-154] _ = x[FnRedirectPeer-155] _ = x[FnTaskStorageGet-156] _ = x[FnTaskStorageDelete-157] _ = x[FnGetCurrentTaskBtf-158] _ = x[FnBprmOptsSet-159] _ = x[FnKtimeGetCoarseNs-160] _ = x[FnImaInodeHash-161] _ = x[FnSockFromFile-162] _ = x[FnCheckMtu-163] _ = x[FnForEachMapElem-164] _ = x[FnSnprintf-165] _ = x[FnSysBpf-166] _ = x[FnBtfFindByNameKind-167] _ = x[FnSysClose-168] _ = x[FnTimerInit-169] _ = x[FnTimerSetCallback-170] _ = x[FnTimerStart-171] _ = x[FnTimerCancel-172] _ = x[FnGetFuncIp-173] _ = x[FnGetAttachCookie-174] _ = x[FnTaskPtRegs-175] _ = x[FnGetBranchSnapshot-176] _ = x[FnTraceVprintk-177] _ = x[FnSkcToUnixSock-178] _ = x[FnKallsymsLookupName-179] _ = x[FnFindVma-180] _ = x[FnLoop-181] _ = x[FnStrncmp-182] _ = x[FnGetFuncArg-183] _ = x[FnGetFuncRet-184] _ = x[FnGetFuncArgCnt-185] _ = x[FnGetRetval-186] _ = x[FnSetRetval-187] _ = x[FnXdpGetBuffLen-188] _ = x[FnXdpLoadBytes-189] _ = x[FnXdpStoreBytes-190] _ = x[FnCopyFromUserTask-191] _ = x[FnSkbSetTstamp-192] _ = x[FnImaFileHash-193] _ = x[FnKptrXchg-194] _ = x[FnMapLookupPercpuElem-195] _ = x[FnSkcToMptcpSock-196] _ = x[FnDynptrFromMem-197] _ = x[FnRingbufReserveDynptr-198] _ = x[FnRingbufSubmitDynptr-199] _ = x[FnRingbufDiscardDynptr-200] _ = 
x[FnDynptrRead-201] _ = x[FnDynptrWrite-202] _ = x[FnDynptrData-203] _ = x[FnTcpRawGenSyncookieIpv4-204] _ = x[FnTcpRawGenSyncookieIpv6-205] _ = x[FnTcpRawCheckSyncookieIpv4-206] _ = x[FnTcpRawCheckSyncookieIpv6-207] _ = x[FnKtimeGetTaiNs-208] _ = x[FnUserRingbufDrain-209] _ = x[FnCgrpStorageGet-210] _ = x[FnCgrpStorageDelete-211] _ = x[WindowsFnMapLookupElem-268435457] _ = x[WindowsFnMapUpdateElem-268435458] _ = x[WindowsFnMapDeleteElem-268435459] _ = x[WindowsFnMapLookupAndDeleteElem-268435460] _ = x[WindowsFnTailCall-268435461] _ = x[WindowsFnGetPrandomU32-268435462] _ = x[WindowsFnKtimeGetBootNs-268435463] _ = x[WindowsFnGetSmpProcessorId-268435464] _ = x[WindowsFnKtimeGetNs-268435465] _ = x[WindowsFnCsumDiff-268435466] _ = x[WindowsFnRingbufOutput-268435467] _ = x[WindowsFnTracePrintk2-268435468] _ = x[WindowsFnTracePrintk3-268435469] _ = x[WindowsFnTracePrintk4-268435470] _ = x[WindowsFnTracePrintk5-268435471] _ = x[WindowsFnMapPushElem-268435472] _ = x[WindowsFnMapPopElem-268435473] _ = x[WindowsFnMapPeekElem-268435474] _ = x[WindowsFnGetCurrentPidTgid-268435475] _ = x[WindowsFnGetCurrentLogonId-268435476] _ = x[WindowsFnIsCurrentAdmin-268435477] _ = x[WindowsFnMemcpyS-268435478] _ = x[WindowsFnMemcmpS-268435479] _ = x[WindowsFnMemset-268435480] _ = x[WindowsFnMemmoveS-268435481] _ = x[WindowsFnGetSocketCookie-268435482] _ = x[WindowsFnStrncpyS-268435483] _ = x[WindowsFnStrncatS-268435484] _ = x[WindowsFnStrnlenS-268435485] _ = x[WindowsFnKtimeGetBootMs-268435486] _ = x[WindowsFnKtimeGetMs-268435487] _ = x[WindowsFnPerfEventOutput-268435488] _ = x[WindowsFnGetCurrentProcessStartKey-268435489] _ = x[WindowsFnGetCurrentThreadCreateTime-268435490] } const ( _BuiltinFunc_name_0 = 
"FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbuf
QueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDataFnTcpRawGenSyncookieIpv4FnTcpRawGenSyncookieIpv6FnTcpRawCheckSyncookieIpv4FnTcpRawCheckSyncookieIpv6FnKtimeGetTaiNsFnUserRingbufDrainFnCgrpStorageGetFnCgrpStorageDelete" _BuiltinFunc_name_1 = "WindowsFnMapLookupElemWindowsFnMapUpdateElemWindowsFnMapDeleteElemWindowsFnMapLookupAndDeleteElemWindowsFnTailCallWindowsFnGetPrandomU32WindowsFnKtimeGetBootNsWindowsFnGetSmpProcessorIdWindowsFnKtimeGetNsWindowsFnCsumDiffWindowsFnRingbufOutputWindowsFnTracePrintk2WindowsFnTracePrintk3WindowsFnTracePrintk4WindowsFnTracePrintk5WindowsFnMapPushElemWindowsFnMapPopElemWindowsFnMapPeekElemWindowsFnGetCurrentPidTgidWindowsFnGetCurrentLogonIdWindowsFnIsCurrentAdminWindowsFnMemcpySWindowsFnMemcmpSWindowsFnMemsetWindowsFnMemmoveSWindowsFnGetSocketCookieWindowsFnStrncpySWindowsFnStrncatSWindowsFnStrnlenSWindowsFnKtimeGetBootMsWindowsFnKtimeGetMsWindowsFnPerfEventOutputWindowsFnGetCurrentProcessStartKeyWindowsFnGetCurrentThreadCreateTime" ) var ( _BuiltinFunc_index_0 = [...]uint16{0, 8, 23, 38, 53, 64, 76, 
89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3021, 3045, 3071, 3097, 3112, 3130, 3146, 3165} _BuiltinFunc_index_1 = [...]uint16{0, 22, 44, 66, 97, 114, 136, 159, 185, 204, 221, 243, 264, 285, 306, 327, 347, 366, 386, 412, 438, 461, 477, 493, 508, 525, 549, 566, 583, 600, 623, 642, 666, 700, 735} ) func (i BuiltinFunc) String() string { switch { case i <= 211: return _BuiltinFunc_name_0[_BuiltinFunc_index_0[i]:_BuiltinFunc_index_0[i+1]] case 268435457 <= i && i <= 268435490: i -= 268435457 return _BuiltinFunc_name_1[_BuiltinFunc_index_1[i]:_BuiltinFunc_index_1[i+1]] default: return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")" } } ================================================ FILE: asm/func_win.go ================================================ // Code generated by internal/cmd/genwinfunctions.awk; DO NOT EDIT. 
package asm // Code in this file is derived from eBPF for Windows, available under the MIT License. import "github.com/cilium/ebpf/internal/platform" // Built-in functions (Windows). const ( WindowsFnMapLookupElem = BuiltinFunc(platform.WindowsTag | 1) WindowsFnMapUpdateElem = BuiltinFunc(platform.WindowsTag | 2) WindowsFnMapDeleteElem = BuiltinFunc(platform.WindowsTag | 3) WindowsFnMapLookupAndDeleteElem = BuiltinFunc(platform.WindowsTag | 4) WindowsFnTailCall = BuiltinFunc(platform.WindowsTag | 5) WindowsFnGetPrandomU32 = BuiltinFunc(platform.WindowsTag | 6) WindowsFnKtimeGetBootNs = BuiltinFunc(platform.WindowsTag | 7) WindowsFnGetSmpProcessorId = BuiltinFunc(platform.WindowsTag | 8) WindowsFnKtimeGetNs = BuiltinFunc(platform.WindowsTag | 9) WindowsFnCsumDiff = BuiltinFunc(platform.WindowsTag | 10) WindowsFnRingbufOutput = BuiltinFunc(platform.WindowsTag | 11) WindowsFnTracePrintk2 = BuiltinFunc(platform.WindowsTag | 12) WindowsFnTracePrintk3 = BuiltinFunc(platform.WindowsTag | 13) WindowsFnTracePrintk4 = BuiltinFunc(platform.WindowsTag | 14) WindowsFnTracePrintk5 = BuiltinFunc(platform.WindowsTag | 15) WindowsFnMapPushElem = BuiltinFunc(platform.WindowsTag | 16) WindowsFnMapPopElem = BuiltinFunc(platform.WindowsTag | 17) WindowsFnMapPeekElem = BuiltinFunc(platform.WindowsTag | 18) WindowsFnGetCurrentPidTgid = BuiltinFunc(platform.WindowsTag | 19) WindowsFnGetCurrentLogonId = BuiltinFunc(platform.WindowsTag | 20) WindowsFnIsCurrentAdmin = BuiltinFunc(platform.WindowsTag | 21) WindowsFnMemcpyS = BuiltinFunc(platform.WindowsTag | 22) WindowsFnMemcmpS = BuiltinFunc(platform.WindowsTag | 23) WindowsFnMemset = BuiltinFunc(platform.WindowsTag | 24) WindowsFnMemmoveS = BuiltinFunc(platform.WindowsTag | 25) WindowsFnGetSocketCookie = BuiltinFunc(platform.WindowsTag | 26) WindowsFnStrncpyS = BuiltinFunc(platform.WindowsTag | 27) WindowsFnStrncatS = BuiltinFunc(platform.WindowsTag | 28) WindowsFnStrnlenS = BuiltinFunc(platform.WindowsTag | 29) WindowsFnKtimeGetBootMs = 
BuiltinFunc(platform.WindowsTag | 30) WindowsFnKtimeGetMs = BuiltinFunc(platform.WindowsTag | 31) WindowsFnPerfEventOutput = BuiltinFunc(platform.WindowsTag | 32) WindowsFnGetCurrentProcessStartKey = BuiltinFunc(platform.WindowsTag | 33) WindowsFnGetCurrentThreadCreateTime = BuiltinFunc(platform.WindowsTag | 34) ) ================================================ FILE: asm/instruction.go ================================================ package asm import ( "crypto/sha1" "crypto/sha256" "encoding/binary" "encoding/hex" "errors" "fmt" "hash" "io" "math" "sort" "strings" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" ) // InstructionSize is the size of a BPF instruction in bytes const InstructionSize = 8 // RawInstructionOffset is an offset in units of raw BPF instructions. type RawInstructionOffset uint64 var ErrUnreferencedSymbol = errors.New("unreferenced symbol") var ErrUnsatisfiedMapReference = errors.New("unsatisfied map reference") var ErrUnsatisfiedProgramReference = errors.New("unsatisfied program reference") // Bytes returns the offset of an instruction in bytes. func (rio RawInstructionOffset) Bytes() uint64 { return uint64(rio) * InstructionSize } // Instruction is a single eBPF instruction. type Instruction struct { OpCode OpCode Dst Register Src Register Offset int16 Constant int64 // Metadata contains optional metadata about this instruction. Metadata Metadata } // Width returns how many raw BPF instructions the Instruction occupies within // an instruction stream. For example, an Instruction encoding a 64-bit value // will typically occupy 2 raw instructions, while a 32-bit constant can be // encoded in a single raw instruction. func (ins *Instruction) Width() RawInstructionOffset { return RawInstructionOffset(ins.OpCode.rawInstructions()) } // Unmarshal decodes a BPF instruction. 
func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder, platform string) error { data := make([]byte, InstructionSize) if _, err := io.ReadFull(r, data); err != nil { return err } ins.OpCode = OpCode(data[0]) regs := data[1] switch bo { case binary.LittleEndian: ins.Dst, ins.Src = Register(regs&0xF), Register(regs>>4) case binary.BigEndian: ins.Dst, ins.Src = Register(regs>>4), Register(regs&0xf) } ins.Offset = int16(bo.Uint16(data[2:4])) // Convert to int32 before widening to int64 // to ensure the signed bit is carried over. ins.Constant = int64(int32(bo.Uint32(data[4:8]))) if ins.IsBuiltinCall() { if ins.Constant >= 0 { // Leave negative constants from the instruction stream // unchanged. These are sometimes used as placeholders for later // patching. // This relies on not having a valid platform tag with a high bit set. fn, err := BuiltinFuncForPlatform(platform, uint32(ins.Constant)) if err != nil { return err } ins.Constant = int64(fn) } } else if ins.OpCode.Class().IsALU() { switch ins.OpCode.ALUOp() { case Div: if ins.Offset == 1 { ins.OpCode = ins.OpCode.SetALUOp(SDiv) ins.Offset = 0 } case Mod: if ins.Offset == 1 { ins.OpCode = ins.OpCode.SetALUOp(SMod) ins.Offset = 0 } case Mov: switch ins.Offset { case 8: ins.OpCode = ins.OpCode.SetALUOp(MovSX8) ins.Offset = 0 case 16: ins.OpCode = ins.OpCode.SetALUOp(MovSX16) ins.Offset = 0 case 32: ins.OpCode = ins.OpCode.SetALUOp(MovSX32) ins.Offset = 0 } } } else if ins.OpCode.Class() == StXClass && ins.OpCode.Mode() == AtomicMode { // For atomic ops, part of the opcode is stored in the // constant field. Shift over 8 bytes so we can OR with the actual opcode and // apply `atomicMask` to avoid merging unknown bits that may be added in the future. ins.OpCode |= (OpCode((ins.Constant << 8)) & atomicMask) } if !ins.OpCode.IsDWordLoad() { return nil } // Pull another instruction from the stream to retrieve the second // half of the 64-bit immediate value. 
if _, err := io.ReadFull(r, data); err != nil { // No Wrap, to avoid io.EOF clash return errors.New("64bit immediate is missing second half") } // Require that all fields other than the value are zero. if bo.Uint32(data[0:4]) != 0 { return errors.New("64bit immediate has non-zero fields") } cons1 := uint32(ins.Constant) cons2 := int32(bo.Uint32(data[4:8])) ins.Constant = int64(cons2)<<32 | int64(cons1) return nil } // Marshal encodes a BPF instruction. func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) { if ins.OpCode == InvalidOpCode { return 0, errors.New("invalid opcode") } isDWordLoad := ins.OpCode.IsDWordLoad() cons := int32(ins.Constant) if isDWordLoad { // Encode least significant 32bit first for 64bit operations. cons = int32(uint32(ins.Constant)) } regs, err := newBPFRegisters(ins.Dst, ins.Src, bo) if err != nil { return 0, fmt.Errorf("can't marshal registers: %s", err) } if ins.IsBuiltinCall() { fn := BuiltinFunc(ins.Constant) plat, value := platform.DecodeConstant(fn) if plat != platform.Native { return 0, fmt.Errorf("function %s (%s): %w", fn, plat, internal.ErrNotSupportedOnOS) } cons = int32(value) } else if ins.OpCode.Class().IsALU() { newOffset := int16(0) switch ins.OpCode.ALUOp() { case SDiv: ins.OpCode = ins.OpCode.SetALUOp(Div) newOffset = 1 case SMod: ins.OpCode = ins.OpCode.SetALUOp(Mod) newOffset = 1 case MovSX8: ins.OpCode = ins.OpCode.SetALUOp(Mov) newOffset = 8 case MovSX16: ins.OpCode = ins.OpCode.SetALUOp(Mov) newOffset = 16 case MovSX32: ins.OpCode = ins.OpCode.SetALUOp(Mov) newOffset = 32 } if newOffset != 0 && ins.Offset != 0 { return 0, fmt.Errorf("extended ALU opcodes should have an .Offset of 0: %s", ins) } ins.Offset = newOffset } else if atomic := ins.OpCode.AtomicOp(); atomic != InvalidAtomic { ins.OpCode = ins.OpCode &^ atomicMask ins.Constant = int64(atomic >> 8) } op, err := ins.OpCode.bpfOpCode() if err != nil { return 0, err } data := make([]byte, InstructionSize) data[0] = op data[1] = 
byte(regs) bo.PutUint16(data[2:4], uint16(ins.Offset)) bo.PutUint32(data[4:8], uint32(cons)) if _, err := w.Write(data); err != nil { return 0, err } if !isDWordLoad { return InstructionSize, nil } // The first half of the second part of a double-wide instruction // must be zero. The second half carries the value. bo.PutUint32(data[0:4], 0) bo.PutUint32(data[4:8], uint32(ins.Constant>>32)) if _, err := w.Write(data); err != nil { return 0, err } return 2 * InstructionSize, nil } // AssociateMap associates a Map with this Instruction. // // Implicitly clears the Instruction's Reference field. // // Returns an error if the Instruction is not a map load. func (ins *Instruction) AssociateMap(m FDer) error { if !ins.IsLoadFromMap() { return errors.New("not a load from a map") } ins.Metadata.Set(referenceMeta{}, nil) ins.Metadata.Set(mapMeta{}, m) return nil } func (ins *Instruction) encodeMapFD(fd int) { // Preserve the offset value for direct map loads. offset := uint64(ins.Constant) & (math.MaxUint32 << 32) rawFd := uint64(uint32(fd)) ins.Constant = int64(offset | rawFd) } // mapFd returns the map file descriptor stored in the 32 least significant // bits of ins' Constant field. func (ins *Instruction) mapFd() int { return int(int32(ins.Constant)) } // RewriteMapOffset changes the offset of a direct load from a map. // // Returns an error if the instruction is not a direct load. func (ins *Instruction) RewriteMapOffset(offset uint32) error { if !ins.OpCode.IsDWordLoad() { return fmt.Errorf("%s is not a 64 bit load", ins.OpCode) } if ins.Src != PseudoMapValue { return errors.New("not a direct load from a map") } fd := uint64(ins.Constant) & math.MaxUint32 ins.Constant = int64(uint64(offset)<<32 | fd) return nil } func (ins *Instruction) mapOffset() uint32 { return uint32(uint64(ins.Constant) >> 32) } // IsLoadFromMap returns true if the instruction loads from a map. // // This covers both loading the map pointer and direct map value loads. 
func (ins *Instruction) IsLoadFromMap() bool {
	if ins.OpCode != LoadImmOp(DWord) {
		return false
	}

	switch ins.Src {
	case PseudoMapFD, PseudoMapValue:
		return true
	}
	return false
}

// IsFunctionCall reports whether the instruction is a call into another BPF
// function.
//
// A BPF helper call is a different kind of call and is not matched.
func (ins *Instruction) IsFunctionCall() bool {
	if ins.OpCode.JumpOp() != Call {
		return false
	}
	return ins.Src == PseudoCall
}

// IsKfuncCall reports whether the instruction is a call to a kfunc.
//
// A BPF helper call is a different kind of call and is not matched.
func (ins *Instruction) IsKfuncCall() bool {
	if ins.OpCode.JumpOp() != Call {
		return false
	}
	return ins.Src == PseudoKfuncCall
}

// IsLoadOfFunctionPointer reports whether the instruction loads a function
// pointer.
func (ins *Instruction) IsLoadOfFunctionPointer() bool {
	if !ins.OpCode.IsDWordLoad() {
		return false
	}
	return ins.Src == PseudoFunc
}

// IsFunctionReference reports whether the instruction refers to another BPF
// function, which is the case for Call jumps into BPF functions as well as
// for loads of a function pointer.
func (ins *Instruction) IsFunctionReference() bool {
	if ins.IsFunctionCall() {
		return true
	}
	return ins.IsLoadOfFunctionPointer()
}

// IsBuiltinCall reports whether the instruction is a built-in call, i.e. a
// call to a BPF helper.
func (ins *Instruction) IsBuiltinCall() bool {
	if ins.OpCode.JumpOp() != Call {
		return false
	}
	if ins.Src != R0 {
		return false
	}
	return ins.Dst == R0
}

// IsConstantLoad reports whether the instruction loads a constant of the
// given size.
func (ins *Instruction) IsConstantLoad(size Size) bool {
	if ins.OpCode != LoadImmOp(size) {
		return false
	}
	if ins.Src != R0 {
		return false
	}
	return ins.Offset == 0
}

// Format implements fmt.Formatter.
func (ins Instruction) Format(f fmt.State, c rune) { if c != 'v' { fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c) return } op := ins.OpCode if op == InvalidOpCode { fmt.Fprint(f, "INVALID") return } // Omit trailing space for Exit if op.JumpOp() == Exit { fmt.Fprint(f, op) return } if ins.IsLoadFromMap() { fd := ins.mapFd() m := ins.Map() switch ins.Src { case PseudoMapFD: if m != nil { fmt.Fprintf(f, "LoadMapPtr dst: %s map: %s", ins.Dst, m) } else { fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd) } case PseudoMapValue: if m != nil { fmt.Fprintf(f, "LoadMapValue dst: %s, map: %s off: %d", ins.Dst, m, ins.mapOffset()) } else { fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset()) } } goto ref } switch cls := op.Class(); { case cls.isLoadOrStore(): fmt.Fprintf(f, "%v ", op) switch op.Mode() { case ImmMode: fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant) case AbsMode: fmt.Fprintf(f, "imm: %d", ins.Constant) case IndMode: fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant) case MemMode, MemSXMode: fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant) case AtomicMode: fmt.Fprintf(f, "dst: %s src: %s off: %d", ins.Dst, ins.Src, ins.Offset) } case cls.IsALU(): fmt.Fprintf(f, "%v", op) if op == Swap.Op(ImmSource) { fmt.Fprintf(f, "%d", ins.Constant) } fmt.Fprintf(f, " dst: %s ", ins.Dst) switch { case op.ALUOp() == Swap: break case op.Source() == ImmSource: fmt.Fprintf(f, "imm: %d", ins.Constant) default: fmt.Fprintf(f, "src: %s", ins.Src) } case cls.IsJump(): fmt.Fprintf(f, "%v ", op) switch jop := op.JumpOp(); jop { case Call: switch ins.Src { case PseudoCall: // bpf-to-bpf call fmt.Fprint(f, ins.Constant) case PseudoKfuncCall: // kfunc call fmt.Fprintf(f, "Kfunc(%d)", ins.Constant) default: fmt.Fprint(f, BuiltinFunc(ins.Constant)) } case Ja: if ins.OpCode.Class() == Jump32Class { fmt.Fprintf(f, "imm: %d", ins.Constant) } else { fmt.Fprintf(f, "off: %d", ins.Offset) } 
default: fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset) if op.Source() == ImmSource { fmt.Fprintf(f, "imm: %d", ins.Constant) } else { fmt.Fprintf(f, "src: %s", ins.Src) } } default: fmt.Fprintf(f, "%v ", op) } ref: if ins.Reference() != "" { fmt.Fprintf(f, " <%s>", ins.Reference()) } } func (ins Instruction) equal(other Instruction) bool { return ins.OpCode == other.OpCode && ins.Dst == other.Dst && ins.Src == other.Src && ins.Offset == other.Offset && ins.Constant == other.Constant } // Size returns the amount of bytes ins would occupy in binary form. func (ins Instruction) Size() uint64 { return uint64(InstructionSize * ins.OpCode.rawInstructions()) } // WithMetadata sets the given Metadata on the Instruction. e.g. to copy // Metadata from another Instruction when replacing it. func (ins Instruction) WithMetadata(meta Metadata) Instruction { ins.Metadata = meta return ins } type symbolMeta struct{} // WithSymbol marks the Instruction as a Symbol, which other Instructions // can point to using corresponding calls to WithReference. func (ins Instruction) WithSymbol(name string) Instruction { ins.Metadata.Set(symbolMeta{}, name) return ins } // Symbol returns the value ins has been marked with using WithSymbol, // otherwise returns an empty string. A symbol is often an Instruction // at the start of a function body. func (ins Instruction) Symbol() string { sym, _ := ins.Metadata.Get(symbolMeta{}).(string) return sym } type referenceMeta struct{} // WithReference makes ins reference another Symbol or map by name. func (ins Instruction) WithReference(ref string) Instruction { ins.Metadata.Set(referenceMeta{}, ref) return ins } // Reference returns the Symbol or map name referenced by ins, if any. func (ins Instruction) Reference() string { ref, _ := ins.Metadata.Get(referenceMeta{}).(string) return ref } type mapMeta struct{} // Map returns the Map referenced by ins, if any. // An Instruction will contain a Map if e.g. 
it references an existing, // pinned map that was opened during ELF loading. func (ins Instruction) Map() FDer { fd, _ := ins.Metadata.Get(mapMeta{}).(FDer) return fd } type sourceMeta struct{} // WithSource adds source information about the Instruction. func (ins Instruction) WithSource(src fmt.Stringer) Instruction { ins.Metadata.Set(sourceMeta{}, src) return ins } // Source returns source information about the Instruction. The field is // present when the compiler emits BTF line info about the Instruction and // usually contains the line of source code responsible for it. func (ins Instruction) Source() fmt.Stringer { str, _ := ins.Metadata.Get(sourceMeta{}).(fmt.Stringer) return str } // A Comment can be passed to Instruction.WithSource to add a comment // to an instruction. type Comment string func (s Comment) String() string { return string(s) } // FDer represents a resource tied to an underlying file descriptor. // Used as a stand-in for e.g. ebpf.Map since that type cannot be // imported here and FD() is the only method we rely on. type FDer interface { FD() int } // Instructions is an eBPF program. type Instructions []Instruction // AppendInstructions decodes [Instruction] from r and appends them to insns. func AppendInstructions(insns Instructions, r io.Reader, bo binary.ByteOrder, platform string) (Instructions, error) { var offset uint64 for { var ins Instruction err := ins.Unmarshal(r, bo, platform) if errors.Is(err, io.EOF) { break } if err != nil { return nil, fmt.Errorf("offset %d: %w", offset, err) } insns = append(insns, ins) offset += ins.Size() } return insns, nil } // Name returns the name of the function insns belongs to, if any. func (insns Instructions) Name() string { if len(insns) == 0 { return "" } return insns[0].Symbol() } func (insns Instructions) String() string { return fmt.Sprint(insns) } // Size returns the amount of bytes insns would occupy in binary form. 
func (insns Instructions) Size() uint64 { var sum uint64 for _, ins := range insns { sum += ins.Size() } return sum } // AssociateMap updates all Instructions that Reference the given symbol // to point to an existing Map m instead. // // Returns ErrUnreferencedSymbol error if no references to symbol are found // in insns. If symbol is anything else than the symbol name of map (e.g. // a bpf2bpf subprogram), an error is returned. func (insns Instructions) AssociateMap(symbol string, m FDer) error { if symbol == "" { return errors.New("empty symbol") } var found bool for i := range insns { ins := &insns[i] if ins.Reference() != symbol { continue } if err := ins.AssociateMap(m); err != nil { return err } found = true } if !found { return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol) } return nil } // SymbolOffsets returns the set of symbols and their offset in // the instructions. func (insns Instructions) SymbolOffsets() (map[string]int, error) { offsets := make(map[string]int) for i, ins := range insns { if ins.Symbol() == "" { continue } if _, ok := offsets[ins.Symbol()]; ok { return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol()) } offsets[ins.Symbol()] = i } return offsets, nil } // FunctionReferences returns a set of symbol names these Instructions make // bpf-to-bpf calls to. func (insns Instructions) FunctionReferences() []string { calls := make(map[string]struct{}) for _, ins := range insns { if ins.Constant != -1 { // BPF-to-BPF calls have -1 constants. continue } if ins.Reference() == "" { continue } if !ins.IsFunctionReference() { continue } calls[ins.Reference()] = struct{}{} } result := make([]string, 0, len(calls)) for call := range calls { result = append(result, call) } sort.Strings(result) return result } // ReferenceOffsets returns the set of references and their offset in // the instructions. 
func (insns Instructions) ReferenceOffsets() map[string][]int { offsets := make(map[string][]int) for i, ins := range insns { if ins.Reference() == "" { continue } offsets[ins.Reference()] = append(offsets[ins.Reference()], i) } return offsets } // Format implements fmt.Formatter. // // You can control indentation of symbols by // specifying a width. Setting a precision controls the indentation of // instructions. // The default character is a tab, which can be overridden by specifying // the ' ' space flag. func (insns Instructions) Format(f fmt.State, c rune) { if c != 's' && c != 'v' { fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c) return } // Precision is better in this case, because it allows // specifying 0 padding easily. padding, ok := f.Precision() if !ok { padding = 1 } indent := strings.Repeat("\t", padding) if f.Flag(' ') { indent = strings.Repeat(" ", padding) } symPadding, ok := f.Width() if !ok { symPadding = padding - 1 } if symPadding < 0 { symPadding = 0 } symIndent := strings.Repeat("\t", symPadding) if f.Flag(' ') { symIndent = strings.Repeat(" ", symPadding) } // Guess how many digits we need at most, by assuming that all instructions // are double wide. highestOffset := len(insns) * 2 offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset)))) iter := insns.Iterate() for iter.Next() { if iter.Ins.Symbol() != "" { fmt.Fprintf(f, "%s%s:\n", symIndent, iter.Ins.Symbol()) } if src := iter.Ins.Source(); src != nil { line := strings.TrimSpace(src.String()) if line != "" { fmt.Fprintf(f, "%s%*s; %s\n", indent, offsetWidth, " ", line) } } fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, iter.Offset, iter.Ins) } } // Marshal encodes a BPF program into the kernel format. // // insns may be modified if there are unresolved jumps or bpf2bpf calls. // // Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction // without a matching Symbol Instruction within insns. 
func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error { if err := insns.encodeFunctionReferences(); err != nil { return err } if err := insns.encodeMapPointers(); err != nil { return err } for i, ins := range insns { if _, err := ins.Marshal(w, bo); err != nil { return fmt.Errorf("instruction %d: %w", i, err) } } return nil } // Tag calculates the kernel tag for a series of instructions. // // It mirrors bpf_prog_calc_tag in the kernel and so can be compared // to ProgramInfo.Tag to figure out whether a loaded program matches // certain instructions. // // Deprecated: The value produced by this method no longer matches tags produced // by the kernel since Linux 6.18. Use [Instructions.HasTag] instead. func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) { // We cannot determine which hashing function to use without probing the kernel. // So use the legacy SHA-1 implementation and deprecate this method. return insns.tagSha1(bo) } // HasTag returns true if the given tag matches the kernel tag of insns. func (insns Instructions) HasTag(tag string, bo binary.ByteOrder) (bool, error) { sha256Tag, err := insns.tagSha256(bo) if err != nil { return false, fmt.Errorf("hashing sha256: %w", err) } if tag == sha256Tag { return true, nil } sha1Tag, err := insns.tagSha1(bo) if err != nil { return false, fmt.Errorf("hashing sha1: %w", err) } return tag == sha1Tag, nil } // tagSha1 calculates the kernel tag for a series of instructions. // // It mirrors bpf_prog_calc_tag in kernels up to v6.18 and can be compared to // ProgramInfo.Tag to figure out whether a loaded Program matches insns. func (insns Instructions) tagSha1(bo binary.ByteOrder) (string, error) { h := sha1.New() if err := insns.hash(h, bo); err != nil { return "", err } return hex.EncodeToString(h.Sum(nil)[:sys.BPF_TAG_SIZE]), nil } // tagSha256 calculates the kernel tag for a series of instructions. 
// // It mirrors bpf_prog_calc_tag in the kernel and can be compared to // ProgramInfo.Tag to figure out whether a loaded Program matches insns. func (insns Instructions) tagSha256(bo binary.ByteOrder) (string, error) { h := sha256.New() if err := insns.hash(h, bo); err != nil { return "", err } return hex.EncodeToString(h.Sum(nil)[:sys.BPF_TAG_SIZE]), nil } // hash calculates the hash of the instruction stream. Map load instructions // are zeroed out, since these contain map file descriptors or pointers to // maps, which will be different from load to load and would make the hash // non-deterministic. func (insns Instructions) hash(h hash.Hash, bo binary.ByteOrder) error { for i, ins := range insns { if ins.IsLoadFromMap() { ins.Constant = 0 } _, err := ins.Marshal(h, bo) if err != nil { return fmt.Errorf("instruction %d: %w", i, err) } } return nil } // encodeFunctionReferences populates the Offset (or Constant, depending on // the instruction type) field of instructions with a Reference field to point // to the offset of the corresponding instruction with a matching Symbol field. // // Only Reference Instructions that are either jumps or BPF function references // (calls or function pointer loads) are populated. // // Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction // without at least one corresponding Symbol Instruction within insns. func (insns Instructions) encodeFunctionReferences() error { // Index the offsets of instructions tagged as a symbol. symbolOffsets := make(map[string]RawInstructionOffset) iter := insns.Iterate() for iter.Next() { ins := iter.Ins if ins.Symbol() == "" { continue } if _, ok := symbolOffsets[ins.Symbol()]; ok { return fmt.Errorf("duplicate symbol %s", ins.Symbol()) } symbolOffsets[ins.Symbol()] = iter.Offset } // Find all instructions tagged as references to other symbols. 
// Depending on the instruction type, populate their constant or offset // fields to point to the symbol they refer to within the insn stream. iter = insns.Iterate() for iter.Next() { i := iter.Index offset := iter.Offset ins := iter.Ins if ins.Reference() == "" { continue } switch { case ins.IsFunctionReference() && ins.Constant == -1, ins.OpCode == Ja.opCode(Jump32Class, ImmSource) && ins.Constant == -1: symOffset, ok := symbolOffsets[ins.Reference()] if !ok { return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference) } ins.Constant = int64(symOffset - offset - 1) case ins.OpCode.Class().IsJump() && ins.Offset == -1: symOffset, ok := symbolOffsets[ins.Reference()] if !ok { return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference) } ins.Offset = int16(symOffset - offset - 1) } } return nil } // encodeMapPointers finds all Map Instructions and encodes their FDs // into their Constant fields. func (insns Instructions) encodeMapPointers() error { iter := insns.Iterate() for iter.Next() { ins := iter.Ins if !ins.IsLoadFromMap() { continue } m := ins.Map() if m == nil { continue } fd := m.FD() if fd < 0 { return fmt.Errorf("map %s: %w", m, sys.ErrClosedFd) } ins.encodeMapFD(m.FD()) } return nil } // Iterate allows iterating a BPF program while keeping track of // various offsets. // // Modifying the instruction slice will lead to undefined behaviour. func (insns Instructions) Iterate() *InstructionIterator { return &InstructionIterator{insns: insns} } // InstructionIterator iterates over a BPF program. type InstructionIterator struct { insns Instructions // The instruction in question. Ins *Instruction // The index of the instruction in the original instruction slice. Index int // The offset of the instruction in raw BPF instructions. This accounts // for double-wide instructions. 
Offset RawInstructionOffset } // Next returns true as long as there are any instructions remaining. func (iter *InstructionIterator) Next() bool { if len(iter.insns) == 0 { return false } if iter.Ins != nil { iter.Index++ iter.Offset += RawInstructionOffset(iter.Ins.OpCode.rawInstructions()) } iter.Ins = &iter.insns[0] iter.insns = iter.insns[1:] return true } type bpfRegisters uint8 func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) { switch bo { case binary.LittleEndian: return bpfRegisters((src << 4) | (dst & 0xF)), nil case binary.BigEndian: return bpfRegisters((dst << 4) | (src & 0xF)), nil default: return 0, fmt.Errorf("unrecognized ByteOrder %T", bo) } } ================================================ FILE: asm/instruction_test.go ================================================ package asm import ( "bytes" "encoding/binary" "encoding/hex" "fmt" "io" "math" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/platform" ) var test64bitImmProg = []byte{ // r0 = math.MinInt32 - 1 0x18, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, } func TestRead64bitImmediate(t *testing.T) { var ins Instruction err := ins.Unmarshal(bytes.NewReader(test64bitImmProg), binary.LittleEndian, platform.Linux) if err != nil { t.Fatal(err) } if c := ins.Constant; c != math.MinInt32-1 { t.Errorf("Expected immediate to be %v, got %v", int64(math.MinInt32)-1, c) } } func BenchmarkRead64bitImmediate(b *testing.B) { r := &bytes.Reader{} for b.Loop() { r.Reset(test64bitImmProg) var ins Instruction if err := ins.Unmarshal(r, binary.LittleEndian, platform.Linux); err != nil { b.Fatal(err) } } } func TestWrite64bitImmediate(t *testing.T) { insns := Instructions{ LoadImm(R0, math.MinInt32-1, DWord), } var buf bytes.Buffer if err := insns.Marshal(&buf, binary.LittleEndian); err != nil { t.Fatal(err) } if prog := buf.Bytes(); !bytes.Equal(prog, test64bitImmProg) { t.Errorf("Marshalled program does not 
match:\n%s", hex.Dump(prog)) } } func BenchmarkWrite64BitImmediate(b *testing.B) { ins := LoadImm(R0, math.MinInt32-1, DWord) var buf bytes.Buffer for b.Loop() { buf.Reset() if _, err := ins.Marshal(&buf, binary.LittleEndian); err != nil { b.Fatal(err) } } } func TestAppendInstructions(t *testing.T) { r := bytes.NewReader(test64bitImmProg) insns, err := AppendInstructions(nil, r, binary.LittleEndian, platform.Linux) qt.Assert(t, qt.IsNil(err)) if len(insns) != 1 { t.Fatalf("Expected one instruction, got %d", len(insns)) } } func TestSignedJump(t *testing.T) { insns := Instructions{ JSGT.Imm(R0, -1, "foo"), } insns[0].Offset = 1 err := insns.Marshal(io.Discard, binary.LittleEndian) if err != nil { t.Error("Can't marshal signed jump:", err) } } func TestInstructionLoadMapValue(t *testing.T) { ins := LoadMapValue(R0, 1, 123) if !ins.IsLoadFromMap() { t.Error("isLoadFromMap returns false") } if fd := ins.mapFd(); fd != 1 { t.Error("Expected map fd to be 1, got", fd) } if off := ins.mapOffset(); off != 123 { t.Fatal("Expected map offset to be 123 after changing the pointer, got", off) } } func TestInstructionWithMetadata(t *testing.T) { ins := LoadImm(R0, 123, DWord).WithSymbol("abc") ins2 := LoadImm(R0, 567, DWord).WithMetadata(ins.Metadata) if want, got := "abc", ins2.Symbol(); want != got { t.Fatalf("unexpected Symbol value on ins2: want: %s, got: %s", want, got) } if want, got := ins.Metadata, ins2.Metadata; want != got { t.Fatal("expected ins and isn2 Metadata to match") } } func TestReadCallToNegativeOne(t *testing.T) { raw := []byte{ 0x85, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, } var ins Instruction err := ins.Unmarshal(bytes.NewReader(raw), binary.LittleEndian, platform.Linux) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(ins.Constant, -1)) } // You can use format flags to change the way an eBPF // program is stringified. 
func ExampleInstructions_Format() { insns := Instructions{ FnMapLookupElem.Call().WithSymbol("my_func").WithSource(Comment("bpf_map_lookup_elem()")), LoadImm(R0, 42, DWord).WithSource(Comment("abc = 42")), Return(), } fmt.Println("Default format:") fmt.Printf("%v\n", insns) fmt.Println("Don't indent instructions:") fmt.Printf("%.0v\n", insns) fmt.Println("Indent using spaces:") fmt.Printf("% v\n", insns) fmt.Println("Control symbol indentation:") fmt.Printf("%2v\n", insns) // Output: Default format: // my_func: // ; bpf_map_lookup_elem() // 0: Call FnMapLookupElem // ; abc = 42 // 1: LdImmDW dst: r0 imm: 42 // 3: Exit // // Don't indent instructions: // my_func: // ; bpf_map_lookup_elem() // 0: Call FnMapLookupElem // ; abc = 42 // 1: LdImmDW dst: r0 imm: 42 // 3: Exit // // Indent using spaces: // my_func: // ; bpf_map_lookup_elem() // 0: Call FnMapLookupElem // ; abc = 42 // 1: LdImmDW dst: r0 imm: 42 // 3: Exit // // Control symbol indentation: // my_func: // ; bpf_map_lookup_elem() // 0: Call FnMapLookupElem // ; abc = 42 // 1: LdImmDW dst: r0 imm: 42 // 3: Exit } func TestReadSrcDst(t *testing.T) { testSrcDstProg := []byte{ // on little-endian: r0 = r1 // on big-endian: be: r1 = r0 0xbf, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, } testcases := []struct { bo binary.ByteOrder dst, src Register }{ {binary.BigEndian, R1, R0}, {binary.LittleEndian, R0, R1}, } for _, tc := range testcases { t.Run(tc.bo.String(), func(t *testing.T) { var ins Instruction err := ins.Unmarshal(bytes.NewReader(testSrcDstProg), tc.bo, platform.Linux) if err != nil { t.Fatal(err) } if ins.Dst != tc.dst { t.Errorf("Expected destination to be %v, got %v", tc.dst, ins.Dst) } if ins.Src != tc.src { t.Errorf("Expected source to be %v, got %v", tc.src, ins.Src) } }) } } func TestInstructionIterator(t *testing.T) { insns := Instructions{ LoadImm(R0, 0, Word), LoadImm(R0, 0, DWord), Return(), } offsets := []RawInstructionOffset{0, 1, 3} iter := insns.Iterate() for i := 0; i < len(insns); i++ { if 
!iter.Next() { t.Fatalf("Expected %dth call to Next to return true", i) } if iter.Ins == nil { t.Errorf("Expected iter.Ins to be non-nil") } if iter.Index != i { t.Errorf("Expected iter.Index to be %d, got %d", i, iter.Index) } if iter.Offset != offsets[i] { t.Errorf("Expected iter.Offset to be %d, got %d", offsets[i], iter.Offset) } } } func TestMetadataCopyOnWrite(t *testing.T) { // Setting metadata should copy Instruction and modify the metadata pointer // of the new object without touching the old Instruction. // Reference ins := Ja.Label("my_func") ins2 := ins.WithReference("my_func2") qt.Assert(t, qt.Equals(ins.Reference(), "my_func"), qt.Commentf("WithReference updated ins")) qt.Assert(t, qt.Equals(ins2.Reference(), "my_func2"), qt.Commentf("WithReference didn't update ins2")) // Symbol ins = Ja.Label("").WithSymbol("my_sym") ins2 = ins.WithSymbol("my_sym2") qt.Assert(t, qt.Equals(ins.Symbol(), "my_sym"), qt.Commentf("WithSymbol updated ins")) qt.Assert(t, qt.Equals(ins2.Symbol(), "my_sym2"), qt.Commentf("WithSymbol didn't update ins2")) // Map ins = LoadMapPtr(R1, 0) ins2 = ins testMap := testFDer(1) qt.Assert(t, qt.IsNil(ins2.AssociateMap(testMap)), qt.Commentf("failed to associate map with ins2")) qt.Assert(t, qt.IsNil(ins.Map()), qt.Commentf("AssociateMap updated ins")) qt.Assert(t, qt.Equals[FDer](ins2.Map(), testMap), qt.Commentf("AssociateMap didn't update ins2")) } type testFDer int func (t testFDer) FD() int { return int(t) } func TestAtomics(t *testing.T) { rawInsns := []byte{ 0xc3, 0x21, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) += w2 0xc3, 0x21, 0x01, 0x00, 0x50, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) &= w2 0xc3, 0x21, 0x01, 0x00, 0xa0, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) ^= w2 0xc3, 0x21, 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, // lock *(u32 *)(r1 + 0x1) |= w2 0xdb, 0x21, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) += r2 0xdb, 0x21, 0x01, 0x00, 0x50, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) 
&= r2 0xdb, 0x21, 0x01, 0x00, 0xa0, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) ^= r2 0xdb, 0x21, 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, // lock *(u64 *)(r1 + 0x1) |= r2 0xc3, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // w0 = atomic_fetch_add((u32 *)(r1 + 0x0), w0) 0xc3, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, // w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0) 0xc3, 0x01, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, // w0 = atomic_fetch_xor((u32 *)(r1 + 0x0), w0) 0xc3, 0x01, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, // w0 = atomic_fetch_or((u32 *)(r1 + 0x0), w0) 0xdb, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // r0 = atomic_fetch_add((u64 *)(r1 + 0x0), r0) 0xdb, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, // r0 = atomic_fetch_and((u64 *)(r1 + 0x0), r0) 0xdb, 0x01, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, // r0 = atomic_fetch_xor((u64 *)(r1 + 0x0), r0) 0xdb, 0x01, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, // r0 = atomic_fetch_or((u64 *)(r1 + 0x0), r0) 0xc3, 0x01, 0x00, 0x00, 0xe1, 0x00, 0x00, 0x00, // w0 = xchg32_32(r1 + 0x0, w0) 0xdb, 0x01, 0x00, 0x00, 0xe1, 0x00, 0x00, 0x00, // r0 = xchg_64(r1 + 0x0, r0) 0xc3, 0x11, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, // w0 = cmpxchg32_32(r1 + 0x0, w0, w1) 0xdb, 0x11, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, // r0 = cmpxchg_64(r1 + 0x0, r0, r1) } insns, err := AppendInstructions(nil, bytes.NewReader(rawInsns), binary.LittleEndian, platform.Linux) if err != nil { t.Fatal(err) } lines := []string{ "StXAtomicAddW dst: r1 src: r2 off: 1", "StXAtomicAndW dst: r1 src: r2 off: 1", "StXAtomicXorW dst: r1 src: r2 off: 1", "StXAtomicOrW dst: r1 src: r2 off: 1", "StXAtomicAddDW dst: r1 src: r2 off: 1", "StXAtomicAndDW dst: r1 src: r2 off: 1", "StXAtomicXorDW dst: r1 src: r2 off: 1", "StXAtomicOrDW dst: r1 src: r2 off: 1", "StXAtomicFetchAddW dst: r1 src: r0 off: 0", "StXAtomicFetchAndW dst: r1 src: r0 off: 0", "StXAtomicFetchXorW dst: r1 src: r0 off: 0", "StXAtomicFetchOrW dst: r1 src: r0 off: 0", "StXAtomicFetchAddDW dst: r1 src: r0 off: 0", "StXAtomicFetchAndDW dst: r1 src: 
r0 off: 0", "StXAtomicFetchXorDW dst: r1 src: r0 off: 0", "StXAtomicFetchOrDW dst: r1 src: r0 off: 0", "StXAtomicXchgW dst: r1 src: r0 off: 0", "StXAtomicXchgDW dst: r1 src: r0 off: 0", "StXAtomicCmpXchgW dst: r1 src: r1 off: 0", "StXAtomicCmpXchgDW dst: r1 src: r1 off: 0", } for i, ins := range insns { if want, got := lines[i], fmt.Sprint(ins); want != got { t.Errorf("Expected %q, got %q", want, got) } } // Marshal and unmarshal again to make sure the instructions are // still valid. var buf bytes.Buffer err = insns.Marshal(&buf, binary.LittleEndian) if err != nil { t.Fatal(err) } if !bytes.Equal(buf.Bytes(), rawInsns) { t.Error("Expected instructions to be equal after marshalling") } } func TestISAv4(t *testing.T) { rawInsns := []byte{ 0xd7, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, // r1 = bswap16 r1 0xd7, 0x02, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, // r2 = bswap32 r2 0xd7, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, // r3 = bswap64 r3 0x91, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // r1 = *(s8 *)(r4 + 0x0) 0x89, 0x52, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // r2 = *(s16 *)(r5 + 0x4) 0x81, 0x63, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, // r3 = *(s32 *)(r6 + 0x8) 0x91, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // r1 = *(s8 *)(r4 + 0x0) 0x89, 0x52, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // r2 = *(s16 *)(r5 + 0x4) 0xbf, 0x41, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, // r1 = (s8)r4 0xbf, 0x52, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, // r2 = (s16)r5 0xbf, 0x63, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, // r3 = (s32)r6 0xbc, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, // w1 = (s8)w3 0xbc, 0x42, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, // w2 = (s16)w4 0x06, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, // gotol +3 0x3f, 0x31, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // r1 s/= r3 0x9f, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // r2 s%= r4 0x3c, 0x31, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // w1 s/= w3 0x9c, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // w2 s%= w4 0xd3, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // w0 = 
load_acquire((u8 *)(r1 + 0x0)) 0xcb, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // w0 = load_acquire((u16 *)(r1 + 0x0)) 0xc3, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // w0 = load_acquire((u32 *)(r1 + 0x0)) 0xdb, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // r0 = load_acquire((u64 *)(r1 + 0x0)) 0xd3, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u8 *)(r1 + 0x0), w2) 0xcb, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u16 *)(r1 + 0x0), w2) 0xc3, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u32 *)(r1 + 0x0), w2) 0xdb, 0x21, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, // store_release((u64 *)(r1 + 0x0), r2) } insns, err := AppendInstructions(nil, bytes.NewReader(rawInsns), binary.LittleEndian, platform.Linux) if err != nil { t.Fatal(err) } lines := []string{ "BSwap16 dst: r1 ", "BSwap32 dst: r2 ", "BSwap64 dst: r3 ", "LdXMemSXB dst: r1 src: r4 off: 0 imm: 0", "LdXMemSXH dst: r2 src: r5 off: 4 imm: 0", "LdXMemSXW dst: r3 src: r6 off: 8 imm: 0", "LdXMemSXB dst: r1 src: r4 off: 0 imm: 0", "LdXMemSXH dst: r2 src: r5 off: 4 imm: 0", "MovSX8Reg dst: r1 src: r4", "MovSX16Reg dst: r2 src: r5", "MovSX32Reg dst: r3 src: r6", "MovSX8Reg32 dst: r1 src: r3", "MovSX16Reg32 dst: r2 src: r4", "Ja32 imm: 3", "SDivReg dst: r1 src: r3", "SModReg dst: r2 src: r4", "SDivReg32 dst: r1 src: r3", "SModReg32 dst: r2 src: r4", "StXAtomicLdAcqB dst: r0 src: r1 off: 0", "StXAtomicLdAcqH dst: r0 src: r1 off: 0", "StXAtomicLdAcqW dst: r0 src: r1 off: 0", "StXAtomicLdAcqDW dst: r0 src: r1 off: 0", "StXAtomicStRelB dst: r1 src: r2 off: 0", "StXAtomicStRelH dst: r1 src: r2 off: 0", "StXAtomicStRelW dst: r1 src: r2 off: 0", "StXAtomicStRelDW dst: r1 src: r2 off: 0", } for i, ins := range insns { if want, got := lines[i], fmt.Sprint(ins); want != got { t.Errorf("Expected %q, got %q", want, got) } } // Marshal and unmarshal again to make sure the instructions are // still valid. 
var buf bytes.Buffer err = insns.Marshal(&buf, binary.LittleEndian) if err != nil { t.Fatal(err) } if !bytes.Equal(buf.Bytes(), rawInsns) { t.Error("Expected instructions to be equal after marshalling") } } func TestLongJumpPatching(t *testing.T) { insns := Instructions{ LongJump("exit"), Xor.Reg(R0, R0), Xor.Reg(R0, R0), Xor.Reg(R0, R0), Return().WithSymbol("exit"), } err := insns.encodeFunctionReferences() if err != nil { t.Fatal(err) } if insns[0].Constant != 3 { t.Errorf("Expected offset to be 3, got %d", insns[1].Constant) } } ================================================ FILE: asm/jump.go ================================================ package asm //go:generate go tool stringer -output jump_string.go -type=JumpOp // JumpOp affect control flow. // // msb lsb // +----+-+---+ // |OP |s|cls| // +----+-+---+ type JumpOp uint8 const jumpMask OpCode = 0xf0 const ( // InvalidJumpOp is returned by getters when invoked // on non branch OpCodes InvalidJumpOp JumpOp = 0xff // Ja jumps by offset unconditionally Ja JumpOp = 0x00 // JEq jumps by offset if r == imm JEq JumpOp = 0x10 // JGT jumps by offset if r > imm JGT JumpOp = 0x20 // JGE jumps by offset if r >= imm JGE JumpOp = 0x30 // JSet jumps by offset if r & imm JSet JumpOp = 0x40 // JNE jumps by offset if r != imm JNE JumpOp = 0x50 // JSGT jumps by offset if signed r > signed imm JSGT JumpOp = 0x60 // JSGE jumps by offset if signed r >= signed imm JSGE JumpOp = 0x70 // Call builtin or user defined function from imm Call JumpOp = 0x80 // Exit ends execution, with value in r0 Exit JumpOp = 0x90 // JLT jumps by offset if r < imm JLT JumpOp = 0xa0 // JLE jumps by offset if r <= imm JLE JumpOp = 0xb0 // JSLT jumps by offset if signed r < signed imm JSLT JumpOp = 0xc0 // JSLE jumps by offset if signed r <= signed imm JSLE JumpOp = 0xd0 ) // Return emits an exit instruction. // // Requires a return value in R0. 
func Return() Instruction {
	var ins Instruction
	ins.OpCode = OpCode(JumpClass).SetJumpOp(Exit)
	return ins
}

// Op returns the JumpClass OpCode for op with the given operand source.
func (op JumpOp) Op(source Source) OpCode {
	code := OpCode(JumpClass)
	return code.SetJumpOp(op).SetSource(source)
}

// Imm compares the 64 bit dst against value, sign extended to 64 bits, and
// adjusts PC by offset if the condition is fulfilled. The jump target is
// given by label.
func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
	ins := Instruction{
		OpCode:   op.opCode(JumpClass, ImmSource),
		Dst:      dst,
		Constant: int64(value),
		Offset:   -1,
	}
	return ins.WithReference(label)
}

// Imm32 compares the 32 bit dst against the 32 bit value and adjusts PC by
// offset if the condition is fulfilled. The jump target is given by label.
// Requires kernel 5.1.
func (op JumpOp) Imm32(dst Register, value int32, label string) Instruction {
	ins := Instruction{
		OpCode:   op.opCode(Jump32Class, ImmSource),
		Dst:      dst,
		Constant: int64(value),
		Offset:   -1,
	}
	return ins.WithReference(label)
}

// Reg compares the 64 bit dst against the 64 bit src and adjusts PC by offset
// if the condition is fulfilled. The jump target is given by label.
func (op JumpOp) Reg(dst, src Register, label string) Instruction {
	ins := Instruction{
		OpCode: op.opCode(JumpClass, RegSource),
		Dst:    dst,
		Src:    src,
		Offset: -1,
	}
	return ins.WithReference(label)
}

// Reg32 compares the 32 bit dst against the 32 bit src and adjusts PC by
// offset if the condition is fulfilled. The jump target is given by label.
// Requires kernel 5.1.
func (op JumpOp) Reg32(dst, src Register, label string) Instruction {
	ins := Instruction{
		OpCode: op.opCode(Jump32Class, RegSource),
		Dst:    dst,
		Src:    src,
		Offset: -1,
	}
	return ins.WithReference(label)
}

// opCode builds the OpCode for a conditional or unconditional jump in the
// given class. Exit and Call are not jumps and yield InvalidOpCode.
func (op JumpOp) opCode(class Class, source Source) OpCode {
	switch op {
	case Exit, Call:
		return InvalidOpCode
	}

	return OpCode(class).SetJumpOp(op).SetSource(source)
}

// LongJump returns a jump always instruction with a range of [-2^31, 2^31 - 1].
//
// The target is carried in the Constant field, which starts out as -1 until
// the reference to label is resolved.
func LongJump(label string) Instruction {
	ins := Instruction{
		OpCode:   Ja.opCode(Jump32Class, ImmSource),
		Constant: -1,
	}
	return ins.WithReference(label)
}

// Label adjusts PC to the address of the label.
func (op JumpOp) Label(label string) Instruction {
	// A Call by label gets Src PseudoCall and a placeholder Constant of -1.
	if op == Call {
		return Instruction{
			OpCode:   OpCode(JumpClass).SetJumpOp(Call),
			Src:      PseudoCall,
			Constant: -1,
		}.WithReference(label)
	}

	// Every other op gets a placeholder Offset of -1 instead.
	return Instruction{
		OpCode: OpCode(JumpClass).SetJumpOp(op),
		Offset: -1,
	}.WithReference(label)
}

================================================
FILE: asm/jump_string.go
================================================
// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT.

package asm

import "strconv"

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[InvalidJumpOp-255]
	_ = x[Ja-0]
	_ = x[JEq-16]
	_ = x[JGT-32]
	_ = x[JGE-48]
	_ = x[JSet-64]
	_ = x[JNE-80]
	_ = x[JSGT-96]
	_ = x[JSGE-112]
	_ = x[Call-128]
	_ = x[Exit-144]
	_ = x[JLT-160]
	_ = x[JLE-176]
	_ = x[JSLT-192]
	_ = x[JSLE-208]
}

const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp"

var _JumpOp_map = map[JumpOp]string{
	0:   _JumpOp_name[0:2],
	16:  _JumpOp_name[2:5],
	32:  _JumpOp_name[5:8],
	48:  _JumpOp_name[8:11],
	64:  _JumpOp_name[11:15],
	80:  _JumpOp_name[15:18],
	96:  _JumpOp_name[18:22],
	112: _JumpOp_name[22:26],
	128: _JumpOp_name[26:30],
	144: _JumpOp_name[30:34],
	160: _JumpOp_name[34:37],
	176: _JumpOp_name[37:40],
	192: _JumpOp_name[40:44],
	208: _JumpOp_name[44:48],
	255: _JumpOp_name[48:61],
}

func (i JumpOp) String() string {
	if str, ok := _JumpOp_map[i]; ok {
		return str
	}
	return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
}

================================================
FILE: asm/load_store.go
================================================
package asm

import "fmt"

//go:generate go tool stringer -output load_store_string.go -type=Mode,Size

// Mode for load and store operations
//
//	msb      lsb
//	+---+--+---+
//	|MDE|sz|cls|
//	+---+--+---+
type Mode uint8

const modeMask OpCode = 0xe0

const (
	// InvalidMode is returned by getters when invoked
	// on non load / store OpCodes
	InvalidMode Mode = 0xff
	// ImmMode - immediate value
	ImmMode Mode = 0x00
	// AbsMode - immediate value + offset
	AbsMode Mode = 0x20
	// IndMode - indirect (imm+src)
	IndMode Mode = 0x40
	// MemMode - load from memory
	MemMode Mode = 0x60
	// MemSXMode - load from memory, sign extension
	MemSXMode Mode = 0x80
	// AtomicMode - add atomically across processors.
	AtomicMode Mode = 0xc0
)

// atomicMask presumably selects the OpCode bits that carry an AtomicOp;
// NOTE(review): confirm against the SetAtomicOp implementation.
const atomicMask OpCode = 0x0001_ff00

// AtomicOp is the operation carried by an AtomicMode instruction, see
// AtomicOp.OpCode.
type AtomicOp uint32

const (
	// InvalidAtomic is presumably the AtomicOp counterpart of InvalidMode,
	// returned by getters on non-atomic OpCodes. NOTE(review): confirm.
	InvalidAtomic AtomicOp = 0xffff_ffff

	// AddAtomic - add src to memory address dst atomically
	AddAtomic AtomicOp = AtomicOp(Add) << 8
	// FetchAdd - add src to memory address dst atomically, store result in src
	FetchAdd AtomicOp = AddAtomic | fetch
	// AndAtomic - bitwise AND src with memory address at dst atomically
	AndAtomic AtomicOp = AtomicOp(And) << 8
	// FetchAnd - bitwise AND src with memory address at dst atomically, store result in src
	FetchAnd AtomicOp = AndAtomic | fetch
	// OrAtomic - bitwise OR src with memory address at dst atomically
	OrAtomic AtomicOp = AtomicOp(Or) << 8
	// FetchOr - bitwise OR src with memory address at dst atomically, store result in src
	FetchOr AtomicOp = OrAtomic | fetch
	// XorAtomic - bitwise XOR src with memory address at dst atomically
	XorAtomic AtomicOp = AtomicOp(Xor) << 8
	// FetchXor - bitwise XOR src with memory address at dst atomically, store result in src
	FetchXor AtomicOp = XorAtomic | fetch

	// Xchg - atomically exchange the old value with the new value
	//
	// src gets populated with the old value of *(size *)(dst + offset).
	Xchg AtomicOp = 0x0000_e000 | fetch
	// CmpXchg - atomically compare and exchange the old value with the new value
	//
	// Compares R0 and *(size *)(dst + offset), writes src to *(size *)(dst + offset) on match.
	// R0 gets populated with the old value of *(size *)(dst + offset), even if no exchange occurs.
	CmpXchg AtomicOp = 0x0000_f000 | fetch

	// fetch modifier for copy-modify-write atomics
	fetch AtomicOp = 0x0000_0100

	// loadAcquire - atomically load with acquire semantics
	loadAcquire AtomicOp = 0x0001_0000
	// storeRelease - atomically store with release semantics
	storeRelease AtomicOp = 0x0001_1000
)

// String returns a short name for op. Arithmetic ops use the name of the
// underlying ALUOp, prefixed with "Fetch" for the fetch variants. Unknown
// values are rendered as AtomicOp(0x...).
func (op AtomicOp) String() string {
	var name string
	switch op {
	case AddAtomic, AndAtomic, OrAtomic, XorAtomic:
		name = ALUOp(op >> 8).String()
	case FetchAdd, FetchAnd, FetchOr, FetchXor:
		// Clear the fetch bit to recover the plain ALUOp.
		name = "Fetch" + ALUOp((op^fetch)>>8).String()
	case Xchg:
		name = "Xchg"
	case CmpXchg:
		name = "CmpXchg"
	case loadAcquire:
		name = "LdAcq"
	case storeRelease:
		name = "StRel"
	default:
		name = fmt.Sprintf("AtomicOp(%#x)", uint32(op))
	}

	return name
}

// OpCode returns the StXClass, AtomicMode OpCode that performs op on an
// operand of the given size.
//
// InvalidOpCode is returned when one of the arithmetic, exchange or
// compare-and-exchange ops is requested with a Byte or Half size.
func (op AtomicOp) OpCode(size Size) OpCode {
	switch op {
	case AddAtomic, AndAtomic, OrAtomic, XorAtomic,
		FetchAdd, FetchAnd, FetchOr, FetchXor,
		Xchg, CmpXchg:
		switch size {
		case Byte, Half:
			// 8-bit and 16-bit atomic copy-modify-write atomics are not supported
			return InvalidOpCode
		}
	}

	return OpCode(StXClass).SetMode(AtomicMode).SetSize(size).SetAtomicOp(op)
}

// Mem emits `*(size *)(dst + offset) (op) src`.
func (op AtomicOp) Mem(dst, src Register, size Size, offset int16) Instruction {
	return Instruction{
		OpCode: op.OpCode(size),
		Dst:    dst,
		Src:    src,
		Offset: offset,
	}
}

// LoadAcquire emits `lock-acquire dst = *(size *)(src + offset)`.
func LoadAcquire(dst, src Register, size Size, offset int16) Instruction {
	return Instruction{
		OpCode: loadAcquire.OpCode(size),
		Dst:    dst,
		Src:    src,
		Offset: offset,
	}
}

// StoreRelease emits `lock-release *(size *)(dst + offset) = src`.
func StoreRelease(dst, src Register, size Size, offset int16) Instruction {
	var ins Instruction
	ins.OpCode = storeRelease.OpCode(size)
	ins.Dst, ins.Src = dst, src
	ins.Offset = offset
	return ins
}

// Size is the operand width of a load or store operation.
//
//	msb      lsb
//	+---+--+---+
//	|mde|SZ|cls|
//	+---+--+---+
type Size uint8

const sizeMask OpCode = 0x18

const (
	// InvalidSize is what getters return when they are invoked
	// on OpCodes that are neither loads nor stores.
	InvalidSize Size = 0xff
	// DWord is a double word, 64 bits wide.
	DWord Size = 0x18
	// Word is a word, 32 bits wide.
	Word Size = 0x00
	// Half is a half-word, 16 bits wide.
	Half Size = 0x08
	// Byte is a byte, 8 bits wide.
	Byte Size = 0x10
)

// Sizeof returns the width of s in bytes, or -1 if s is not one of
// Byte, Half, Word or DWord.
func (s Size) Sizeof() int {
	switch s {
	case Byte:
		return 1
	case Half:
		return 2
	case Word:
		return 4
	case DWord:
		return 8
	}
	return -1
}

// LoadMemOp returns the OpCode that loads a value of the given size from
// memory.
func LoadMemOp(size Size) OpCode {
	code := OpCode(LdXClass)
	return code.SetMode(MemMode).SetSize(size)
}

// LoadMemSXOp returns the OpCode that loads a value of the given size from
// memory and sign extends it.
func LoadMemSXOp(size Size) OpCode {
	code := OpCode(LdXClass)
	return code.SetMode(MemSXMode).SetSize(size)
}

// LoadMem emits `dst = *(size *)(src + offset)`.
func LoadMem(dst, src Register, offset int16, size Size) Instruction {
	var ins Instruction
	ins.OpCode = LoadMemOp(size)
	ins.Dst, ins.Src = dst, src
	ins.Offset = offset
	return ins
}

// LoadMemSX emits `dst = *(size *)(src + offset)` but sign extends dst.
//
// A DWord size yields an Instruction with InvalidOpCode.
func LoadMemSX(dst, src Register, offset int16, size Size) Instruction {
	ins := Instruction{OpCode: InvalidOpCode}
	if size != DWord {
		ins = Instruction{
			OpCode: LoadMemSXOp(size),
			Dst:    dst,
			Src:    src,
			Offset: offset,
		}
	}
	return ins
}

// LoadImmOp returns the OpCode that loads an immediate of the given size.
//
// As of kernel 4.20, only DWord size is accepted.
func LoadImmOp(size Size) OpCode {
	code := OpCode(LdClass)
	return code.SetMode(ImmMode).SetSize(size)
}

// LoadImm emits `dst = (size)value`.
//
// As of kernel 4.20, only DWord size is accepted.
func LoadImm(dst Register, value int64, size Size) Instruction {
	var ins Instruction
	ins.OpCode = LoadImmOp(size)
	ins.Dst = dst
	ins.Constant = value
	return ins
}

// LoadMapPtr stores a pointer to a map in dst.
//
// A negative fd yields an instruction carrying InvalidOpCode.
func LoadMapPtr(dst Register, fd int) Instruction {
	if fd < 0 {
		return Instruction{OpCode: InvalidOpCode}
	}

	// A map pointer load is a 64-bit immediate load tagged with PseudoMapFD.
	ins := LoadImm(dst, int64(uint32(fd)), DWord)
	ins.Src = PseudoMapFD
	return ins
}

// LoadMapValue stores a pointer to the value at a certain offset of a map.
//
// A negative fd yields an instruction carrying InvalidOpCode.
func LoadMapValue(dst Register, fd int, offset uint32) Instruction {
	if fd < 0 {
		return Instruction{OpCode: InvalidOpCode}
	}

	// The offset occupies the upper 32 bits of the constant, the fd the lower 32.
	upper := uint64(offset) << 32
	lower := uint64(uint32(fd))

	ins := LoadImm(dst, int64(upper|lower), DWord)
	ins.Src = PseudoMapValue
	return ins
}

// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff.
func LoadIndOp(size Size) OpCode {
	op := OpCode(LdClass).SetMode(IndMode)
	return op.SetSize(size)
}

// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`.
func LoadInd(dst, src Register, offset int32, size Size) Instruction {
	var ins Instruction
	ins.OpCode = LoadIndOp(size)
	ins.Dst = dst
	ins.Src = src
	ins.Constant = int64(offset)
	return ins
}

// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff.
func LoadAbsOp(size Size) OpCode {
	op := OpCode(LdClass).SetMode(AbsMode)
	return op.SetSize(size)
}

// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`.
func LoadAbs(offset int32, size Size) Instruction {
	var ins Instruction
	ins.OpCode = LoadAbsOp(size)
	ins.Dst = R0
	ins.Constant = int64(offset)
	return ins
}

// StoreMemOp returns the OpCode for storing a register of given size in memory.
func StoreMemOp(size Size) OpCode {
	return OpCode(StXClass).SetMode(MemMode).SetSize(size)
}

// StoreMem emits `*(size *)(dst + offset) = src`
func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
	return Instruction{
		OpCode: StoreMemOp(size),
		Dst:    dst,
		Src:    src,
		Offset: offset,
	}
}

// StoreImmOp returns the OpCode for storing an immediate of given size in memory.
func StoreImmOp(size Size) OpCode {
	return OpCode(StClass).SetMode(MemMode).SetSize(size)
}

// StoreImm emits `*(size *)(dst + offset) = value`.
//
// A DWord size yields an instruction carrying InvalidOpCode.
func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
	if size == DWord {
		return Instruction{OpCode: InvalidOpCode}
	}

	return Instruction{
		OpCode:   StoreImmOp(size),
		Dst:      dst,
		Offset:   offset,
		Constant: value,
	}
}

// StoreXAddOp returns the OpCode to atomically add a register to a value in memory.
//
// It is shorthand for AddAtomic.OpCode(size).
func StoreXAddOp(size Size) OpCode {
	return AddAtomic.OpCode(size)
}

// StoreXAdd atomically adds src to *dst.
//
// It is shorthand for AddAtomic.Mem with an offset of 0.
func StoreXAdd(dst, src Register, size Size) Instruction {
	return AddAtomic.Mem(dst, src, size, 0)
}

================================================
FILE: asm/load_store_string.go
================================================
// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT.

package asm

import "strconv"

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[InvalidMode-255]
	_ = x[ImmMode-0]
	_ = x[AbsMode-32]
	_ = x[IndMode-64]
	_ = x[MemMode-96]
	_ = x[MemSXMode-128]
	_ = x[AtomicMode-192]
}

const (
	_Mode_name_0 = "ImmMode"
	_Mode_name_1 = "AbsMode"
	_Mode_name_2 = "IndMode"
	_Mode_name_3 = "MemMode"
	_Mode_name_4 = "MemSXMode"
	_Mode_name_5 = "AtomicMode"
	_Mode_name_6 = "InvalidMode"
)

func (i Mode) String() string {
	switch {
	case i == 0:
		return _Mode_name_0
	case i == 32:
		return _Mode_name_1
	case i == 64:
		return _Mode_name_2
	case i == 96:
		return _Mode_name_3
	case i == 128:
		return _Mode_name_4
	case i == 192:
		return _Mode_name_5
	case i == 255:
		return _Mode_name_6
	default:
		return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
	}
}

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[InvalidSize-255]
	_ = x[DWord-24]
	_ = x[Word-0]
	_ = x[Half-8]
	_ = x[Byte-16]
}

const (
	_Size_name_0 = "Word"
	_Size_name_1 = "Half"
	_Size_name_2 = "Byte"
	_Size_name_3 = "DWord"
	_Size_name_4 = "InvalidSize"
)

func (i Size) String() string {
	switch {
	case i == 0:
		return _Size_name_0
	case i == 8:
		return _Size_name_1
	case i == 16:
		return _Size_name_2
	case i == 24:
		return _Size_name_3
	case i == 255:
		return _Size_name_4
	default:
		return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
	}
}

================================================
FILE: asm/metadata.go
================================================
package asm

// Metadata contains metadata about an instruction.
//
// Entries are kept in a singly linked list of key / value pairs starting at
// head. The zero value is an empty set of metadata.
type Metadata struct {
	head *metaElement
}

// metaElement is a single key / value pair in the linked list of a Metadata.
type metaElement struct {
	next       *metaElement
	key, value interface{}
}

// Find the element containing key.
//
// Keys are compared using ==, so both the dynamic type and the value have
// to match.
//
// Returns nil if there is no such element.
func (m *Metadata) find(key interface{}) *metaElement {
	for e := m.head; e != nil; e = e.next {
		if e.key == key {
			return e
		}
	}
	return nil
}

// Remove an element from the linked list.
//
// Copies as many elements of the list as necessary to remove r, but doesn't
// perform a full copy.
func (m *Metadata) remove(r *metaElement) {
	// current always points at the link which has to be rewritten next:
	// first m.head, afterwards the next field of the most recent copy.
	current := &m.head
	for e := m.head; e != nil; e = e.next {
		if e == r {
			// We've found the element we want to remove.
			*current = e.next

			// No need to copy the tail.
			return
		}

		// There is another element in front of the one we want to remove.
		// We have to copy it to be able to change metaElement.next.
		cpy := &metaElement{key: e.key, value: e.value}
		*current = cpy
		current = &cpy.next
	}
}

// Set a key to a value.
//
// If value is nil, the key is removed. Avoids modifying old metadata by
// copying if necessary.
func (m *Metadata) Set(key, value interface{}) {
	if e := m.find(key); e != nil {
		if e.value == value {
			// Key is present and the value is the same. Nothing to do.
			return
		}

		// Key is present with a different value. Create a copy of the list
		// which doesn't have the element in it.
		m.remove(e)
	}

	// m.head is now a linked list that doesn't contain key.
	if value == nil {
		return
	}

	// Prepend the new element, existing elements are shared and left untouched.
	m.head = &metaElement{key: key, value: value, next: m.head}
}

// Get the value of a key.
//
// Returns nil if no value with the given key is present.
func (m *Metadata) Get(key interface{}) interface{} {
	if e := m.find(key); e != nil {
		return e.value
	}
	return nil
}

================================================
FILE: asm/metadata_test.go
================================================
package asm

import (
	"testing"
	"unsafe"

	"github.com/go-quicktest/qt"
)

// TestMetadata checks lookup, copy-on-write, key typing and removal
// behaviour of Metadata.
func TestMetadata(t *testing.T) {
	var m Metadata

	// Metadata should be the size of a pointer.
	qt.Assert(t, qt.Equals(unsafe.Sizeof(m), unsafe.Sizeof(uintptr(0))))

	// A lookup in a nil meta should return nil.
	qt.Assert(t, qt.IsNil(m.Get(bool(false))))

	// We can look up anything we inserted.
	m.Set(bool(false), int(0))
	m.Set(int(1), int(1))
	qt.Assert(t, qt.Equals(m.Get(bool(false)), 0))
	qt.Assert(t, qt.Equals(m.Get(1), 1))

	// We have copy on write semantics
	old := m
	m.Set(bool(false), int(1))
	qt.Assert(t, qt.Equals(m.Get(bool(false)), 1))
	qt.Assert(t, qt.Equals(m.Get(int(1)), 1))
	qt.Assert(t, qt.Equals(old.Get(bool(false)), 0))
	qt.Assert(t, qt.Equals(old.Get(int(1)), 1))

	// Newtypes are handled distinctly.
	type b bool
	m.Set(b(false), int(42))
	qt.Assert(t, qt.Equals(m.Get(bool(false)), 1))
	qt.Assert(t, qt.Equals(m.Get(int(1)), 1))
	qt.Assert(t, qt.Equals(m.Get(b(false)), 42))

	// Setting nil removes a key.
	m.Set(bool(false), nil)
	qt.Assert(t, qt.IsNil(m.Get(bool(false))))
	qt.Assert(t, qt.Equals(m.Get(int(1)), 1))
	qt.Assert(t, qt.Equals(m.Get(b(false)), 42))
}

// BenchmarkMetadata measures Set and Get on lists of up to worstCaseItems
// elements.
func BenchmarkMetadata(b *testing.B) {
	// Assume that three bits of metadata on a single instruction is
	// our worst case.
	const worstCaseItems = 3

	type t struct{}

	b.Run("add first", func(b *testing.B) {
		b.ReportAllocs()

		for b.Loop() {
			var v Metadata
			v.Set(t{}, 0)
		}
	})

	b.Run("add last", func(b *testing.B) {
		var m Metadata
		for i := 0; i < worstCaseItems-1; i++ {
			m.Set(i, i)
		}

		b.ReportAllocs()

		for b.Loop() {
			v := m
			v.Set(t{}, 0)
		}
	})

	b.Run("add existing", func(b *testing.B) {
		var m Metadata
		for i := 0; i < worstCaseItems-1; i++ {
			m.Set(i, i)
		}
		m.Set(t{}, 0)

		b.ReportAllocs()

		for b.Loop() {
			v := m
			v.Set(t{}, 0)
		}
	})

	b.Run("get miss", func(b *testing.B) {
		var m Metadata
		for i := 0; i < worstCaseItems; i++ {
			m.Set(i, i)
		}

		b.ReportAllocs()

		for b.Loop() {
			if m.Get(t{}) != nil {
				b.Fatal("got result from miss")
			}
		}
	})
}

================================================
FILE: asm/opcode.go
================================================
package asm

import (
	"fmt"
	"strings"
)

//go:generate go tool stringer -output opcode_string.go -type=Class

// Class of operations
//
//	msb      lsb
//	+---+--+---+
//	|  ??  |CLS|
//	+---+--+---+
type Class uint8

const classMask OpCode = 0x07

const (
	// LdClass loads immediate values into registers.
	// Also used for non-standard load operations from cBPF.
	LdClass Class = 0x00
	// LdXClass loads memory into registers.
	LdXClass Class = 0x01
	// StClass stores immediate values to memory.
	StClass Class = 0x02
	// StXClass stores registers to memory.
	StXClass Class = 0x03
	// ALUClass describes arithmetic operators.
	ALUClass Class = 0x04
	// JumpClass describes jump operators.
	JumpClass Class = 0x05
	// Jump32Class describes jump operators with 32-bit comparisons.
	// Requires kernel 5.1.
	Jump32Class Class = 0x06
	// ALU64Class describes arithmetic operators in 64-bit mode.
	ALU64Class Class = 0x07
)

// IsLoad checks if this is either LdClass or LdXClass.
func (cls Class) IsLoad() bool {
	return cls == LdClass || cls == LdXClass
}

// IsStore checks if this is either StClass or StXClass.
func (cls Class) IsStore() bool {
	return cls == StClass || cls == StXClass
}

// isLoadOrStore checks if this is any of the load or store classes.
func (cls Class) isLoadOrStore() bool {
	return cls.IsLoad() || cls.IsStore()
}

// IsALU checks if this is either ALUClass or ALU64Class.
func (cls Class) IsALU() bool {
	return cls == ALUClass || cls == ALU64Class
}

// IsJump checks if this is either JumpClass or Jump32Class.
func (cls Class) IsJump() bool {
	return cls == JumpClass || cls == Jump32Class
}

// isJumpOrALU checks if this is any of the jump or ALU classes.
func (cls Class) isJumpOrALU() bool {
	return cls.IsJump() || cls.IsALU()
}

// OpCode represents a single operation.
// It is not a 1:1 mapping to real eBPF opcodes.
//
// The encoding varies based on a 3-bit Class:
//
//	7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//	                           ???                           | CLS
//
// For ALUClass and ALU64Class:
//
//	7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//	                       0                       |  OPC  |S| CLS
//
// For LdClass, LdXclass, StClass and StXClass:
//
//	7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//	                       0                       | MDE |SIZ| CLS
//
// For StXClass where MDE == AtomicMode:
//
//	7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//	              0              |    ATOMIC OP    | MDE |SIZ| CLS
//
// For JumpClass, Jump32Class:
//
//	7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//	                       0                       |  OPC  |S| CLS
type OpCode uint32

// InvalidOpCode is returned by setters on OpCode
const InvalidOpCode OpCode = 0xffff

// bpfOpCode returns the actual BPF opcode.
//
// Returns an error if op has bits set outside of the lowest byte.
func (op OpCode) bpfOpCode() (byte, error) {
	const opCodeMask = 0xff

	if !valid(op, opCodeMask) {
		return 0, fmt.Errorf("invalid opcode %x", op)
	}

	return byte(op & opCodeMask), nil
}

// rawInstructions returns the number of BPF instructions required
// to encode this opcode.
func (op OpCode) rawInstructions() int {
	if op.IsDWordLoad() {
		return 2
	}
	return 1
}

// IsDWordLoad checks if op is exactly the opcode returned by LoadImmOp(DWord).
func (op OpCode) IsDWordLoad() bool {
	return op == LoadImmOp(DWord)
}

// Class returns the class of operation.
func (op OpCode) Class() Class {
	return Class(op & classMask)
}

// Mode returns the mode for load and store operations.
//
// Returns InvalidMode for all other classes.
func (op OpCode) Mode() Mode {
	if !op.Class().isLoadOrStore() {
		return InvalidMode
	}
	return Mode(op & modeMask)
}

// Size returns the size for load and store operations.
//
// Returns InvalidSize for all other classes.
func (op OpCode) Size() Size {
	if !op.Class().isLoadOrStore() {
		return InvalidSize
	}
	return Size(op & sizeMask)
}

// AtomicOp returns the type of atomic operation.
//
// Returns InvalidAtomic unless op is of StXClass with AtomicMode.
func (op OpCode) AtomicOp() AtomicOp {
	if op.Class() != StXClass || op.Mode() != AtomicMode {
		return InvalidAtomic
	}
	return AtomicOp(op & atomicMask)
}

// Source returns the source for branch and ALU operations.
//
// Returns InvalidSource for other classes and for Swap operations.
func (op OpCode) Source() Source {
	if !op.Class().isJumpOrALU() || op.ALUOp() == Swap {
		return InvalidSource
	}
	return Source(op & sourceMask)
}

// ALUOp returns the ALUOp.
func (op OpCode) ALUOp() ALUOp { if !op.Class().IsALU() { return InvalidALUOp } return ALUOp(op & aluMask) } // Endianness returns the Endianness for a byte swap instruction. func (op OpCode) Endianness() Endianness { if op.ALUOp() != Swap { return InvalidEndian } return Endianness(op & endianMask) } // JumpOp returns the JumpOp. // Returns InvalidJumpOp if it doesn't encode a jump. func (op OpCode) JumpOp() JumpOp { if !op.Class().IsJump() { return InvalidJumpOp } jumpOp := JumpOp(op & jumpMask) // Some JumpOps are only supported by JumpClass, not Jump32Class. if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call) { return InvalidJumpOp } return jumpOp } // SetMode sets the mode on load and store operations. // // Returns InvalidOpCode if op is of the wrong class. func (op OpCode) SetMode(mode Mode) OpCode { if !op.Class().isLoadOrStore() || !valid(OpCode(mode), modeMask) { return InvalidOpCode } return (op & ^modeMask) | OpCode(mode) } // SetSize sets the size on load and store operations. // // Returns InvalidOpCode if op is of the wrong class. func (op OpCode) SetSize(size Size) OpCode { if !op.Class().isLoadOrStore() || !valid(OpCode(size), sizeMask) { return InvalidOpCode } return (op & ^sizeMask) | OpCode(size) } func (op OpCode) SetAtomicOp(atomic AtomicOp) OpCode { if op.Class() != StXClass || op.Mode() != AtomicMode || !valid(OpCode(atomic), atomicMask) { return InvalidOpCode } return (op & ^atomicMask) | OpCode(atomic) } // SetSource sets the source on jump and ALU operations. // // Returns InvalidOpCode if op is of the wrong class. func (op OpCode) SetSource(source Source) OpCode { if !op.Class().isJumpOrALU() || !valid(OpCode(source), sourceMask) { return InvalidOpCode } return (op & ^sourceMask) | OpCode(source) } // SetALUOp sets the ALUOp on ALU operations. // // Returns InvalidOpCode if op is of the wrong class. 
func (op OpCode) SetALUOp(alu ALUOp) OpCode { if !op.Class().IsALU() || !valid(OpCode(alu), aluMask) { return InvalidOpCode } return (op & ^aluMask) | OpCode(alu) } // SetJumpOp sets the JumpOp on jump operations. // // Returns InvalidOpCode if op is of the wrong class. func (op OpCode) SetJumpOp(jump JumpOp) OpCode { if !op.Class().IsJump() || !valid(OpCode(jump), jumpMask) { return InvalidOpCode } newOp := (op & ^jumpMask) | OpCode(jump) // Check newOp is legal. if newOp.JumpOp() == InvalidJumpOp { return InvalidOpCode } return newOp } func (op OpCode) String() string { var f strings.Builder switch class := op.Class(); { case class.isLoadOrStore(): f.WriteString(strings.TrimSuffix(class.String(), "Class")) mode := op.Mode() f.WriteString(strings.TrimSuffix(mode.String(), "Mode")) if atomic := op.AtomicOp(); atomic != InvalidAtomic { f.WriteString(strings.TrimSuffix(atomic.String(), "Atomic")) } switch op.Size() { case DWord: f.WriteString("DW") case Word: f.WriteString("W") case Half: f.WriteString("H") case Byte: f.WriteString("B") } case class.IsALU(): if op.ALUOp() == Swap && op.Class() == ALU64Class { // B to make BSwap, uncontitional byte swap f.WriteString("B") } f.WriteString(op.ALUOp().String()) if op.ALUOp() == Swap { if op.Class() == ALUClass { // Width for Endian is controlled by Constant f.WriteString(op.Endianness().String()) } } else { f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) if class == ALUClass { f.WriteString("32") } } case class.IsJump(): f.WriteString(op.JumpOp().String()) if class == Jump32Class { f.WriteString("32") } if jop := op.JumpOp(); jop != Exit && jop != Call && jop != Ja { f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) } default: fmt.Fprintf(&f, "OpCode(%#x)", uint8(op)) } return f.String() } // valid returns true if all bits in value are covered by mask. 
func valid(value, mask OpCode) bool {
	return value & ^mask == 0
}

================================================
FILE: asm/opcode_string.go
================================================
// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT.

package asm

import "strconv"

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[LdClass-0]
	_ = x[LdXClass-1]
	_ = x[StClass-2]
	_ = x[StXClass-3]
	_ = x[ALUClass-4]
	_ = x[JumpClass-5]
	_ = x[Jump32Class-6]
	_ = x[ALU64Class-7]
}

const _Class_name = "LdClassLdXClassStClassStXClassALUClassJumpClassJump32ClassALU64Class"

var _Class_index = [...]uint8{0, 7, 15, 22, 30, 38, 47, 58, 68}

func (i Class) String() string {
	idx := int(i) - 0
	if i < 0 || idx >= len(_Class_index)-1 {
		return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _Class_name[_Class_index[idx]:_Class_index[idx+1]]
}

================================================
FILE: asm/opcode_test.go
================================================
package asm

import (
	"fmt"
	"testing"

	"github.com/go-quicktest/qt"
)

// TestGetSetJumpOp checks which combinations of jump class and JumpOp are
// accepted by SetJumpOp, and that JumpOp returns what was set.
func TestGetSetJumpOp(t *testing.T) {
	// test runs a subtest which sets op on an opcode of the given class and
	// checks the outcome against the expectation expressed by valid.
	test := func(class Class, op JumpOp, valid bool) {
		t.Run(fmt.Sprintf("%s-%s", class, op), func(t *testing.T) {
			opcode := OpCode(class).SetJumpOp(op)

			if valid {
				qt.Assert(t, qt.Not(qt.Equals(opcode, InvalidOpCode)))
				qt.Assert(t, qt.Equals(opcode.JumpOp(), op))
			} else {
				qt.Assert(t, qt.Equals(opcode, InvalidOpCode))
				qt.Assert(t, qt.Equals(opcode.JumpOp(), InvalidJumpOp))
			}
		})
	}

	// Exit and call aren't allowed with Jump32
	test(Jump32Class, Exit, false)
	test(Jump32Class, Call, false)

	// But are with Jump
	test(JumpClass, Exit, true)
	test(JumpClass, Call, true)

	// All other ops work
	for _, op := range []JumpOp{
		Ja,
		JEq,
		JGT,
		JGE,
		JSet,
		JNE,
		JSGT,
		JSGE,
		JLT,
		JLE,
		JSLT,
		JSLE,
	} {
		test(Jump32Class, op, true)
		test(JumpClass, op, true)
	}
}
================================================
FILE: asm/register.go
================================================
package asm

import (
	"fmt"
)

// Register is the source or destination of most operations.
type Register uint8

// R0 contains return values.
const R0 Register = 0

// Registers for function arguments.
const (
	R1 Register = R0 + 1 + iota
	R2
	R3
	R4
	R5
)

// Callee saved registers preserved by function calls.
const (
	R6 Register = R5 + 1 + iota
	R7
	R8
	R9
)

// Read-only frame pointer to access stack.
const (
	R10 Register = R9 + 1
	RFP          = R10
)

// Pseudo registers used by 64bit loads and jumps
const (
	PseudoMapFD     = R1 // BPF_PSEUDO_MAP_FD
	PseudoMapValue  = R2 // BPF_PSEUDO_MAP_VALUE
	PseudoCall      = R1 // BPF_PSEUDO_CALL
	PseudoFunc      = R4 // BPF_PSEUDO_FUNC
	PseudoKfuncCall = R2 // BPF_PSEUDO_KFUNC_CALL
)

// String returns "rfp" for register 10 and "r<number>" for every other
// register.
func (r Register) String() string {
	v := uint8(r)
	if v == 10 {
		return "rfp"
	}
	return fmt.Sprintf("r%d", v)
}

================================================
FILE: attachtype_string.go
================================================
// Code generated by "stringer -type AttachType -trimprefix Attach"; DO NOT EDIT.

package ebpf

import "strconv"

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[AttachNone-0]
	_ = x[AttachCGroupInetIngress-0]
	_ = x[AttachCGroupInetEgress-1]
	_ = x[AttachCGroupInetSockCreate-2]
	_ = x[AttachCGroupSockOps-3]
	_ = x[AttachSkSKBStreamParser-4]
	_ = x[AttachSkSKBStreamVerdict-5]
	_ = x[AttachCGroupDevice-6]
	_ = x[AttachSkMsgVerdict-7]
	_ = x[AttachCGroupInet4Bind-8]
	_ = x[AttachCGroupInet6Bind-9]
	_ = x[AttachCGroupInet4Connect-10]
	_ = x[AttachCGroupInet6Connect-11]
	_ = x[AttachCGroupInet4PostBind-12]
	_ = x[AttachCGroupInet6PostBind-13]
	_ = x[AttachCGroupUDP4Sendmsg-14]
	_ = x[AttachCGroupUDP6Sendmsg-15]
	_ = x[AttachLircMode2-16]
	_ = x[AttachFlowDissector-17]
	_ = x[AttachCGroupSysctl-18]
	_ = x[AttachCGroupUDP4Recvmsg-19]
	_ = x[AttachCGroupUDP6Recvmsg-20]
	_ = x[AttachCGroupGetsockopt-21]
	_ = x[AttachCGroupSetsockopt-22]
	_ = x[AttachTraceRawTp-23]
	_ = x[AttachTraceFEntry-24]
	_ = x[AttachTraceFExit-25]
	_ = x[AttachModifyReturn-26]
	_ = x[AttachLSMMac-27]
	_ = x[AttachTraceIter-28]
	_ = x[AttachCgroupInet4GetPeername-29]
	_ = x[AttachCgroupInet6GetPeername-30]
	_ = x[AttachCgroupInet4GetSockname-31]
	_ = x[AttachCgroupInet6GetSockname-32]
	_ = x[AttachXDPDevMap-33]
	_ = x[AttachCgroupInetSockRelease-34]
	_ = x[AttachXDPCPUMap-35]
	_ = x[AttachSkLookup-36]
	_ = x[AttachXDP-37]
	_ = x[AttachSkSKBVerdict-38]
	_ = x[AttachSkReuseportSelect-39]
	_ = x[AttachSkReuseportSelectOrMigrate-40]
	_ = x[AttachPerfEvent-41]
	_ = x[AttachTraceKprobeMulti-42]
	_ = x[AttachTraceKprobeSession-56]
	_ = x[AttachLSMCgroup-43]
	_ = x[AttachStructOps-44]
	_ = x[AttachNetfilter-45]
	_ = x[AttachTCXIngress-46]
	_ = x[AttachTCXEgress-47]
	_ = x[AttachTraceUprobeMulti-48]
	_ = x[AttachCgroupUnixConnect-49]
	_ = x[AttachCgroupUnixSendmsg-50]
	_ = x[AttachCgroupUnixRecvmsg-51]
	_ = x[AttachCgroupUnixGetpeername-52]
	_ = x[AttachCgroupUnixGetsockname-53]
	_ = x[AttachNetkitPrimary-54]
	_ = x[AttachNetkitPeer-55]
	_ = x[AttachWindowsXDP-268435457]
	_ = x[AttachWindowsBind-268435458]
	_ = x[AttachWindowsCGroupInet4Connect-268435459]
	_ = x[AttachWindowsCGroupInet6Connect-268435460]
	_ = x[AttachWindowsCgroupInet4RecvAccept-268435461]
	_ = x[AttachWindowsCgroupInet6RecvAccept-268435462]
	_ = x[AttachWindowsCGroupSockOps-268435463]
	_ = x[AttachWindowsSample-268435464]
	_ = x[AttachWindowsXDPTest-268435465]
}

const (
	_AttachType_name_0 = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEventTraceKprobeMultiLSMCgroupStructOpsNetfilterTCXIngressTCXEgressTraceUprobeMultiCgroupUnixConnectCgroupUnixSendmsgCgroupUnixRecvmsgCgroupUnixGetpeernameCgroupUnixGetsocknameNetkitPrimaryNetkitPeerTraceKprobeSession"
	_AttachType_name_1 = "WindowsXDPWindowsBindWindowsCGroupInet4ConnectWindowsCGroupInet6ConnectWindowsCgroupInet4RecvAcceptWindowsCgroupInet6RecvAcceptWindowsCGroupSockOpsWindowsSampleWindowsXDPTest"
)

var (
	_AttachType_index_0 = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610, 626, 635, 644, 653, 663, 672, 688, 705, 722, 739, 760, 781, 794, 804, 822}
	_AttachType_index_1 = [...]uint8{0, 10, 21, 46, 71, 99, 127, 147, 160, 174}
)

func (i AttachType) String() string {
	switch {
	case i <= 56:
		return _AttachType_name_0[_AttachType_index_0[i]:_AttachType_index_0[i+1]]
	case 268435457 <= i && i <= 268435465:
		i -= 268435457
		return _AttachType_name_1[_AttachType_index_1[i]:_AttachType_index_1[i+1]]
	default:
		return "AttachType(" + strconv.FormatInt(int64(i), 10) + ")"
	}
}

================================================
FILE: btf/btf.go
================================================
package btf

import (
	"debug/elf"
	"errors"
	"fmt"
	"io"
	"iter"
	"maps"
	"math"
	"os"
	"reflect"
	"slices"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
)

const btfMagic = 0xeB9F

// Errors returned by BTF functions.
var (
	ErrNotSupported    = internal.ErrNotSupported
	ErrNotFound        = errors.New("not found")
	ErrNoExtendedInfo  = errors.New("no extended info")
	ErrMultipleMatches = errors.New("multiple matching types")
)

// ID represents the unique ID of a BTF object.
type ID = sys.BTFID

// elfData holds information taken from the ELF a Spec was loaded from.
type elfData struct {
	// Sizes of ELF sections, indexed by section name.
	sectionSizes map[string]uint32
	// Offsets of ELF symbols, indexed by section and symbol name.
	symbolOffsets map[elfSymbol]uint32
	// Types which have already been passed through fixupDatasec.
	fixups map[Type]bool
}

// elfSymbol identifies a symbol by the name of its section and its own name.
type elfSymbol struct {
	section string
	name    string
}

// Spec allows querying a set of Types and loading the set into the
// kernel.
type Spec struct {
	*decoder

	// Additional data from ELF, may be nil.
	elf *elfData
}

// LoadSpec opens file and calls LoadSpecFromReader on it.
func LoadSpec(file string) (*Spec, error) {
	fh, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	return LoadSpecFromReader(fh)
}

// LoadSpecFromReader reads from an ELF or a raw BTF blob.
//
// Returns ErrNotFound if reading from an ELF which contains no BTF.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
	file, err := internal.NewSafeELFFile(rd)
	if err != nil {
		// rd couldn't be opened as an ELF, treat its contents as raw BTF.
		raw, err := io.ReadAll(io.NewSectionReader(rd, 0, math.MaxInt64))
		if err != nil {
			return nil, fmt.Errorf("read raw BTF: %w", err)
		}

		return loadRawSpec(raw, nil)
	}

	return loadSpecFromELF(file)
}

// LoadSpecAndExtInfosFromReader reads from an ELF.
//
// ExtInfos may be nil if the ELF doesn't contain section metadata.
// Returns ErrNotFound if the ELF contains no BTF.
func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) { file, err := internal.NewSafeELFFile(rd) if err != nil { return nil, nil, err } spec, err := loadSpecFromELF(file) if err != nil { return nil, nil, err } extInfos, err := loadExtInfosFromELF(file, spec) if err != nil && !errors.Is(err, ErrNotFound) { return nil, nil, err } return spec, extInfos, nil } // symbolOffsets extracts all symbols offsets from an ELF and indexes them by // section and variable name. // // References to variables in BTF data sections carry unsigned 32-bit offsets. // Some ELF symbols (e.g. in vmlinux) may point to virtual memory that is well // beyond this range. Since these symbols cannot be described by BTF info, // ignore them here. func symbolOffsets(file *internal.SafeELFFile) (map[elfSymbol]uint32, error) { symbols, err := file.Symbols() if err != nil { return nil, fmt.Errorf("can't read symbols: %v", err) } offsets := make(map[elfSymbol]uint32) for _, sym := range symbols { if idx := sym.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE { // Ignore things like SHN_ABS continue } if sym.Value > math.MaxUint32 { // VarSecinfo offset is u32, cannot reference symbols in higher regions. 
continue } if int(sym.Section) >= len(file.Sections) { return nil, fmt.Errorf("symbol %s: invalid section %d", sym.Name, sym.Section) } secName := file.Sections[sym.Section].Name offsets[elfSymbol{secName, sym.Name}] = uint32(sym.Value) } return offsets, nil } func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) { var ( btfSection *elf.Section sectionSizes = make(map[string]uint32) ) for _, sec := range file.Sections { switch sec.Name { case ".BTF": btfSection = sec default: if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS { break } if sec.Size > math.MaxUint32 { return nil, fmt.Errorf("section %s exceeds maximum size", sec.Name) } sectionSizes[sec.Name] = uint32(sec.Size) } } if btfSection == nil { return nil, fmt.Errorf("btf: %w", ErrNotFound) } offsets, err := symbolOffsets(file) if err != nil { return nil, err } rawBTF, err := btfSection.Data() if err != nil { return nil, fmt.Errorf("reading .BTF section: %w", err) } spec, err := loadRawSpec(rawBTF, nil) if err != nil { return nil, err } if spec.decoder.byteOrder != file.ByteOrder { return nil, fmt.Errorf("BTF byte order %s does not match ELF byte order %s", spec.decoder.byteOrder, file.ByteOrder) } spec.elf = &elfData{ sectionSizes, offsets, make(map[Type]bool), } return spec, nil } func loadRawSpec(btf []byte, base *Spec) (*Spec, error) { var ( baseDecoder *decoder baseStrings *stringTable err error ) if base != nil { baseDecoder = base.decoder baseStrings = base.strings } header, bo, err := parseBTFHeader(btf) if err != nil { return nil, fmt.Errorf("parsing .BTF header: %v", err) } if header.HdrLen > uint32(len(btf)) { return nil, fmt.Errorf("BTF header length is out of bounds") } btf = btf[header.HdrLen:] if int(header.StringOff+header.StringLen) > len(btf) { return nil, fmt.Errorf("string table is out of bounds") } stringsSection := btf[header.StringOff : header.StringOff+header.StringLen] rawStrings, err := newStringTable(stringsSection, baseStrings) if err != nil { return nil, 
fmt.Errorf("read string section: %w", err) } if int(header.TypeOff+header.TypeLen) > len(btf) { return nil, fmt.Errorf("types section is out of bounds") } typesSection := btf[header.TypeOff : header.TypeOff+header.TypeLen] decoder, err := newDecoder(typesSection, bo, rawStrings, baseDecoder) if err != nil { return nil, err } return &Spec{decoder, nil}, nil } // fixupDatasec attempts to patch up missing info in Datasecs and its members by // supplementing them with information from the ELF headers and symbol table. func (elf *elfData) fixupDatasec(typ Type) error { if elf == nil { return nil } if ds, ok := typ.(*Datasec); ok { if elf.fixups[ds] { return nil } elf.fixups[ds] = true name := ds.Name // Some Datasecs are virtual and don't have corresponding ELF sections. switch name { case ".ksyms": // .ksyms describes forward declarations of kfunc signatures, as well as // references to kernel symbols. // Nothing to fix up, all sizes and offsets are 0. for _, vsi := range ds.Vars { switch t := vsi.Type.(type) { case *Func: continue case *Var: if _, ok := t.Type.(*Void); !ok { return fmt.Errorf("data section %s: expected %s to be *Void, not %T: %w", name, vsi.Type.TypeName(), vsi.Type, ErrNotSupported) } default: return fmt.Errorf("data section %s: expected to be either *btf.Func or *btf.Var, not %T: %w", name, vsi.Type, ErrNotSupported) } } return nil case ".kconfig": // .kconfig has a size of 0 and has all members' offsets set to 0. // Fix up all offsets and set the Datasec's size. if err := fixupDatasecLayout(ds); err != nil { return err } // Fix up extern to global linkage to avoid a BTF verifier error. 
for _, vsi := range ds.Vars { vsi.Type.(*Var).Linkage = GlobalVar } return nil } if ds.Size != 0 { return nil } ds.Size, ok = elf.sectionSizes[name] if !ok { return fmt.Errorf("data section %s: missing size", name) } for i := range ds.Vars { symName := ds.Vars[i].Type.TypeName() ds.Vars[i].Offset, ok = elf.symbolOffsets[elfSymbol{name, symName}] if !ok { return fmt.Errorf("data section %s: missing offset for symbol %s", name, symName) } } } return nil } // fixupDatasecLayout populates ds.Vars[].Offset according to var sizes and // alignment. Calculate and set ds.Size. func fixupDatasecLayout(ds *Datasec) error { var off uint32 for i, vsi := range ds.Vars { v, ok := vsi.Type.(*Var) if !ok { return fmt.Errorf("member %d: unsupported type %T", i, vsi.Type) } size, err := Sizeof(v.Type) if err != nil { return fmt.Errorf("variable %s: getting size: %w", v.Name, err) } align, err := alignof(v.Type) if err != nil { return fmt.Errorf("variable %s: getting alignment: %w", v.Name, err) } // Align the current member based on the offset of the end of the previous // member and the alignment of the current member. off = internal.Align(off, uint32(align)) ds.Vars[i].Offset = off off += uint32(size) } ds.Size = off return nil } // Copy a Spec. // // All contained types are duplicated while preserving any modifications made // to them. func (s *Spec) Copy() *Spec { if s == nil { return nil } cpy := &Spec{ s.decoder.Copy(), nil, } if s.elf != nil { cpy.elf = &elfData{ s.elf.sectionSizes, s.elf.symbolOffsets, maps.Clone(s.elf.fixups), } } return cpy } // TypeByID returns the BTF Type with the given type ID. // // Returns an error wrapping ErrNotFound if a Type with the given ID // does not exist in the Spec. func (s *Spec) TypeByID(id TypeID) (Type, error) { typ, err := s.decoder.TypeByID(id) if err != nil { return nil, fmt.Errorf("inflate type: %w", err) } if err := s.elf.fixupDatasec(typ); err != nil { return nil, err } return typ, nil } // TypeID returns the ID for a given Type. 
// // Returns an error wrapping [ErrNotFound] if the type isn't part of the Spec. func (s *Spec) TypeID(typ Type) (TypeID, error) { return s.decoder.TypeID(typ) } // AnyTypesByName returns a list of BTF Types with the given name. // // If the BTF blob describes multiple compilation units like vmlinux, multiple // Types with the same name and kind can exist, but might not describe the same // data structure. // // Returns an error wrapping ErrNotFound if no matching Type exists in the Spec. func (s *Spec) AnyTypesByName(name string) ([]Type, error) { types, err := s.TypesByName(newEssentialName(name)) if err != nil { return nil, err } for i := 0; i < len(types); i++ { // Match against the full name, not just the essential one // in case the type being looked up is a struct flavor. if types[i].TypeName() != name { types = slices.Delete(types, i, i+1) continue } if err := s.elf.fixupDatasec(types[i]); err != nil { return nil, err } } return types, nil } // AnyTypeByName returns a Type with the given name. // // Returns an error if multiple types of that name exist. func (s *Spec) AnyTypeByName(name string) (Type, error) { types, err := s.AnyTypesByName(name) if err != nil { return nil, err } if len(types) > 1 { return nil, fmt.Errorf("found multiple types: %v", types) } return types[0], nil } // TypeByName searches for a Type with a specific name. Since multiple Types // with the same name can exist, the parameter typ is taken to narrow down the // search in case of a clash. // // typ must be a non-nil pointer to an implementation of a Type. On success, the // address of the found Type will be copied to typ. // // Returns an error wrapping ErrNotFound if no matching Type exists in the Spec. // Returns an error wrapping ErrMultipleTypes if multiple candidates are found. 
func (s *Spec) TypeByName(name string, typ interface{}) error { typeInterface := reflect.TypeOf((*Type)(nil)).Elem() // typ may be **T or *Type typValue := reflect.ValueOf(typ) if typValue.Kind() != reflect.Ptr { return fmt.Errorf("%T is not a pointer", typ) } typPtr := typValue.Elem() if !typPtr.CanSet() { return fmt.Errorf("%T cannot be set", typ) } wanted := typPtr.Type() if wanted == typeInterface { // This is *Type. Unwrap the value's type. wanted = typPtr.Elem().Type() } if !wanted.AssignableTo(typeInterface) { return fmt.Errorf("%T does not satisfy Type interface", typ) } types, err := s.AnyTypesByName(name) if err != nil { return err } var candidate Type for _, typ := range types { if reflect.TypeOf(typ) != wanted { continue } if candidate != nil { return fmt.Errorf("type %s(%T): %w", name, typ, ErrMultipleMatches) } candidate = typ } if candidate == nil { return fmt.Errorf("%s %s: %w", wanted, name, ErrNotFound) } typPtr.Set(reflect.ValueOf(candidate)) return nil } // LoadSplitSpec loads split BTF from the given file. // // Types from base are used to resolve references in the split BTF. // The returned Spec only contains types from the split BTF, not from the base. func LoadSplitSpec(file string, base *Spec) (*Spec, error) { fh, err := os.Open(file) if err != nil { return nil, err } defer fh.Close() return LoadSplitSpecFromReader(fh, base) } // LoadSplitSpecFromReader loads split BTF from a reader. // // Types from base are used to resolve references in the split BTF. // The returned Spec only contains types from the split BTF, not from the base. func LoadSplitSpecFromReader(r io.ReaderAt, base *Spec) (*Spec, error) { raw, err := io.ReadAll(io.NewSectionReader(r, 0, math.MaxInt64)) if err != nil { return nil, fmt.Errorf("read raw BTF: %w", err) } return loadRawSpec(raw, base) } // All iterates over all types. 
func (s *Spec) All() iter.Seq2[Type, error] { return func(yield func(Type, error) bool) { for id := s.firstTypeID; ; id++ { typ, err := s.TypeByID(id) if errors.Is(err, ErrNotFound) { return } else if err != nil { yield(nil, err) return } // Skip declTags, during unmarshaling declTags become `Tags` fields of other types. // We keep them in the spec to avoid holes in the ID space, but for the purposes of // iteration, they are not useful to the user. if _, ok := typ.(*declTag); ok { continue } if !yield(typ, nil) { return } } } } ================================================ FILE: btf/btf_test.go ================================================ package btf import ( "bytes" "encoding/binary" "errors" "fmt" "io/fs" "os" "runtime" "sync" "sync/atomic" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" ) func vmlinuxSpec(tb testing.TB) *Spec { tb.Helper() // /sys/kernel/btf was introduced in 341dfcf8d78e ("btf: expose BTF info // through sysfs"), which shipped in Linux 5.4. 
if _, err := os.Stat("/sys/kernel/btf/vmlinux"); errors.Is(err, fs.ErrNotExist) { tb.Skip("No /sys/kernel/btf/vmlinux") } spec, err := LoadKernelSpec() if err != nil { tb.Fatal(err) } return spec } type specAndRawBTF struct { raw []byte spec *Spec } var vmlinuxTestdata = sync.OnceValues(func() (specAndRawBTF, error) { b, err := internal.ReadAllCompressed("testdata/vmlinux.btf.gz") if err != nil { return specAndRawBTF{}, err } spec, err := loadRawSpec(b, nil) if err != nil { return specAndRawBTF{}, err } return specAndRawBTF{b, spec}, nil }) func vmlinuxTestdataSpec(tb testing.TB) *Spec { tb.Helper() td, err := vmlinuxTestdata() if err != nil { tb.Fatal(err) } return td.spec.Copy() } func vmlinuxTestdataBytes(tb testing.TB) []byte { tb.Helper() td, err := vmlinuxTestdata() if err != nil { tb.Fatal(err) } return td.raw } func parseELFBTF(tb testing.TB, file string) *Spec { tb.Helper() spec, err := LoadSpec(file) if err != nil { tb.Fatal("Can't load BTF:", err) } return spec } func TestAnyTypesByName(t *testing.T) { testutils.Files(t, testutils.Glob(t, "testdata/relocs-*.elf"), func(t *testing.T, file string) { spec := parseELFBTF(t, file) types, err := spec.AnyTypesByName("ambiguous") if err != nil { t.Fatal(err) } if len(types) != 1 { t.Fatalf("expected to receive exactly 1 types from querying ambiguous type, got: %v", types) } types, err = spec.AnyTypesByName("ambiguous___flavour") if err != nil { t.Fatal(err) } if len(types) != 1 { t.Fatalf("expected to receive exactly 1 type from querying ambiguous flavour, got: %v", types) } }) } func TestTypeByNameAmbiguous(t *testing.T) { testutils.Files(t, testutils.Glob(t, "testdata/relocs-*.elf"), func(t *testing.T, file string) { spec := parseELFBTF(t, file) var typ *Struct if err := spec.TypeByName("ambiguous", &typ); err != nil { t.Fatal(err) } if name := typ.TypeName(); name != "ambiguous" { t.Fatal("expected type name 'ambiguous', got:", name) } if err := spec.TypeByName("ambiguous___flavour", &typ); err != nil { 
t.Fatal(err) } if name := typ.TypeName(); name != "ambiguous___flavour" { t.Fatal("expected type name 'ambiguous___flavour', got:", name) } }) } func TestTypeByName(t *testing.T) { spec := vmlinuxTestdataSpec(t) for _, typ := range []interface{}{ nil, Struct{}, &Struct{}, []Struct{}, &[]Struct{}, map[int]Struct{}, &map[int]Struct{}, int(0), new(int), } { t.Run(fmt.Sprintf("%T", typ), func(t *testing.T) { // spec.TypeByName MUST fail if typ is a nil btf.Type. if err := spec.TypeByName("iphdr", typ); err == nil { t.Fatalf("TypeByName does not fail with type %T", typ) } }) } // spec.TypeByName MUST return the same address for multiple calls with the same type name. var iphdr1, iphdr2 *Struct if err := spec.TypeByName("iphdr", &iphdr1); err != nil { t.Fatal(err) } if err := spec.TypeByName("iphdr", &iphdr2); err != nil { t.Fatal(err) } if iphdr1 != iphdr2 { t.Fatal("multiple TypeByName calls for `iphdr` name do not return the same addresses") } // It's valid to pass a *Type to TypeByName. typ := Type(iphdr2) if err := spec.TypeByName("iphdr", &typ); err != nil { t.Fatal("Can't look up using *Type:", err) } // Excerpt from linux/ip.h, https://elixir.bootlin.com/linux/latest/A/ident/iphdr // // struct iphdr { // #if defined(__LITTLE_ENDIAN_BITFIELD) // __u8 ihl:4, version:4; // #elif defined (__BIG_ENDIAN_BITFIELD) // __u8 version:4, ihl:4; // #else // ... // } // // The BTF we test against is for little endian. 
m := iphdr1.Members[1] if m.Name != "version" { t.Fatal("Expected version as the second member, got", m.Name) } td, ok := m.Type.(*Typedef) if !ok { t.Fatalf("version member of iphdr should be a __u8 typedef: actual: %T", m.Type) } u8, ok := td.Type.(*Int) if !ok { t.Fatalf("__u8 typedef should point to an Int type: actual: %T", td.Type) } if m.BitfieldSize != 4 { t.Fatalf("incorrect bitfield size: expected: 4 actual: %d", m.BitfieldSize) } if u8.Encoding != 0 { t.Fatalf("incorrect encoding of an __u8 int: expected: 0 actual: %x", u8.Encoding) } if m.Offset != 4 { t.Fatalf("incorrect bitfield offset: expected: 4 actual: %d", m.Offset) } } func BenchmarkParseVmlinux(b *testing.B) { vmlinux := vmlinuxTestdataBytes(b) b.ReportAllocs() for b.Loop() { if _, err := loadRawSpec(vmlinux, nil); err != nil { b.Fatal("Can't load BTF:", err) } } } func BenchmarkIterateVmlinux(b *testing.B) { vmlinux := vmlinuxTestdataBytes(b) b.ReportAllocs() for b.Loop() { spec, err := loadRawSpec(vmlinux, nil) if err != nil { b.Fatal("Can't load BTF:", err) } for range spec.All() { } } } func TestParseCurrentKernelBTF(t *testing.T) { spec := vmlinuxSpec(t) if len(spec.offsets) == 0 { t.Fatal("Empty kernel BTF") } } func TestFindVMLinux(t *testing.T) { file, err := findVMLinux() testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't find vmlinux:", err) } defer file.Close() spec, err := LoadSpecFromReader(file) if err != nil { t.Fatal("Can't load BTF:", err) } if len(spec.offsets) == 0 { t.Fatal("Empty kernel BTF") } } func TestLoadSpecFromElf(t *testing.T) { testutils.Files(t, testutils.Glob(t, "../testdata/loader-e*.elf"), func(t *testing.T, file string) { spec := parseELFBTF(t, file) vt, err := spec.TypeByID(0) if err != nil { t.Error("Can't retrieve void type by ID:", err) } if _, ok := vt.(*Void); !ok { t.Errorf("Expected Void for type id 0, but got: %T", vt) } var bpfMapDef *Struct if err := spec.TypeByName("bpf_map_def", &bpfMapDef); err != nil { t.Error("Can't find 
bpf_map_def:", err) } var tmp *Void if err := spec.TypeByName("totally_bogus_type", &tmp); !errors.Is(err, ErrNotFound) { t.Error("TypeByName doesn't return ErrNotFound:", err) } var fn *Func if err := spec.TypeByName("global_fn", &fn); err != nil { t.Error("Can't find global_fn():", err) } else { if fn.Linkage != GlobalFunc { t.Error("Expected global linkage:", fn) } } var v *Var if err := spec.TypeByName("key3", &v); err != nil { t.Error("Can't find key3:", err) } else { if v.Linkage != GlobalVar { t.Error("Expected global linkage:", v) } } }) } func TestVerifierError(t *testing.T) { b, err := NewBuilder([]Type{&Int{Encoding: 255}}, nil) qt.Assert(t, qt.IsNil(err)) _, err = NewHandle(b) testutils.SkipIfNotSupported(t, err) var ve *internal.VerifierError if !errors.As(err, &ve) { t.Fatalf("expected a VerifierError, got: %v", err) } } func TestSpecCopy(t *testing.T) { qt.Check(t, qt.IsNil((*Spec)(nil).Copy())) spec := parseELFBTF(t, "../testdata/loader-el.elf") cpy := spec.Copy() have := typesFromSpec(t, spec) qt.Assert(t, qt.IsTrue(len(have) > 0)) want := typesFromSpec(t, cpy) qt.Assert(t, qt.HasLen(want, len(have))) for i := range want { if _, ok := have[i].(*Void); ok { // Since Void is an empty struct, a Type interface value containing // &Void{} stores (*Void, nil). Since interface equality first compares // the type and then the concrete value, Void is always equal. 
continue } if have[i] == want[i] { t.Fatalf("Type at index %d is not a copy: %T == %T", i, have[i], want[i]) } } } func TestSpecCopyModifications(t *testing.T) { spec := specFromTypes(t, []Type{&Int{Name: "a", Size: 4}}) typ, err := spec.TypeByID(1) qt.Assert(t, qt.IsNil(err)) i := typ.(*Int) i.Name = "b" i.Size = 2 cpy := spec.Copy() typ2, err := cpy.TypeByID(1) qt.Assert(t, qt.IsNil(err)) i2 := typ2.(*Int) qt.Assert(t, qt.Not(qt.Equals(i2, i)), qt.Commentf("Types are distinct")) qt.Assert(t, qt.DeepEquals(i2, i), qt.Commentf("Modifications are preserved")) i.Name = "bar" qt.Assert(t, qt.Equals(i2.Name, "b")) } func TestSpecTypeByID(t *testing.T) { spec := specFromTypes(t, nil) _, err := spec.TypeByID(0) qt.Assert(t, qt.IsNil(err)) _, err = spec.TypeByID(1) qt.Assert(t, qt.ErrorIs(err, ErrNotFound)) } func ExampleSpec_TypeByName() { // Acquire a Spec via one of its constructors. spec := new(Spec) // Declare a variable of the desired type var foo *Struct if err := spec.TypeByName("foo", &foo); err != nil { // There is no struct with name foo, or there // are multiple possibilities. 
} // We've found struct foo fmt.Println(foo.Name) } func TestTypesIterator(t *testing.T) { types := []Type{(*Void)(nil), &Int{Size: 4}, &Int{Size: 2}} b, err := NewBuilder(types[1:], nil) if err != nil { t.Fatal(err) } raw, err := b.Marshal(nil, nil) if err != nil { t.Fatal(err) } spec, err := LoadSpecFromReader(bytes.NewReader(raw)) if err != nil { t.Fatal(err) } var have []Type for typ, err := range spec.All() { qt.Assert(t, qt.IsNil(err)) have = append(have, typ) } qt.Assert(t, qt.DeepEquals(have, types)) } func TestLoadSplitSpec(t *testing.T) { spec, err := LoadSpec("testdata/btf_testmod.btf.base") if err != nil { t.Fatal(err) } splitSpec, err := LoadSplitSpec("testdata/btf_testmod.btf", spec) if err != nil { t.Fatal(err) } var fnType *Func qt.Assert(t, qt.IsNil(splitSpec.TypeByName("bpf_testmod_init", &fnType))) typeID, err := splitSpec.TypeID(fnType) qt.Assert(t, qt.IsNil(err)) typeByID, err := splitSpec.TypeByID(typeID) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(typeByID, Type(fnType))) fnProto := fnType.Type.(*FuncProto) // 'int' is defined in the base BTF... intType, err := spec.AnyTypeByName("int") qt.Assert(t, qt.IsNil(err)) // ... but not in the split BTF _, err = splitSpec.AnyTypeByName("int") qt.Assert(t, qt.ErrorIs(err, ErrNotFound)) qt.Assert(t, qt.Equals(fnProto.Return, intType), qt.Commentf("types found in base of split spec should be reused")) fnProto.Params = []FuncParam{{"a", &Pointer{(*Void)(nil)}}} // The behaviour of copying a split spec is quite subtle. When initially // creating a split spec, types in the split base are shared. This allows // amortising the cost of decoding vmlinux. // // However, we currently define copying a spec to be like forking a process: // in-memory changes to types are preserved. After the copy finished we have // two fully independent states. // // For split BTF this means that we also need to copy the base and ensure // that future references to a modified type work correctly. 
splitSpecCopy := splitSpec.Copy() var fnCopyType *Func qt.Assert(t, qt.IsNil(splitSpecCopy.TypeByName("bpf_testmod_init", &fnCopyType))) qt.Assert(t, testutils.IsDeepCopy(fnCopyType, fnType)) // Pull out a second type which refers to "int" in the base, but which hasn't // been inflated yet. This forces inflating int from the base. var str *Struct qt.Assert(t, qt.IsNil(splitSpecCopy.TypeByName("bpf_testmod_struct_arg_1", &str))) // Ensure that the int types are indeed the same. qt.Assert(t, qt.Equals(str.Members[0].Type, fnCopyType.Type.(*FuncProto).Return)) copyTypeID, err := splitSpecCopy.TypeID(fnCopyType) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(copyTypeID, typeID), qt.Commentf("ID of copied type must match")) } func TestFixupDatasecLayout(t *testing.T) { ds := &Datasec{ Size: 0, // Populated by fixup. Vars: []VarSecinfo{ {Type: &Var{Type: &Int{Size: 4}}}, {Type: &Var{Type: &Int{Size: 1}}}, {Type: &Var{Type: &Int{Size: 1}}}, {Type: &Var{Type: &Int{Size: 2}}}, {Type: &Var{Type: &Int{Size: 16}}}, {Type: &Var{Type: &Int{Size: 8}}}, }, } qt.Assert(t, qt.IsNil(fixupDatasecLayout(ds))) qt.Assert(t, qt.Equals(ds.Size, 40)) qt.Assert(t, qt.Equals(ds.Vars[0].Offset, 0)) qt.Assert(t, qt.Equals(ds.Vars[1].Offset, 4)) qt.Assert(t, qt.Equals(ds.Vars[2].Offset, 5)) qt.Assert(t, qt.Equals(ds.Vars[3].Offset, 6)) qt.Assert(t, qt.Equals(ds.Vars[4].Offset, 16)) qt.Assert(t, qt.Equals(ds.Vars[5].Offset, 32)) } func TestSpecConcurrentAccess(t *testing.T) { spec := vmlinuxTestdataSpec(t) maxprocs := runtime.GOMAXPROCS(0) if maxprocs < 2 { t.Error("GOMAXPROCS is lower than 2:", maxprocs) } var cond atomic.Int64 var wg sync.WaitGroup for i := 0; i < maxprocs; i++ { wg.Add(1) go func() { defer wg.Done() n := cond.Add(1) for cond.Load() != int64(maxprocs) { // Spin to increase the chances of a race. } if n%2 == 0 { _, _ = spec.AnyTypeByName("gov_update_cpu_data") } else { _ = spec.Copy() } }() // Try to get the Goroutines scheduled and spinning. 
runtime.Gosched() } wg.Wait() } func TestLoadEmptyRawSpec(t *testing.T) { buf, err := binary.Append(nil, binary.LittleEndian, &btfHeader{ Magic: btfMagic, Version: 1, Flags: 0, HdrLen: uint32(btfHeaderLen), TypeOff: 0, TypeLen: 0, StringOff: 0, StringLen: 0, }) qt.Assert(t, qt.IsNil(err)) _, err = loadRawSpec(buf, nil) qt.Assert(t, qt.IsNil(err)) } func BenchmarkSpecCopy(b *testing.B) { spec := vmlinuxTestdataSpec(b) for b.Loop() { spec.Copy() } } func BenchmarkSpecTypeByID(b *testing.B) { spec := vmlinuxTestdataSpec(b) b.ReportAllocs() for b.Loop() { _, err := spec.TypeByID(1) if err != nil { b.Fatal(err) } } } func BenchmarkInspektorGadget(b *testing.B) { // This benchmark is the baseline for what Inspektor Gadget loads for a // common configuration. types := []string{ "pt_regs", "file", "inode", "super_block", "socket", "syscall_trace_enter", "task_struct", "nsproxy", "mnt_namespace", // "fanotify_event", "pid", "trace_event_raw_sched_process_exec", "fs_struct", "path", "mount", "qstr", "vfsmount", "dentry", // "bpf_func_id", "mm_struct", "syscall_trace_exit", "linux_binprm", "sock", "net", "inet_sock", } vmlinux, err := internal.ReadAllCompressed("testdata/vmlinux.btf.gz") qt.Assert(b, qt.IsNil(err)) var rd bytes.Reader for b.Loop() { rd.Reset(vmlinux) spec, err := LoadSpecFromReader(&rd) if err != nil { b.Fatal(err) } var s *Struct for _, name := range types { if err := spec.TypeByName(name, &s); err != nil { b.Fatal(name, err) } } } } ================================================ FILE: btf/btf_types.go ================================================ package btf import ( "encoding/binary" "errors" "fmt" "unsafe" ) //go:generate go tool stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind // btfKind describes a Type. type btfKind uint8 // Equivalents of the BTF_KIND_* constants. 
const ( kindUnknown btfKind = iota // Unknown kindInt // Int kindPointer // Pointer kindArray // Array kindStruct // Struct kindUnion // Union kindEnum // Enum kindForward // Forward kindTypedef // Typedef kindVolatile // Volatile kindConst // Const kindRestrict // Restrict // Added ~4.20 kindFunc // Func kindFuncProto // FuncProto // Added ~5.1 kindVar // Var kindDatasec // Datasec // Added ~5.13 kindFloat // Float // Added 5.16 kindDeclTag // DeclTag // Added 5.17 kindTypeTag // TypeTag // Added 6.0 kindEnum64 // Enum64 ) // FuncLinkage describes BTF function linkage metadata. type FuncLinkage int // Equivalent of enum btf_func_linkage. const ( StaticFunc FuncLinkage = iota // static GlobalFunc // global ExternFunc // extern ) // VarLinkage describes BTF variable linkage metadata. type VarLinkage int const ( StaticVar VarLinkage = iota // static GlobalVar // global ExternVar // extern ) const ( btfTypeKindShift = 24 btfTypeKindLen = 5 btfTypeVlenShift = 0 btfTypeVlenMask = 16 btfTypeKindFlagShift = 31 btfTypeKindFlagMask = 1 ) var btfHeaderLen = binary.Size(&btfHeader{}) type btfHeader struct { Magic uint16 Version uint8 Flags uint8 HdrLen uint32 TypeOff uint32 TypeLen uint32 StringOff uint32 StringLen uint32 } // parseBTFHeader parses the header of the .BTF section. 
func parseBTFHeader(buf []byte) (*btfHeader, binary.ByteOrder, error) { var header btfHeader var bo binary.ByteOrder for _, order := range []binary.ByteOrder{binary.LittleEndian, binary.BigEndian} { n, err := binary.Decode(buf, order, &header) if err != nil { return nil, nil, fmt.Errorf("read header: %v", err) } if header.Magic != btfMagic { continue } buf = buf[n:] bo = order break } if bo == nil { return nil, nil, fmt.Errorf("no valid BTF header") } if header.Version != 1 { return nil, nil, fmt.Errorf("unexpected version %v", header.Version) } if header.Flags != 0 { return nil, nil, fmt.Errorf("unsupported flags %v", header.Flags) } remainder := int64(header.HdrLen) - int64(binary.Size(&header)) if remainder < 0 { return nil, nil, errors.New("header length shorter than btfHeader size") } for _, b := range buf[:remainder] { if b != 0 { return nil, nil, errors.New("header contains non-zero trailer") } } return &header, bo, nil } // btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst. type btfType struct { NameOff uint32 /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members), linkage * bits 16-23: unused * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ Info uint32 /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. 
*/ SizeType uint32 }

// btfTypeSize is the size of btfType in bytes. It is both the minimum buffer
// length accepted by unmarshalBtfType and the byte count it reports.
var btfTypeSize = int(unsafe.Sizeof(btfType{}))

// unmarshalBtfType decodes NameOff, Info and SizeType from the first bytes of
// b into bt, using byte order bo.
//
// Returns the number of bytes consumed, or an error if b is shorter than
// btfTypeSize.
func unmarshalBtfType(bt *btfType, b []byte, bo binary.ByteOrder) (int, error) {
	if len(b) < btfTypeSize {
		return 0, fmt.Errorf("not enough bytes to unmarshal btfType")
	}

	// Three consecutive 32-bit words at offsets 0, 4 and 8.
	bt.NameOff = bo.Uint32(b[0:])
	bt.Info = bo.Uint32(b[4:])
	bt.SizeType = bo.Uint32(b[8:])
	return btfTypeSize, nil
}

// mask returns a value with the len least significant bits set.
func mask(len uint32) uint32 {
	return (1 << len) - 1
}

// readBits extracts the len-bit wide field which starts at bit shift of value.
func readBits(value, len, shift uint32) uint32 {
	return (value >> shift) & mask(len)
}

// writeBits returns value with the len-bit wide field at bit shift replaced
// by new. Bits of new above len are discarded.
func writeBits(value, len, shift, new uint32) uint32 {
	// Clear the target field, then OR in the truncated replacement.
	value &^= mask(len) << shift
	value |= (new & mask(len)) << shift
	return value
}

// info reads a len-bit wide field at bit shift from bt.Info.
func (bt *btfType) info(len, shift uint32) uint32 {
	return readBits(bt.Info, len, shift)
}

// setInfo stores value into the len-bit wide field at bit shift of bt.Info.
func (bt *btfType) setInfo(value, len, shift uint32) {
	bt.Info = writeBits(bt.Info, len, shift, value)
}

// Kind returns the kind field of Info.
func (bt *btfType) Kind() btfKind {
	return btfKind(bt.info(btfTypeKindLen, btfTypeKindShift))
}

// SetKind overwrites the kind field of Info.
func (bt *btfType) SetKind(kind btfKind) {
	bt.setInfo(uint32(kind), btfTypeKindLen, btfTypeKindShift)
}

// Vlen returns the vlen field of Info.
//
// Note that btfTypeVlenMask is passed as the field width in bits here, not
// as a bit mask.
func (bt *btfType) Vlen() int {
	return int(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}

// SetVlen overwrites the vlen field of Info. Bits of vlen which do not fit
// into the field are discarded by writeBits.
func (bt *btfType) SetVlen(vlen int) {
	bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift)
}

// kindFlagBool reports whether the kind_flag field of Info is set.
func (bt *btfType) kindFlagBool() bool {
	return bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift) == 1
}

// setKindFlagBool sets or clears the kind_flag field of Info.
func (bt *btfType) setKindFlagBool(set bool) {
	var value uint32
	if set {
		value = 1
	}

	bt.setInfo(value, btfTypeKindFlagMask, btfTypeKindFlagShift)
}

// Bitfield returns true if the struct or union contain a bitfield.
func (bt *btfType) Bitfield() bool {
	return bt.kindFlagBool()
}

// SetBitfield sets or clears the kind_flag field, which Bitfield reads.
func (bt *btfType) SetBitfield(isBitfield bool) {
	bt.setKindFlagBool(isBitfield)
}

// FwdKind returns the kind_flag field interpreted as a FwdKind.
func (bt *btfType) FwdKind() FwdKind {
	return FwdKind(bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift))
}

// SetFwdKind stores kind in the kind_flag field.
func (bt *btfType) SetFwdKind(kind FwdKind) {
	bt.setInfo(uint32(kind), btfTypeKindFlagMask, btfTypeKindFlagShift)
}

// Signed reports whether the kind_flag field is set.
func (bt *btfType) Signed() bool {
	return bt.kindFlagBool()
}

// SetSigned sets or clears the kind_flag field, which Signed reads.
func (bt *btfType) SetSigned(signed bool) {
	bt.setKindFlagBool(signed)
}

// Linkage returns the linkage stored in Info. It reads the same bits as Vlen.
func (bt *btfType) Linkage() FuncLinkage {
	return FuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}

// SetLinkage stores linkage in Info. It writes the same bits as SetVlen.
func (bt *btfType) SetLinkage(linkage FuncLinkage) {
	bt.setInfo(uint32(linkage), btfTypeVlenMask, btfTypeVlenShift)
}

// Type returns SizeType interpreted as a type ID. No check is made that the
// kind actually uses SizeType as a type reference.
func (bt *btfType) Type() TypeID {
	// TODO: Panic here if wrong kind?
	return TypeID(bt.SizeType)
}

// SetType stores id in SizeType.
func (bt *btfType) SetType(id TypeID) {
	bt.SizeType = uint32(id)
}

// Size returns SizeType interpreted as a size. No check is made that the kind
// actually uses SizeType as a size.
func (bt *btfType) Size() uint32 {
	// TODO: Panic here if wrong kind?
	return bt.SizeType
}

// SetSize stores size in SizeType.
func (bt *btfType) SetSize(size uint32) {
	bt.SizeType = size
}

// Encode writes NameOff, Info and SizeType to the start of buf using byte
// order bo.
//
// Returns the number of bytes written, or an error if buf is shorter than
// btfTypeSize.
func (bt *btfType) Encode(buf []byte, bo binary.ByteOrder) (int, error) {
	if len(buf) < btfTypeSize {
		return 0, fmt.Errorf("not enough bytes to marshal btfType")
	}
	// Same layout as read by unmarshalBtfType: three 32-bit words at offsets
	// 0, 4 and 8.
	bo.PutUint32(buf[0:], bt.NameOff)
	bo.PutUint32(buf[4:], bt.Info)
	bo.PutUint32(buf[8:], bt.SizeType)
	return btfTypeSize, nil
}

// DataLen returns the length of additional type specific data in bytes.
func (bt *btfType) DataLen() (int, error) { switch bt.Kind() { case kindInt: return int(unsafe.Sizeof(btfInt{})), nil case kindPointer: case kindArray: return int(unsafe.Sizeof(btfArray{})), nil case kindStruct: fallthrough case kindUnion: return int(unsafe.Sizeof(btfMember{})) * bt.Vlen(), nil case kindEnum: return int(unsafe.Sizeof(btfEnum{})) * bt.Vlen(), nil case kindForward: case kindTypedef: case kindVolatile: case kindConst: case kindRestrict: case kindFunc: case kindFuncProto: return int(unsafe.Sizeof(btfParam{})) * bt.Vlen(), nil case kindVar: return int(unsafe.Sizeof(btfVariable{})), nil case kindDatasec: return int(unsafe.Sizeof(btfVarSecinfo{})) * bt.Vlen(), nil case kindFloat: case kindDeclTag: return int(unsafe.Sizeof(btfDeclTag{})), nil case kindTypeTag: case kindEnum64: return int(unsafe.Sizeof(btfEnum64{})) * bt.Vlen(), nil default: return 0, fmt.Errorf("unknown kind: %v", bt.Kind()) } return 0, nil } // btfInt encodes additional data for integers. // // ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b // ? = undefined // e = encoding // o = offset (bitfields?) 
// b = bits (bitfields) type btfInt struct { Raw uint32 } const ( btfIntEncodingLen = 4 btfIntEncodingShift = 24 btfIntOffsetLen = 8 btfIntOffsetShift = 16 btfIntBitsLen = 8 btfIntBitsShift = 0 ) var btfIntLen = int(unsafe.Sizeof(btfInt{})) func unmarshalBtfInt(bi *btfInt, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfIntLen { return 0, fmt.Errorf("not enough bytes to unmarshal btfInt") } bi.Raw = bo.Uint32(b[0:]) return btfIntLen, nil } func (bi btfInt) Encoding() IntEncoding { return IntEncoding(readBits(bi.Raw, btfIntEncodingLen, btfIntEncodingShift)) } func (bi *btfInt) SetEncoding(e IntEncoding) { bi.Raw = writeBits(uint32(bi.Raw), btfIntEncodingLen, btfIntEncodingShift, uint32(e)) } func (bi btfInt) Offset() Bits { return Bits(readBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift)) } func (bi *btfInt) SetOffset(offset uint32) { bi.Raw = writeBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift, offset) } func (bi btfInt) Bits() Bits { return Bits(readBits(bi.Raw, btfIntBitsLen, btfIntBitsShift)) } func (bi *btfInt) SetBits(bits byte) { bi.Raw = writeBits(bi.Raw, btfIntBitsLen, btfIntBitsShift, uint32(bits)) } type btfArray struct { Type TypeID IndexType TypeID Nelems uint32 } var btfArrayLen = int(unsafe.Sizeof(btfArray{})) func unmarshalBtfArray(ba *btfArray, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfArrayLen { return 0, fmt.Errorf("not enough bytes to unmarshal btfArray") } ba.Type = TypeID(bo.Uint32(b[0:])) ba.IndexType = TypeID(bo.Uint32(b[4:])) ba.Nelems = bo.Uint32(b[8:]) return btfArrayLen, nil } type btfMember struct { NameOff uint32 Type TypeID Offset uint32 } var btfMemberLen = int(unsafe.Sizeof(btfMember{})) func unmarshalBtfMember(bm *btfMember, b []byte, bo binary.ByteOrder) (int, error) { if btfMemberLen > len(b) { return 0, fmt.Errorf("not enough bytes to unmarshal btfMember") } bm.NameOff = bo.Uint32(b[0:]) bm.Type = TypeID(bo.Uint32(b[4:])) bm.Offset = bo.Uint32(b[8:]) return btfMemberLen, nil } type btfVarSecinfo 
struct { Type TypeID Offset uint32 Size uint32 } var btfVarSecinfoLen = int(unsafe.Sizeof(btfVarSecinfo{})) func unmarshalBtfVarSecInfo(bvsi *btfVarSecinfo, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfVarSecinfoLen { return 0, fmt.Errorf("not enough bytes to unmarshal btfVarSecinfo") } bvsi.Type = TypeID(bo.Uint32(b[0:])) bvsi.Offset = bo.Uint32(b[4:]) bvsi.Size = bo.Uint32(b[8:]) return btfVarSecinfoLen, nil } type btfVariable struct { Linkage uint32 } var btfVariableLen = int(unsafe.Sizeof(btfVariable{})) func unmarshalBtfVariable(bv *btfVariable, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfVariableLen { return 0, fmt.Errorf("not enough bytes to unmarshal btfVariable") } bv.Linkage = bo.Uint32(b[0:]) return btfVariableLen, nil } type btfEnum struct { NameOff uint32 Val uint32 } var btfEnumLen = int(unsafe.Sizeof(btfEnum{})) func unmarshalBtfEnum(be *btfEnum, b []byte, bo binary.ByteOrder) (int, error) { if btfEnumLen > len(b) { return 0, fmt.Errorf("not enough bytes to unmarshal btfEnum") } be.NameOff = bo.Uint32(b[0:]) be.Val = bo.Uint32(b[4:]) return btfEnumLen, nil } type btfEnum64 struct { NameOff uint32 ValLo32 uint32 ValHi32 uint32 } var btfEnum64Len = int(unsafe.Sizeof(btfEnum64{})) func unmarshalBtfEnum64(enum *btfEnum64, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfEnum64Len { return 0, fmt.Errorf("not enough bytes to unmarshal btfEnum64") } enum.NameOff = bo.Uint32(b[0:]) enum.ValLo32 = bo.Uint32(b[4:]) enum.ValHi32 = bo.Uint32(b[8:]) return btfEnum64Len, nil } type btfParam struct { NameOff uint32 Type TypeID } var btfParamLen = int(unsafe.Sizeof(btfParam{})) func unmarshalBtfParam(param *btfParam, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfParamLen { return 0, fmt.Errorf("not enough bytes to unmarshal btfParam") } param.NameOff = bo.Uint32(b[0:]) param.Type = TypeID(bo.Uint32(b[4:])) return btfParamLen, nil } type btfDeclTag struct { ComponentIdx uint32 } var btfDeclTagLen = 
int(unsafe.Sizeof(btfDeclTag{})) func unmarshalBtfDeclTag(bdt *btfDeclTag, b []byte, bo binary.ByteOrder) (int, error) { if len(b) < btfDeclTagLen { return 0, fmt.Errorf("not enough bytes to unmarshal btfDeclTag") } bdt.ComponentIdx = bo.Uint32(b[0:]) return btfDeclTagLen, nil } ================================================ FILE: btf/btf_types_string.go ================================================ // Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind"; DO NOT EDIT. package btf import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[StaticFunc-0] _ = x[GlobalFunc-1] _ = x[ExternFunc-2] } const _FuncLinkage_name = "staticglobalextern" var _FuncLinkage_index = [...]uint8{0, 6, 12, 18} func (i FuncLinkage) String() string { idx := int(i) - 0 if i < 0 || idx >= len(_FuncLinkage_index)-1 { return "FuncLinkage(" + strconv.FormatInt(int64(i), 10) + ")" } return _FuncLinkage_name[_FuncLinkage_index[idx]:_FuncLinkage_index[idx+1]] } func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[StaticVar-0] _ = x[GlobalVar-1] _ = x[ExternVar-2] } const _VarLinkage_name = "staticglobalextern" var _VarLinkage_index = [...]uint8{0, 6, 12, 18} func (i VarLinkage) String() string { idx := int(i) - 0 if i < 0 || idx >= len(_VarLinkage_index)-1 { return "VarLinkage(" + strconv.FormatInt(int64(i), 10) + ")" } return _VarLinkage_name[_VarLinkage_index[idx]:_VarLinkage_index[idx+1]] } func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} _ = x[kindUnknown-0] _ = x[kindInt-1] _ = x[kindPointer-2] _ = x[kindArray-3] _ = x[kindStruct-4] _ = x[kindUnion-5] _ = x[kindEnum-6] _ = x[kindForward-7] _ = x[kindTypedef-8] _ = x[kindVolatile-9] _ = x[kindConst-10] _ = x[kindRestrict-11] _ = x[kindFunc-12] _ = x[kindFuncProto-13] _ = x[kindVar-14] _ = x[kindDatasec-15] _ = x[kindFloat-16] _ = x[kindDeclTag-17] _ = x[kindTypeTag-18] _ = x[kindEnum64-19] } const _btfKind_name = "UnknownIntPointerArrayStructUnionEnumForwardTypedefVolatileConstRestrictFuncFuncProtoVarDatasecFloatDeclTagTypeTagEnum64" var _btfKind_index = [...]uint8{0, 7, 10, 17, 22, 28, 33, 37, 44, 51, 59, 64, 72, 76, 85, 88, 95, 100, 107, 114, 120} func (i btfKind) String() string { idx := int(i) - 0 if i < 0 || idx >= len(_btfKind_index)-1 { return "btfKind(" + strconv.FormatInt(int64(i), 10) + ")" } return _btfKind_name[_btfKind_index[idx]:_btfKind_index[idx+1]] } ================================================ FILE: btf/core.go ================================================ package btf import ( "encoding/binary" "errors" "fmt" "math" "reflect" "strconv" "strings" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" ) // Code in this file is derived from libbpf, which is available under a BSD // 2-Clause license. // A constant used when CO-RE relocation has to remove instructions. // // Taken from libbpf. const COREBadRelocationSentinel = 0xbad2310 // COREFixup is the result of computing a CO-RE relocation for a target. type COREFixup struct { kind coreKind local uint64 target uint64 // True if there is no valid fixup. The instruction is replaced with an // invalid dummy. poison bool // True if the validation of the local value should be skipped. Used by // some kinds of bitfield relocations. 
skipLocalValidation bool } func (f *COREFixup) equal(other COREFixup) bool { return f.local == other.local && f.target == other.target } func (f *COREFixup) String() string { if f.poison { return fmt.Sprintf("%s=poison", f.kind) } return fmt.Sprintf("%s=%d->%d", f.kind, f.local, f.target) } func (f *COREFixup) Apply(ins *asm.Instruction) error { if !platform.IsLinux { return fmt.Errorf("CO-RE fixup: %w", internal.ErrNotSupportedOnOS) } if f.poison { // Relocation is poisoned, replace the instruction with an invalid one. if ins.OpCode.IsDWordLoad() { // Replace a dword load with a invalid dword load to preserve instruction size. *ins = asm.LoadImm(asm.R10, COREBadRelocationSentinel, asm.DWord) } else { // Replace all single size instruction with a invalid call instruction. *ins = asm.BuiltinFunc(COREBadRelocationSentinel).Call() } // Add context to the kernel verifier output. if source := ins.Source(); source != nil { *ins = ins.WithSource(asm.Comment(fmt.Sprintf("instruction poisoned by CO-RE: %s", source))) } else { *ins = ins.WithSource(asm.Comment("instruction poisoned by CO-RE")) } return nil } switch class := ins.OpCode.Class(); class { case asm.LdXClass, asm.StClass, asm.StXClass: if want := int16(f.local); !f.skipLocalValidation && want != ins.Offset { return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, f.local) } if f.target > math.MaxInt16 { return fmt.Errorf("offset %d exceeds MaxInt16", f.target) } ins.Offset = int16(f.target) case asm.LdClass: if !ins.IsConstantLoad(asm.DWord) { return fmt.Errorf("not a dword-sized immediate load") } if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant { return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v)", ins.Constant, want, f) } ins.Constant = int64(f.target) case asm.ALUClass: if ins.OpCode.ALUOp() == asm.Swap { return fmt.Errorf("relocation against swap") } fallthrough case asm.ALU64Class: if src := ins.OpCode.Source(); src != asm.ImmSource { return fmt.Errorf("invalid 
source %s", src) } if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant { return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v, kind: %v, ins: %v)", ins.Constant, want, f, f.kind, ins) } if f.target > math.MaxInt32 { return fmt.Errorf("immediate %d exceeds MaxInt32", f.target) } ins.Constant = int64(f.target) default: return fmt.Errorf("invalid class %s", class) } return nil } func (f COREFixup) isNonExistant() bool { return f.kind.checksForExistence() && f.target == 0 } // coreKind is the type of CO-RE relocation as specified in BPF source code. type coreKind uint32 const ( reloFieldByteOffset coreKind = iota /* field byte offset */ reloFieldByteSize /* field size in bytes */ reloFieldExists /* field existence in target kernel */ reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */ reloFieldLShiftU64 /* bitfield-specific left bitshift */ reloFieldRShiftU64 /* bitfield-specific right bitshift */ reloTypeIDLocal /* type ID in local BPF object */ reloTypeIDTarget /* type ID in target kernel */ reloTypeExists /* type existence in target kernel */ reloTypeSize /* type size in bytes */ reloEnumvalExists /* enum value existence in target kernel */ reloEnumvalValue /* enum value integer value */ reloTypeMatches /* type matches kernel type */ ) func (k coreKind) checksForExistence() bool { return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists || k == reloTypeMatches } func (k coreKind) String() string { switch k { case reloFieldByteOffset: return "byte_off" case reloFieldByteSize: return "byte_sz" case reloFieldExists: return "field_exists" case reloFieldSigned: return "signed" case reloFieldLShiftU64: return "lshift_u64" case reloFieldRShiftU64: return "rshift_u64" case reloTypeIDLocal: return "local_type_id" case reloTypeIDTarget: return "target_type_id" case reloTypeExists: return "type_exists" case reloTypeSize: return "type_size" case reloEnumvalExists: return "enumval_exists" case reloEnumvalValue: 
return "enumval_value" case reloTypeMatches: return "type_matches" default: return fmt.Sprintf("unknown (%d)", k) } } // CORERelocate calculates changes needed to adjust eBPF instructions for differences // in types. // // targets forms the set of types to relocate against. The first element has to be // BTF for vmlinux, the following must be types for kernel modules. // // resolveLocalTypeID is called for each local type which requires a stable TypeID. // Calling the function with the same type multiple times must produce the same // result. It is the callers responsibility to ensure that the relocated instructions // are loaded with matching BTF. // // Returns a list of fixups which can be applied to instructions to make them // match the target type(s). // // Fixups are returned in the order of relos, e.g. fixup[i] is the solution // for relos[i]. func CORERelocate(relos []*CORERelocation, targets []*Spec, bo binary.ByteOrder, resolveLocalTypeID func(Type) (TypeID, error)) ([]COREFixup, error) { if len(targets) == 0 { // Explicitly check for nil here since the argument used to be optional. return nil, fmt.Errorf("targets must be provided") } // We can't encode type IDs that aren't for vmlinux into instructions at the // moment. resolveTargetTypeID := targets[0].TypeID for _, target := range targets { if bo != target.byteOrder { return nil, fmt.Errorf("can't relocate %s against %s", bo, target.byteOrder) } } type reloGroup struct { relos []*CORERelocation // Position of each relocation in relos. indices []int } // Split relocations into per Type lists. relosByType := make(map[Type]*reloGroup) result := make([]COREFixup, len(relos)) for i, relo := range relos { if relo.kind == reloTypeIDLocal { // Filtering out reloTypeIDLocal here makes our lives a lot easier // down the line, since it doesn't have a target at all. 
if len(relo.accessor) > 1 || relo.accessor[0] != 0 { return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) } id, err := resolveLocalTypeID(relo.typ) if err != nil { return nil, fmt.Errorf("%s: get type id: %w", relo.kind, err) } result[i] = COREFixup{ kind: relo.kind, local: uint64(relo.id), target: uint64(id), } continue } group, ok := relosByType[relo.typ] if !ok { group = &reloGroup{} relosByType[relo.typ] = group } group.relos = append(group.relos, relo) group.indices = append(group.indices, i) } for localType, group := range relosByType { localTypeName := localType.TypeName() if localTypeName == "" { return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported) } essentialName := newEssentialName(localTypeName) var targetTypes []Type for _, target := range targets { namedTypes, err := target.TypesByName(essentialName) if errors.Is(err, ErrNotFound) { continue } else if err != nil { return nil, err } targetTypes = append(targetTypes, namedTypes...) } fixups, err := coreCalculateFixups(group.relos, targetTypes, bo, resolveTargetTypeID) if err != nil { return nil, fmt.Errorf("relocate %s: %w", localType, err) } for j, index := range group.indices { result[index] = fixups[j] } } return result, nil } var errAmbiguousRelocation = errors.New("ambiguous relocation") var errImpossibleRelocation = errors.New("impossible relocation") var errIncompatibleTypes = errors.New("incompatible types") // coreCalculateFixups finds the target type that best matches all relocations. // // All relos must target the same type. // // The best target is determined by scoring: the less poisoning we have to do // the better the target is. 
func coreCalculateFixups(relos []*CORERelocation, targets []Type, bo binary.ByteOrder, resolveTargetTypeID func(Type) (TypeID, error)) ([]COREFixup, error) { bestScore := len(relos) var bestFixups []COREFixup for _, target := range targets { score := 0 // lower is better fixups := make([]COREFixup, 0, len(relos)) for _, relo := range relos { fixup, err := coreCalculateFixup(relo, target, bo, resolveTargetTypeID) if err != nil { return nil, fmt.Errorf("target %s: %s: %w", target, relo.kind, err) } if fixup.poison || fixup.isNonExistant() { score++ } fixups = append(fixups, fixup) } if score > bestScore { // We have a better target already, ignore this one. continue } if score < bestScore { // This is the best target yet, use it. bestScore = score bestFixups = fixups continue } // Some other target has the same score as the current one. Make sure // the fixups agree with each other. for i, fixup := range bestFixups { if !fixup.equal(fixups[i]) { return nil, fmt.Errorf("%s: multiple types match: %w", fixup.kind, errAmbiguousRelocation) } } } if bestFixups == nil { // Nothing at all matched, probably because there are no suitable // targets at all. // // Poison everything except checksForExistence. bestFixups = make([]COREFixup, len(relos)) for i, relo := range relos { if relo.kind.checksForExistence() { bestFixups[i] = COREFixup{kind: relo.kind, local: 1, target: 0} } else { bestFixups[i] = COREFixup{kind: relo.kind, poison: true} } } } return bestFixups, nil } var errNoSignedness = errors.New("no signedness") // coreCalculateFixup calculates the fixup given a relocation and a target type. 
func coreCalculateFixup(relo *CORERelocation, target Type, bo binary.ByteOrder, resolveTargetTypeID func(Type) (TypeID, error)) (COREFixup, error) { fixup := func(local, target uint64) (COREFixup, error) { return COREFixup{kind: relo.kind, local: local, target: target}, nil } fixupWithoutValidation := func(local, target uint64) (COREFixup, error) { return COREFixup{kind: relo.kind, local: local, target: target, skipLocalValidation: true}, nil } poison := func() (COREFixup, error) { if relo.kind.checksForExistence() { return fixup(1, 0) } return COREFixup{kind: relo.kind, poison: true}, nil } zero := COREFixup{} local := relo.typ switch relo.kind { case reloTypeMatches: if len(relo.accessor) > 1 || relo.accessor[0] != 0 { return zero, fmt.Errorf("unexpected accessor %v", relo.accessor) } err := coreTypesMatch(local, target, nil) if errors.Is(err, errIncompatibleTypes) { return poison() } if err != nil { return zero, err } return fixup(1, 1) case reloTypeIDTarget, reloTypeSize, reloTypeExists: if len(relo.accessor) > 1 || relo.accessor[0] != 0 { return zero, fmt.Errorf("unexpected accessor %v", relo.accessor) } err := CheckTypeCompatibility(local, target) if errors.Is(err, errIncompatibleTypes) { return poison() } if err != nil { return zero, err } switch relo.kind { case reloTypeExists: return fixup(1, 1) case reloTypeIDTarget: targetID, err := resolveTargetTypeID(target) if errors.Is(err, ErrNotFound) { // Probably a relocation trying to get the ID // of a type from a kmod. 
return poison() } if err != nil { return zero, err } return fixup(uint64(relo.id), uint64(targetID)) case reloTypeSize: localSize, err := Sizeof(local) if err != nil { return zero, err } targetSize, err := Sizeof(target) if err != nil { return zero, err } return fixup(uint64(localSize), uint64(targetSize)) } case reloEnumvalValue, reloEnumvalExists: localValue, targetValue, err := coreFindEnumValue(local, relo.accessor, target) if errors.Is(err, errImpossibleRelocation) { return poison() } if err != nil { return zero, err } switch relo.kind { case reloEnumvalExists: return fixup(1, 1) case reloEnumvalValue: return fixup(localValue.Value, targetValue.Value) } case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64, reloFieldSigned: if _, ok := As[*Fwd](target); ok { // We can't relocate fields using a forward declaration, so // skip it. If a non-forward declaration is present in the BTF // we'll find it in one of the other iterations. return poison() } localField, targetField, err := coreFindField(local, relo.accessor, target) if errors.Is(err, errImpossibleRelocation) { return poison() } if err != nil { return zero, err } maybeSkipValidation := func(f COREFixup, err error) (COREFixup, error) { f.skipLocalValidation = localField.bitfieldSize > 0 return f, err } switch relo.kind { case reloFieldExists: return fixup(1, 1) case reloFieldByteOffset: return maybeSkipValidation(fixup(uint64(localField.offset), uint64(targetField.offset))) case reloFieldByteSize: localSize, err := Sizeof(localField.Type) if err != nil { return zero, err } targetSize, err := Sizeof(targetField.Type) if err != nil { return zero, err } return maybeSkipValidation(fixup(uint64(localSize), uint64(targetSize))) case reloFieldLShiftU64: var target uint64 if bo == binary.LittleEndian { targetSize, err := targetField.sizeBits() if err != nil { return zero, err } target = uint64(64 - targetField.bitfieldOffset - targetSize) } else { loadWidth, err := 
Sizeof(targetField.Type) if err != nil { return zero, err } target = uint64(64 - Bits(loadWidth*8) + targetField.bitfieldOffset) } return fixupWithoutValidation(0, target) case reloFieldRShiftU64: targetSize, err := targetField.sizeBits() if err != nil { return zero, err } return fixupWithoutValidation(0, uint64(64-targetSize)) case reloFieldSigned: switch local := UnderlyingType(localField.Type).(type) { case *Enum: target, ok := As[*Enum](targetField.Type) if !ok { return zero, fmt.Errorf("target isn't *Enum but %T", targetField.Type) } return fixup(boolToUint64(local.Signed), boolToUint64(target.Signed)) case *Int: target, ok := As[*Int](targetField.Type) if !ok { return zero, fmt.Errorf("target isn't *Int but %T", targetField.Type) } return fixup( uint64(local.Encoding&Signed), uint64(target.Encoding&Signed), ) default: return zero, fmt.Errorf("type %T: %w", local, errNoSignedness) } } } return zero, ErrNotSupported } func boolToUint64(val bool) uint64 { if val { return 1 } return 0 } /* coreAccessor contains a path through a struct. It contains at least one index. * * The interpretation depends on the kind of the relocation. The following is * taken from struct bpf_core_relo in libbpf_internal.h: * * - for field-based relocations, string encodes an accessed field using * a sequence of field and array indices, separated by colon (:). It's * conceptually very close to LLVM's getelementptr ([0]) instruction's * arguments for identifying offset to a field. * - for type-based relocations, strings is expected to be just "0"; * - for enum value-based relocations, string contains an index of enum * value within its enum type; * * Example to provide a better feel. 
*
 *   struct sample {
 *       int a;
 *       struct {
 *           int b[10];
 *       };
 *   };
 *
 *   struct sample s = ...;
 *   int x = &s->a;     // encoded as "0:0" (a is field #0)
 *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
 *                      // b is field #0 inside anon struct, accessing elem #5)
 *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
 */
type coreAccessor []int

// parseCOREAccessor parses a colon separated list of non-negative decimal
// indices, like "0:1:0:5".
//
// Returns an error if accessor is empty or if any component isn't a valid
// index. The error for an invalid component wraps the one returned by
// strconv, so callers can inspect it with errors.Is / errors.As.
func parseCOREAccessor(accessor string) (coreAccessor, error) {
	if accessor == "" {
		return nil, fmt.Errorf("empty accessor")
	}

	parts := strings.Split(accessor, ":")
	result := make(coreAccessor, 0, len(parts))
	for _, part := range parts {
		// 31 bits to avoid overflowing int on 32 bit platforms.
		index, err := strconv.ParseUint(part, 10, 31)
		if err != nil {
			return nil, fmt.Errorf("accessor index %q: %w", part, err)
		}

		result = append(result, int(index))
	}

	return result, nil
}

// String formats the accessor as its indices joined by colons, which is the
// format accepted by parseCOREAccessor.
func (ca coreAccessor) String() string {
	strs := make([]string, 0, len(ca))
	for _, i := range ca {
		strs = append(strs, strconv.Itoa(i))
	}
	return strings.Join(strs, ":")
}

// enumValue returns a pointer to the value of enum t selected by the accessor.
//
// Returns an error if t is not an enum, if the accessor has more than one
// index, or if the index is out of range for the enum.
func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
	e, ok := As[*Enum](t)
	if !ok {
		return nil, fmt.Errorf("not an enum: %s", t)
	}

	if len(ca) > 1 {
		return nil, fmt.Errorf("invalid accessor %s for enum", ca)
	}

	i := ca[0]
	if i >= len(e.Values) {
		return nil, fmt.Errorf("invalid index %d for %s", i, e)
	}

	return &e.Values[i], nil
}

// coreField represents the position of a "child" of a composite type from the
// start of that type.
//
//	/- start of composite
//	| offset * 8 | bitfieldOffset | bitfieldSize | ... |
//	             \- start of field       end of field -/
type coreField struct {
	Type Type

	// The position of the field from the start of the composite type in bytes.
	offset uint32

	// The offset of the bitfield in bits from the start of the field.
	bitfieldOffset Bits

	// The size of the bitfield in bits.
	//
	// Zero if the field is not a bitfield.
	bitfieldSize Bits
}

// adjustOffsetToNthElement advances offset by n times the size of cf.Type,
// as if cf were element zero of an array and element n is being accessed.
func (cf *coreField) adjustOffsetToNthElement(n int) error {
	if n == 0 {
		// Nothing to do, and avoids computing the size of the type.
		return nil
	}

	size, err := Sizeof(cf.Type)
	if err != nil {
		return err
	}

	cf.offset += uint32(n) * uint32(size)
	return nil
}

// adjustOffsetBits splits a bit offset into a byte offset, which is a
// multiple of the alignment of cf.Type and is added to cf.offset, and the
// remaining bits, which are stored in cf.bitfieldOffset.
func (cf *coreField) adjustOffsetBits(offset Bits) error {
	align, err := alignof(cf.Type)
	if err != nil {
		return err
	}

	// We can compute the load offset by:
	// 1) converting the bit offset to bytes with a flooring division.
	// 2) dividing and multiplying that offset by the alignment, yielding the
	//    load size aligned offset.
	offsetBytes := uint32(offset/8) / uint32(align) * uint32(align)

	// The number of bits remaining is the bit offset less the number of bits
	// we can "skip" with the aligned offset.
	cf.bitfieldOffset = offset - Bits(offsetBytes*8)

	// We know that cf.offset is aligned to at least align since we get it
	// from the compiler via BTF. Adding an aligned offsetBytes preserves the
	// alignment.
	cf.offset += offsetBytes
	return nil
}

// sizeBits returns the size of the field in bits: the bitfield size if there
// is one, the size of the type otherwise.
func (cf *coreField) sizeBits() (Bits, error) {
	if cf.bitfieldSize > 0 {
		return cf.bitfieldSize, nil
	}

	// Someone is trying to access a non-bitfield via a bit shift relocation.
	// This happens when a field changes from a bitfield to a regular field
	// between kernel versions. Synthesise the size to make the shifts work.
	size, err := Sizeof(cf.Type)
	if err != nil {
		return 0, err
	}

	return Bits(size * 8), nil
}

// coreFindField descends into the local type using the accessor and tries to
// find an equivalent field in target at each step.
//
// Returns the field reached in the local type followed by the equivalent
// field in target. Each carries its offset from the start of its own root
// type.
func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, coreField, error) { local := coreField{Type: localT} target := coreField{Type: targetT} if err := coreAreMembersCompatible(local.Type, target.Type); err != nil { return coreField{}, coreField{}, fmt.Errorf("fields: %w", err) } // The first index is used to offset a pointer of the base type like // when accessing an array. if err := local.adjustOffsetToNthElement(localAcc[0]); err != nil { return coreField{}, coreField{}, err } if err := target.adjustOffsetToNthElement(localAcc[0]); err != nil { return coreField{}, coreField{}, err } var localMaybeFlex, targetMaybeFlex bool for i, acc := range localAcc[1:] { switch localType := UnderlyingType(local.Type).(type) { case composite: // For composite types acc is used to find the field in the local type, // and then we try to find a field in target with the same name. localMembers := localType.members() if acc >= len(localMembers) { return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, localType) } localMember := localMembers[acc] if localMember.Name == "" { localMemberType, ok := As[composite](localMember.Type) if !ok { return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported) } // This is an anonymous struct or union, ignore it. 
local = coreField{ Type: localMemberType, offset: local.offset + localMember.Offset.Bytes(), } localMaybeFlex = false continue } targetType, ok := As[composite](target.Type) if !ok { return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation) } targetMember, last, err := coreFindMember(targetType, localMember.Name) if err != nil { return coreField{}, coreField{}, err } local = coreField{ Type: localMember.Type, offset: local.offset, bitfieldSize: localMember.BitfieldSize, } localMaybeFlex = acc == len(localMembers)-1 target = coreField{ Type: targetMember.Type, offset: target.offset, bitfieldSize: targetMember.BitfieldSize, } targetMaybeFlex = last if local.bitfieldSize == 0 && target.bitfieldSize == 0 { local.offset += localMember.Offset.Bytes() target.offset += targetMember.Offset.Bytes() break } // Either of the members is a bitfield. Make sure we're at the // end of the accessor. if next := i + 1; next < len(localAcc[1:]) { return coreField{}, coreField{}, fmt.Errorf("can't descend into bitfield") } if err := local.adjustOffsetBits(localMember.Offset); err != nil { return coreField{}, coreField{}, err } if err := target.adjustOffsetBits(targetMember.Offset); err != nil { return coreField{}, coreField{}, err } case *Array: // For arrays, acc is the index in the target. 
targetType, ok := As[*Array](target.Type) if !ok { return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation) } if localType.Nelems == 0 && !localMaybeFlex { return coreField{}, coreField{}, fmt.Errorf("local type has invalid flexible array") } if targetType.Nelems == 0 && !targetMaybeFlex { return coreField{}, coreField{}, fmt.Errorf("target type has invalid flexible array") } if localType.Nelems > 0 && acc >= int(localType.Nelems) { return coreField{}, coreField{}, fmt.Errorf("invalid access of %s at index %d", localType, acc) } if targetType.Nelems > 0 && acc >= int(targetType.Nelems) { return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation) } local = coreField{ Type: localType.Type, offset: local.offset, } localMaybeFlex = false if err := local.adjustOffsetToNthElement(acc); err != nil { return coreField{}, coreField{}, err } target = coreField{ Type: targetType.Type, offset: target.offset, } targetMaybeFlex = false if err := target.adjustOffsetToNthElement(acc); err != nil { return coreField{}, coreField{}, err } default: return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported) } if err := coreAreMembersCompatible(local.Type, target.Type); err != nil { return coreField{}, coreField{}, err } } return local, target, nil } // coreFindMember finds a member in a composite type while handling anonymous // structs and unions. func coreFindMember(typ composite, name string) (Member, bool, error) { if name == "" { return Member{}, false, errors.New("can't search for anonymous member") } type offsetTarget struct { composite offset Bits } targets := []offsetTarget{{typ, 0}} visited := make(map[composite]bool) for i := 0; i < len(targets); i++ { target := targets[i] // Only visit targets once to prevent infinite recursion. 
if visited[target] { continue } if len(visited) >= maxResolveDepth { // This check is different than libbpf, which restricts the entire // path to BPF_CORE_SPEC_MAX_LEN items. return Member{}, false, fmt.Errorf("type is nested too deep") } visited[target] = true members := target.members() for j, member := range members { if member.Name == name { // NB: This is safe because member is a copy. member.Offset += target.offset return member, j == len(members)-1, nil } // The names don't match, but this member could be an anonymous struct // or union. if member.Name != "" { continue } comp, ok := As[composite](member.Type) if !ok { return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type) } targets = append(targets, offsetTarget{comp, target.offset + member.Offset}) } } return Member{}, false, fmt.Errorf("no matching member: %w", errImpossibleRelocation) } // coreFindEnumValue follows localAcc to find the equivalent enum value in target. func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localValue, targetValue *EnumValue, _ error) { localValue, err := localAcc.enumValue(local) if err != nil { return nil, nil, err } targetEnum, ok := As[*Enum](target) if !ok { return nil, nil, errImpossibleRelocation } localName := newEssentialName(localValue.Name) for i, targetValue := range targetEnum.Values { if newEssentialName(targetValue.Name) != localName { continue } return localValue, &targetEnum.Values[i], nil } return nil, nil, errImpossibleRelocation } // CheckTypeCompatibility checks local and target types for Compatibility according to CO-RE rules. // // Only layout compatibility is checked, ignoring names of the root type. func CheckTypeCompatibility(localType Type, targetType Type) error { return coreAreTypesCompatible(localType, targetType, nil) } type pair struct { A, B Type } /* The comment below is from bpf_core_types_are_compat in libbpf.c: * * Check local and target types for compatibility. 
This check is used for
 * type-based CO-RE relocations and follow slightly different rules than
 * field-based relocations. This function assumes that root types were already
 * checked for name match. Beyond that initial root-level name check, names
 * are completely ignored. Compatibility rules are as follows:
 *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
 *     kind should match for local and target types (i.e., STRUCT is not
 *     compatible with UNION);
 *   - for ENUMs, the size is ignored;
 *   - for INT, size and signedness are ignored;
 *   - for ARRAY, dimensionality is ignored, element types are checked for
 *     compatibility recursively;
 *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
 *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
 *   - FUNC_PROTOs are compatible if they have compatible signature: same
 *     number of input args and compatible return and argument types.
 * These rules are not set in stone and probably will be adjusted as we get
 * more experience with using BPF CO-RE relocations.
 *
 * Returns errIncompatibleTypes if types are not compatible.
 */
func coreAreTypesCompatible(localType Type, targetType Type, visited map[pair]struct{}) error {
	localType, targetType = UnderlyingType(localType), UnderlyingType(targetType)

	if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
		return fmt.Errorf("type mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes)
	}

	// Each combination of types is only checked once. Reading from a nil map
	// is fine, the map is allocated on the first write.
	key := pair{A: localType, B: targetType}
	if _, seen := visited[key]; seen {
		return nil
	}
	if visited == nil {
		visited = make(map[pair]struct{})
	}
	visited[key] = struct{}{}

	// The type assertions on targetType below can't fail, since both sides
	// were checked to have the same dynamic type.
	switch l := localType.(type) {
	case *Void, *Struct, *Union, *Enum, *Fwd, *Int:
		return nil

	case *Pointer:
		t := targetType.(*Pointer)
		return coreAreTypesCompatible(l.Target, t.Target, visited)

	case *Array:
		t := targetType.(*Array)
		err := coreAreTypesCompatible(l.Index, t.Index, visited)
		if err != nil {
			return err
		}

		return coreAreTypesCompatible(l.Type, t.Type, visited)

	case *FuncProto:
		t := targetType.(*FuncProto)
		err := coreAreTypesCompatible(l.Return, t.Return, visited)
		if err != nil {
			return err
		}

		if len(l.Params) != len(t.Params) {
			return fmt.Errorf("function param mismatch: %w", errIncompatibleTypes)
		}

		for i := range l.Params {
			err := coreAreTypesCompatible(l.Params[i].Type, t.Params[i].Type, visited)
			if err != nil {
				return err
			}
		}

		return nil

	default:
		return fmt.Errorf("unsupported type %T", localType)
	}
}

/* coreAreMembersCompatible checks two types for field-based relocation compatibility.
 *
 * The comment below is from bpf_core_fields_are_compat in libbpf.c:
 *
 * Check two types for compatibility for the purpose of field access
 * relocation.
const/volatile/restrict and typedefs are skipped to ensure we
 * are relocating semantically compatible entities:
 *   - any two STRUCTs/UNIONs are compatible and can be mixed;
 *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
 *   - any two PTRs are always compatible;
 *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
 *     least one of enums should be anonymous;
 *   - for ENUMs, check sizes, names are ignored;
 *   - for INT, size and signedness are ignored;
 *   - any two FLOATs are always compatible;
 *   - for ARRAY, dimensionality is ignored, element types are checked for
 *     compatibility recursively;
 *     [ NB: coreAreMembersCompatible doesn't recurse, this check is done
 *       by coreFindField. ]
 *   - everything else shouldn't be ever a target of relocation.
 * These rules are not set in stone and probably will be adjusted as we get
 * more experience with using BPF CO-RE relocations.
 *
 * Returns errImpossibleRelocation if the members are not compatible.
 */
func coreAreMembersCompatible(localType Type, targetType Type) error {
	localType, targetType = UnderlyingType(localType), UnderlyingType(targetType)

	// Structs and unions can be mixed freely.
	if _, ok := localType.(composite); ok {
		if _, ok := targetType.(composite); ok {
			return nil
		}
	}

	if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
		return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
	}

	// Shared by the kinds which are compared by name.
	checkNames := func(localName, targetName string) error {
		if coreEssentialNamesMatch(localName, targetName) {
			return nil
		}
		return fmt.Errorf("names %q and %q don't match: %w", localName, targetName, errImpossibleRelocation)
	}

	// The type assertions on targetType below can't fail, since both sides
	// were checked to have the same dynamic type.
	switch l := localType.(type) {
	case *Array, *Pointer, *Float, *Int:
		return nil

	case *Enum:
		return checkNames(l.Name, targetType.(*Enum).Name)

	case *Fwd:
		return checkNames(l.Name, targetType.(*Fwd).Name)

	default:
		return fmt.Errorf("type %s: %w", localType, ErrNotSupported)
	}
}

// coreEssentialNamesMatch compares two names while ignoring their flavour suffix.
// // This should only be used on names which are in the global scope, like struct // names, typedefs or enum values. func coreEssentialNamesMatch(a, b string) bool { if a == "" || b == "" { // allow anonymous and named type to match return true } return newEssentialName(a) == newEssentialName(b) } /* The comment below is from __bpf_core_types_match in relo_core.c: * * Check that two types "match". This function assumes that root types were * already checked for name match. * * The matching relation is defined as follows: * - modifiers and typedefs are stripped (and, hence, effectively ignored) * - generally speaking types need to be of same kind (struct vs. struct, union * vs. union, etc.) * - exceptions are struct/union behind a pointer which could also match a * forward declaration of a struct or union, respectively, and enum vs. * enum64 (see below) * Then, depending on type: * - integers: * - match if size and signedness match * - arrays & pointers: * - target types are recursively matched * - structs & unions: * - local members need to exist in target with the same name * - for each member we recursively check match unless it is already behind a * pointer, in which case we only check matching names and compatible kind * - enums: * - local variants have to have a match in target by symbolic name (but not * numeric value) * - size has to match (but enum may match enum64 and vice versa) * - function pointers: * - number and position of arguments in local type has to match target * - for each argument and the return value we recursively check match */ func coreTypesMatch(localType Type, targetType Type, visited map[pair]struct{}) error { localType = UnderlyingType(localType) targetType = UnderlyingType(targetType) if !coreEssentialNamesMatch(localType.TypeName(), targetType.TypeName()) { return fmt.Errorf("type name %q don't match %q: %w", localType.TypeName(), targetType.TypeName(), errIncompatibleTypes) } if reflect.TypeOf(localType) != 
reflect.TypeOf(targetType) { return fmt.Errorf("type mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) } if _, ok := visited[pair{localType, targetType}]; ok { return nil } if visited == nil { visited = make(map[pair]struct{}) } visited[pair{localType, targetType}] = struct{}{} switch lv := (localType).(type) { case *Void: case *Fwd: if targetType.(*Fwd).Kind != lv.Kind { return fmt.Errorf("fwd kind mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) } case *Enum: return coreEnumsMatch(lv, targetType.(*Enum)) case composite: tv := targetType.(composite) if len(lv.members()) > len(tv.members()) { return errIncompatibleTypes } localMembers := lv.members() targetMembers := map[string]Member{} for _, member := range tv.members() { targetMembers[member.Name] = member } for _, localMember := range localMembers { targetMember, found := targetMembers[localMember.Name] if !found { return fmt.Errorf("no field %q in %v: %w", localMember.Name, targetType, errIncompatibleTypes) } err := coreTypesMatch(localMember.Type, targetMember.Type, visited) if err != nil { return err } } case *Int: if !coreEncodingMatches(lv, targetType.(*Int)) { return fmt.Errorf("int mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) } case *Pointer: tv := targetType.(*Pointer) // Allow a pointer to a forward declaration to match a struct // or union. 
if fwd, ok := As[*Fwd](lv.Target); ok && fwd.matches(tv.Target) { return nil } if fwd, ok := As[*Fwd](tv.Target); ok && fwd.matches(lv.Target) { return nil } return coreTypesMatch(lv.Target, tv.Target, visited) case *Array: tv := targetType.(*Array) if lv.Nelems != tv.Nelems { return fmt.Errorf("array mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) } return coreTypesMatch(lv.Type, tv.Type, visited) case *FuncProto: tv := targetType.(*FuncProto) if len(lv.Params) != len(tv.Params) { return fmt.Errorf("function param mismatch: %w", errIncompatibleTypes) } for i, lparam := range lv.Params { if err := coreTypesMatch(lparam.Type, tv.Params[i].Type, visited); err != nil { return err } } return coreTypesMatch(lv.Return, tv.Return, visited) default: return fmt.Errorf("unsupported type %T", localType) } return nil } // coreEncodingMatches returns true if both ints have the same size and signedness. // All encodings other than `Signed` are considered unsigned. func coreEncodingMatches(local, target *Int) bool { return local.Size == target.Size && (local.Encoding == Signed) == (target.Encoding == Signed) } // coreEnumsMatch checks two enums match, which is considered to be the case if the following is true: // - size has to match (but enum may match enum64 and vice versa) // - local variants have to have a match in target by symbolic name (but not numeric value) func coreEnumsMatch(local *Enum, target *Enum) error { if local.Size != target.Size { return fmt.Errorf("size mismatch between %v and %v: %w", local, target, errIncompatibleTypes) } // If there are more values in the local than the target, there must be at least one value in the local // that isn't in the target, and therefor the types are incompatible. 
if len(local.Values) > len(target.Values) { return fmt.Errorf("local has more values than target: %w", errIncompatibleTypes) } outer: for _, lv := range local.Values { for _, rv := range target.Values { if coreEssentialNamesMatch(lv.Name, rv.Name) { continue outer } } return fmt.Errorf("no match for %v in %v: %w", lv, target, errIncompatibleTypes) } return nil } ================================================ FILE: btf/core_reloc_test.go ================================================ package btf_test import ( "bytes" "io" "os" "slices" "strings" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" "github.com/go-quicktest/qt" ) func TestCORERelocationLoad(t *testing.T) { file := testutils.NativeFile(t, "testdata/relocs-%s.elf") fh, err := os.Open(file) if err != nil { t.Fatal(err) } defer fh.Close() spec, err := ebpf.LoadCollectionSpecFromReader(fh) if err != nil { t.Fatal(err) } for _, progSpec := range spec.Programs { t.Run(progSpec.Name, func(t *testing.T) { if _, err := fh.Seek(0, io.SeekStart); err != nil { t.Fatal(err) } prog, err := ebpf.NewProgramWithOptions(progSpec, ebpf.ProgramOptions{ KernelTypes: spec.Types, }) testutils.SkipIfNotSupported(t, err) if strings.HasPrefix(progSpec.Name, "err_") { if err == nil { prog.Close() t.Fatal("Expected an error") } t.Log("Got expected error:", err) return } if err != nil { t.Fatal("Load program:", err) } defer prog.Close() ret, _, err := prog.Test(internal.EmptyBPFContext) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Error when running:", err) } if ret != 0 { t.Error("Assertion failed on line", ret) } }) } } func TestCORERelocationRead(t *testing.T) { file := testutils.NativeFile(t, "testdata/relocs_read-%s.elf") spec, err := ebpf.LoadCollectionSpec(file) if err != nil { t.Fatal(err) } targetFile := testutils.NativeFile(t, "testdata/relocs_read_tgt-%s.elf") targetSpec, err := btf.LoadSpec(targetFile) if err 
!= nil { t.Fatal(err) } tests := []struct { name string opts ebpf.ProgramOptions }{ { name: "KernelTypes", opts: ebpf.ProgramOptions{ KernelTypes: targetSpec, }, }, { name: "ExtraRelocationTargets", opts: ebpf.ProgramOptions{ ExtraRelocationTargets: []*btf.Spec{targetSpec}, }, }, } for _, progSpec := range spec.Programs { for _, test := range tests { t.Run(progSpec.Name+"_"+test.name, func(t *testing.T) { prog, err := ebpf.NewProgramWithOptions(progSpec, test.opts) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Load program:", err) } defer prog.Close() ret, _, err := prog.Test(internal.EmptyBPFContext) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Error when running:", err) } if ret != 0 { t.Error("Assertion failed on line", ret) } }) } } } func TestLD64IMMReloc(t *testing.T) { testutils.SkipOnOldKernel(t, "5.4", "vmlinux BTF in sysfs") file := testutils.NativeFile(t, "testdata/relocs_enum-%s.elf") fh, err := os.Open(file) if err != nil { t.Fatal(err) } defer fh.Close() spec, err := ebpf.LoadCollectionSpecFromReader(fh) if err != nil { t.Fatal(err) } coll, err := ebpf.NewCollection(spec) testutils.SkipIfNotSupportedOnOS(t, err) if err != nil { t.Fatal(err) } defer coll.Close() } func TestCOREPoisonLineInfo(t *testing.T) { spec, err := ebpf.LoadCollectionSpec(testutils.NativeFile(t, "../testdata/errors-%s.elf")) qt.Assert(t, qt.IsNil(err)) var b btf.Builder raw, err := b.Marshal(nil, nil) qt.Assert(t, qt.IsNil(err)) empty, err := btf.LoadSpecFromReader(bytes.NewReader(raw)) qt.Assert(t, qt.IsNil(err)) for _, test := range []struct { name string }{ {"poisoned_single"}, {"poisoned_double"}, } { progSpec := spec.Programs[test.name] qt.Assert(t, qt.IsNotNil(progSpec)) t.Run(test.name, func(t *testing.T) { t.Log(progSpec.Instructions) _, err := ebpf.NewProgramWithOptions(progSpec, ebpf.ProgramOptions{ KernelTypes: empty, }) testutils.SkipIfNotSupported(t, err) var ve *ebpf.VerifierError qt.Assert(t, qt.ErrorAs(err, &ve)) found := 
slices.ContainsFunc(ve.Log, func(line string) bool { return strings.HasPrefix(line, "; instruction poisoned by CO-RE") }) qt.Assert(t, qt.IsTrue(found)) t.Logf("%-5v", ve) }) } } ================================================ FILE: btf/core_test.go ================================================ package btf import ( "errors" "fmt" "os" "slices" "strings" "testing" "github.com/google/go-cmp/cmp" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" "github.com/go-quicktest/qt" ) func TestCheckTypeCompatibility(t *testing.T) { tests := []struct { a, b Type compatible bool }{ {&Void{}, &Void{}, true}, {&Struct{Name: "a"}, &Struct{Name: "b"}, true}, {&Union{Name: "a"}, &Union{Name: "b"}, true}, {&Union{Name: "a"}, &Struct{Name: "b"}, false}, {&Enum{Name: "a"}, &Enum{Name: "b"}, true}, {&Fwd{Name: "a"}, &Fwd{Name: "b"}, true}, {&Int{Name: "a", Size: 2}, &Int{Name: "b", Size: 4}, true}, {&Pointer{Target: &Void{}}, &Pointer{Target: &Void{}}, true}, {&Pointer{Target: &Void{}}, &Void{}, false}, {&Array{Index: &Void{}, Type: &Void{}}, &Array{Index: &Void{}, Type: &Void{}}, true}, {&Array{Index: &Void{}, Type: &Int{}}, &Array{Index: &Void{}, Type: &Void{}}, false}, {&FuncProto{Return: &Int{}}, &FuncProto{Return: &Void{}}, false}, { &FuncProto{Return: &Void{}, Params: []FuncParam{{Name: "a", Type: &Void{}}}}, &FuncProto{Return: &Void{}, Params: []FuncParam{{Name: "b", Type: &Void{}}}}, true, }, { &FuncProto{Return: &Void{}, Params: []FuncParam{{Type: &Void{}}}}, &FuncProto{Return: &Void{}, Params: []FuncParam{{Type: &Int{}}}}, false, }, { &FuncProto{Return: &Void{}, Params: []FuncParam{{Type: &Void{}}, {Type: &Void{}}}}, &FuncProto{Return: &Void{}, Params: []FuncParam{{Type: &Void{}}}}, false, }, {&FuncProto{Return: &Typedef{Type: &Int{}}}, &FuncProto{Return: &Int{}}, true}, {&FuncProto{Return: &Typedef{Type: &Int{}}}, &FuncProto{Return: &Void{}}, false}, } for _, test := range tests { err := CheckTypeCompatibility(test.a, test.b) if test.compatible 
{ if err != nil { t.Errorf("Expected types to be compatible: %s\na = %#v\nb = %#v", err, test.a, test.b) continue } } else { if !errors.Is(err, errIncompatibleTypes) { t.Errorf("Expected types to be incompatible: %s\na = %#v\nb = %#v", err, test.a, test.b) continue } } err = CheckTypeCompatibility(test.b, test.a) if test.compatible { if err != nil { t.Errorf("Expected reversed types to be compatible: %s\na = %#v\nb = %#v", err, test.a, test.b) } } else { if !errors.Is(err, errIncompatibleTypes) { t.Errorf("Expected reversed types to be incompatible: %s\na = %#v\nb = %#v", err, test.a, test.b) } } } for _, invalid := range []Type{&Var{}, &Datasec{}} { err := CheckTypeCompatibility(invalid, invalid) if errors.Is(err, errIncompatibleTypes) { t.Errorf("Expected an error for %T, not errIncompatibleTypes", invalid) } else if err == nil { t.Errorf("Expected an error for %T", invalid) } } } func TestCOREAreMembersCompatible(t *testing.T) { tests := []struct { a, b Type compatible bool }{ {&Struct{Name: "a"}, &Struct{Name: "b"}, true}, {&Union{Name: "a"}, &Union{Name: "b"}, true}, {&Union{Name: "a"}, &Struct{Name: "b"}, true}, {&Enum{Name: "a"}, &Enum{Name: "b"}, false}, {&Enum{Name: "a"}, &Enum{Name: "a___foo"}, true}, {&Enum{Name: "a"}, &Enum{Name: ""}, true}, {&Fwd{Name: "a"}, &Fwd{Name: "b"}, false}, {&Fwd{Name: "a"}, &Fwd{Name: "a___foo"}, true}, {&Fwd{Name: "a"}, &Fwd{Name: ""}, true}, {&Int{Name: "a", Size: 2}, &Int{Name: "b", Size: 4}, true}, {&Pointer{Target: &Void{}}, &Pointer{Target: &Void{}}, true}, {&Pointer{Target: &Void{}}, &Void{}, false}, {&Array{Type: &Int{Size: 1}}, &Array{Type: &Int{Encoding: Signed}}, true}, {&Float{Size: 2}, &Float{Size: 4}, true}, } for _, test := range tests { err := coreAreMembersCompatible(test.a, test.b) if test.compatible { if err != nil { t.Errorf("Expected members to be compatible: %s\na = %#v\nb = %#v", err, test.a, test.b) continue } } else { if !errors.Is(err, errImpossibleRelocation) { t.Errorf("Expected members to be 
incompatible: %s\na = %#v\nb = %#v", err, test.a, test.b) continue } } err = coreAreMembersCompatible(test.b, test.a) if test.compatible { if err != nil { t.Errorf("Expected reversed members to be compatible: %s\na = %#v\nb = %#v", err, test.a, test.b) } } else { if !errors.Is(err, errImpossibleRelocation) { t.Errorf("Expected reversed members to be incompatible: %s\na = %#v\nb = %#v", err, test.a, test.b) } } } for _, invalid := range []Type{&Void{}, &FuncProto{}, &Var{}, &Datasec{}} { err := coreAreMembersCompatible(invalid, invalid) if errors.Is(err, errImpossibleRelocation) { t.Errorf("Expected an error for %T, not errImpossibleRelocation", invalid) } else if err == nil { t.Errorf("Expected an error for %T", invalid) } } } func TestCOREAccessor(t *testing.T) { for _, valid := range []string{ "0", "1:0", "1:0:3:34:10:1", } { _, err := parseCOREAccessor(valid) if err != nil { t.Errorf("Parse %q: %s", valid, err) } } for _, invalid := range []string{ "", "-1", ":", "0:", ":12", "4294967296", } { _, err := parseCOREAccessor(invalid) if err == nil { t.Errorf("Accepted invalid accessor %q", invalid) } } } func TestCOREFindEnumValue(t *testing.T) { a := &Enum{Values: []EnumValue{{"foo", 23}, {"bar", 42}}} b := &Enum{Values: []EnumValue{ {"foo___flavour", 0}, {"bar", 123}, {"garbage", 3}, }} invalid := []struct { name string local Type target Type acc coreAccessor err error }{ {"o-o-b accessor", a, b, coreAccessor{len(a.Values)}, nil}, {"long accessor", a, b, coreAccessor{0, 1}, nil}, {"wrong target", a, &Void{}, coreAccessor{0, 1}, nil}, { "no matching value", b, a, coreAccessor{2}, errImpossibleRelocation, }, } for _, test := range invalid { t.Run(test.name, func(t *testing.T) { _, _, err := coreFindEnumValue(test.local, test.acc, test.target) if test.err != nil && !errors.Is(err, test.err) { t.Fatalf("Expected %s, got %s", test.err, err) } if err == nil { t.Fatal("Accepted invalid case") } }) } valid := []struct { name string local, target Type acc coreAccessor 
localValue, targetValue uint64 }{ {"a to b", a, b, coreAccessor{0}, 23, 0}, {"b to a", b, a, coreAccessor{1}, 123, 42}, } for _, test := range valid { t.Run(test.name, func(t *testing.T) { local, target, err := coreFindEnumValue(test.local, test.acc, test.target) qt.Assert(t, qt.IsNil(err)) qt.Check(t, qt.Equals(local.Value, test.localValue)) qt.Check(t, qt.Equals(target.Value, test.targetValue)) }) } } func TestCOREFindField(t *testing.T) { ptr := &Pointer{} u16 := &Int{Size: 2} u32 := &Int{Size: 4} aFields := []Member{ {Name: "foo", Type: ptr, Offset: 8}, {Name: "bar", Type: u16, Offset: 16}, {Name: "baz", Type: u32, Offset: 32, BitfieldSize: 3}, {Name: "quux", Type: u32, Offset: 35, BitfieldSize: 10}, {Name: "quuz", Type: u32, Offset: 45, BitfieldSize: 8}, } bFields := []Member{ {Name: "foo", Type: ptr, Offset: 16}, {Name: "bar", Type: u32, Offset: 8}, {Name: "other", Offset: 4}, // baz is separated out from the other bitfields {Name: "baz", Type: u32, Offset: 64, BitfieldSize: 3}, // quux's type changes u32->u16 {Name: "quux", Type: u16, Offset: 96, BitfieldSize: 10}, // quuz becomes a normal field {Name: "quuz", Type: u16, Offset: 112}, } aStruct := &Struct{Members: aFields, Size: 48} bStruct := &Struct{Members: bFields, Size: 80} aArray := &Array{Nelems: 4, Type: u16} bArray := &Array{Nelems: 3, Type: u32} invalid := []struct { name string local, target Type acc coreAccessor err error }{ { "unsupported type", &Void{}, &Void{}, coreAccessor{0, 0}, ErrNotSupported, }, { "different types", &Union{}, &Array{Type: u16}, coreAccessor{0}, errImpossibleRelocation, }, { "invalid composite accessor", aStruct, aStruct, coreAccessor{0, len(aStruct.Members)}, nil, }, { "invalid array accessor", aArray, aArray, coreAccessor{0, int(aArray.Nelems)}, nil, }, { "o-o-b array accessor", aArray, bArray, coreAccessor{0, int(bArray.Nelems)}, errImpossibleRelocation, }, { "no match", bStruct, aStruct, coreAccessor{0, 2}, errImpossibleRelocation, }, { "incompatible match", 
&Union{Members: []Member{{Name: "foo", Type: &Pointer{}}}}, &Union{Members: []Member{{Name: "foo", Type: &Int{}}}}, coreAccessor{0, 0}, errImpossibleRelocation, }, { "unsized type", bStruct, &Func{}, // non-zero accessor to force calculating the offset. coreAccessor{1}, errImpossibleRelocation, }, } for _, test := range invalid { t.Run(test.name, func(t *testing.T) { _, _, err := coreFindField(test.local, test.acc, test.target) if test.err != nil && !errors.Is(err, test.err) { t.Fatalf("Expected %s, got %s", test.err, err) } if err == nil { t.Fatal("Accepted invalid case") } t.Log(err) }) } bytes := func(typ Type) uint32 { sz, err := Sizeof(typ) if err != nil { t.Fatal(err) } return uint32(sz) } anon := func(t Type, offset Bits) []Member { return []Member{{Type: t, Offset: offset}} } anonStruct := func(m ...Member) Member { return Member{Type: &Struct{Members: m}} } anonUnion := func(m ...Member) Member { return Member{Type: &Union{Members: m}} } valid := []struct { name string local Type target Type acc coreAccessor localField, targetField coreField }{ { "array[0]", aArray, bArray, coreAccessor{0, 0}, coreField{u16, 0, 0, 0}, coreField{u32, 0, 0, 0}, }, { "array[1]", aArray, bArray, coreAccessor{0, 1}, coreField{u16, bytes(aArray.Type), 0, 0}, coreField{u32, bytes(bArray.Type), 0, 0}, }, { "array[0] with base offset", aArray, bArray, coreAccessor{1, 0}, coreField{u16, bytes(aArray), 0, 0}, coreField{u32, bytes(bArray), 0, 0}, }, { "array[2] with base offset", aArray, bArray, coreAccessor{1, 2}, coreField{u16, bytes(aArray) + 2*bytes(aArray.Type), 0, 0}, coreField{u32, bytes(bArray) + 2*bytes(bArray.Type), 0, 0}, }, { "flex array", &Struct{Members: []Member{{Name: "foo", Type: &Array{Nelems: 0, Type: u16}}}}, &Struct{Members: []Member{{Name: "foo", Type: &Array{Nelems: 0, Type: u32}}}}, coreAccessor{0, 0, 9000}, coreField{u16, bytes(u16) * 9000, 0, 0}, coreField{u32, bytes(u32) * 9000, 0, 0}, }, { "struct.0", aStruct, bStruct, coreAccessor{0, 0}, coreField{ptr, 1, 
0, 0}, coreField{ptr, 2, 0, 0}, }, { "struct.0 anon", aStruct, &Struct{Members: anon(bStruct, 24)}, coreAccessor{0, 0}, coreField{ptr, 1, 0, 0}, coreField{ptr, 3 + 2, 0, 0}, }, { "struct.0 with base offset", aStruct, bStruct, coreAccessor{3, 0}, coreField{ptr, 3*bytes(aStruct) + 1, 0, 0}, coreField{ptr, 3*bytes(bStruct) + 2, 0, 0}, }, { "struct.1", aStruct, bStruct, coreAccessor{0, 1}, coreField{u16, 2, 0, 0}, coreField{u32, 1, 0, 0}, }, { "struct.1 anon", aStruct, &Struct{Members: anon(bStruct, 24)}, coreAccessor{0, 1}, coreField{u16, 2, 0, 0}, coreField{u32, 3 + 1, 0, 0}, }, { "union.1", &Union{Members: aFields, Size: 32}, &Union{Members: bFields, Size: 32}, coreAccessor{0, 1}, coreField{u16, 2, 0, 0}, coreField{u32, 1, 0, 0}, }, { "interchangeable composites", &Struct{ Members: []Member{ anonStruct(anonUnion(Member{Name: "_1", Type: u16})), }, }, &Struct{ Members: []Member{ anonUnion(anonStruct(Member{Name: "_1", Type: u16})), }, }, coreAccessor{0, 0, 0, 0}, coreField{u16, 0, 0, 0}, coreField{u16, 0, 0, 0}, }, { "struct.2 (bitfield baz)", aStruct, bStruct, coreAccessor{0, 2}, coreField{u32, 4, 0, 3}, coreField{u32, 8, 0, 3}, }, { "struct.3 (bitfield quux)", aStruct, bStruct, coreAccessor{0, 3}, coreField{u32, 4, 3, 10}, coreField{u16, 12, 0, 10}, }, { "struct.4 (bitfield quuz)", aStruct, bStruct, coreAccessor{0, 4}, coreField{u32, 4, 13, 8}, coreField{u16, 14, 0, 0}, }, } allowCoreField := cmp.AllowUnexported(coreField{}) checkCOREField := func(t *testing.T, which string, got, want coreField) { t.Helper() if diff := cmp.Diff(want, got, allowCoreField); diff != "" { t.Errorf("%s mismatch (-want +got):\n%s", which, diff) } } for _, test := range valid { t.Run(test.name, func(t *testing.T) { localField, targetField, err := coreFindField(test.local, test.acc, test.target) qt.Assert(t, qt.IsNil(err)) checkCOREField(t, "local", localField, test.localField) checkCOREField(t, "target", targetField, test.targetField) }) } } func TestCOREFindFieldCyclical(t *testing.T) { 
members := []Member{{Name: "foo", Type: &Pointer{}}} cyclicStruct := &Struct{} cyclicStruct.Members = []Member{{Type: cyclicStruct}} cyclicUnion := &Union{} cyclicUnion.Members = []Member{{Type: cyclicUnion}} cyclicArray := &Array{Nelems: 1} cyclicArray.Type = &Pointer{Target: cyclicArray} tests := []struct { name string local, cyclic Type }{ {"struct", &Struct{Members: members}, cyclicStruct}, {"union", &Union{Members: members}, cyclicUnion}, {"array", &Array{Nelems: 2, Type: &Int{}}, cyclicArray}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { _, _, err := coreFindField(test.local, coreAccessor{0, 0}, test.cyclic) if !errors.Is(err, errImpossibleRelocation) { t.Fatal("Should return errImpossibleRelocation, got", err) } }) } } func TestCORERelocation(t *testing.T) { testutils.Files(t, testutils.Glob(t, "testdata/*.elf"), func(t *testing.T, file string) { rd, err := os.Open(file) if err != nil { t.Fatal(err) } defer rd.Close() spec, extInfos, err := LoadSpecAndExtInfosFromReader(rd) if err != nil { t.Fatal(err) } if extInfos == nil { t.Skip("No ext_infos") } errs := map[string]error{ "err_ambiguous": errAmbiguousRelocation, "err_ambiguous_flavour": errAmbiguousRelocation, } for section := range extInfos.Funcs { name := strings.TrimPrefix(section, "socket/") t.Run(name, func(t *testing.T) { var relos []*CORERelocation for _, reloInfo := range extInfos.CORERelos[section] { relos = append(relos, reloInfo.Relo) } fixups, err := CORERelocate(relos, []*Spec{spec}, spec.byteOrder, spec.TypeID) if want := errs[name]; want != nil { if !errors.Is(err, want) { t.Fatal("Expected", want, "got", err) } return } if err != nil { t.Fatal("Can't relocate against itself:", err) } for offset, fixup := range fixups { if want := fixup.local; !fixup.skipLocalValidation && want != fixup.target { // Since we're relocating against ourselves both values // should match. 
t.Errorf("offset %d: local %v doesn't match target %d (kind %s)", offset, fixup.local, fixup.target, fixup.kind) } } }) } }) } func TestCOREReloFieldSigned(t *testing.T) { for _, typ := range []Type{&Int{}, &Enum{}} { t.Run(fmt.Sprintf("%T with invalid target", typ), func(t *testing.T) { relo := &CORERelocation{ typ, coreAccessor{0}, reloFieldSigned, 0, } fixup, err := coreCalculateFixup(relo, &Void{}, internal.NativeEndian, dummyTypeID) qt.Assert(t, qt.IsTrue(fixup.poison)) qt.Assert(t, qt.IsNil(err)) }) } t.Run("type without signedness", func(t *testing.T) { relo := &CORERelocation{ &Array{}, coreAccessor{0}, reloFieldSigned, 0, } _, err := coreCalculateFixup(relo, &Array{}, internal.NativeEndian, dummyTypeID) qt.Assert(t, qt.ErrorIs(err, errNoSignedness)) }) } func TestCOREReloFieldShiftU64(t *testing.T) { typ := &Struct{ Members: []Member{ {Name: "A", Type: &Fwd{}}, }, } for _, relo := range []*CORERelocation{ {typ, coreAccessor{0, 0}, reloFieldRShiftU64, 1}, {typ, coreAccessor{0, 0}, reloFieldLShiftU64, 1}, } { t.Run(relo.kind.String(), func(t *testing.T) { _, err := coreCalculateFixup(relo, typ, internal.NativeEndian, dummyTypeID) qt.Assert(t, qt.ErrorIs(err, errUnsizedType)) }) } } func TestCORERelosKmodTypeID(t *testing.T) { a := &Int{Name: "a"} b := &Int{Name: "b"} relos := []*CORERelocation{ {&Int{}, coreAccessor{0}, reloTypeIDTarget, 0}, } typeID := func(t Type) (TypeID, error) { if t == a { return 42, nil } return 0, ErrNotFound } fixups, err := coreCalculateFixups( relos, []Type{a, b}, internal.NativeEndian, typeID, ) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsFalse(fixups[0].poison)) qt.Assert(t, qt.Equals(fixups[0].target, 42)) fixups, err = coreCalculateFixups( relos, []Type{b}, internal.NativeEndian, typeID, ) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsTrue(fixups[0].poison)) } func BenchmarkCORESkBuff(b *testing.B) { spec := vmlinuxTestdataSpec(b) var skb *Struct err := spec.TypeByName("sk_buff", &skb) qt.Assert(b, qt.IsNil(err)) skbID, err 
:= spec.TypeID(skb) qt.Assert(b, qt.IsNil(err)) lenIndex := slices.IndexFunc(skb.Members, func(m Member) bool { return m.Name == "len" }) qt.Assert(b, qt.Not(qt.Equals(lenIndex, -1))) var pktHashTypes *Enum err = spec.TypeByName("pkt_hash_types", &pktHashTypes) qt.Assert(b, qt.IsNil(err)) pktHashTypesID, err := spec.TypeID(pktHashTypes) qt.Assert(b, qt.IsNil(err)) for _, relo := range []*CORERelocation{ {skb, coreAccessor{0, lenIndex}, reloFieldByteOffset, skbID}, {skb, coreAccessor{0, lenIndex}, reloFieldByteSize, skbID}, {skb, coreAccessor{0, lenIndex}, reloFieldExists, skbID}, {skb, coreAccessor{0, lenIndex}, reloFieldSigned, skbID}, {skb, coreAccessor{0, lenIndex}, reloFieldLShiftU64, skbID}, {skb, coreAccessor{0, lenIndex}, reloFieldRShiftU64, skbID}, {skb, coreAccessor{0}, reloTypeIDLocal, skbID}, {skb, coreAccessor{0}, reloTypeIDTarget, skbID}, {skb, coreAccessor{0}, reloTypeExists, skbID}, {skb, coreAccessor{0}, reloTypeSize, skbID}, {pktHashTypes, coreAccessor{0}, reloEnumvalExists, pktHashTypesID}, {pktHashTypes, coreAccessor{0}, reloEnumvalValue, pktHashTypesID}, } { b.Run(relo.kind.String(), func(b *testing.B) { b.ReportAllocs() for b.Loop() { _, err = CORERelocate([]*CORERelocation{relo}, []*Spec{spec}, spec.byteOrder, spec.TypeID) if err != nil { b.Fatal(err) } } }) } } func TestCORETypesMatch(t *testing.T) { tests := []struct { a, b Type match bool reversible bool }{ {&Void{}, &Void{}, true, true}, {&Int{Size: 32}, &Int{Size: 32}, true, true}, {&Int{Size: 64}, &Int{Size: 32}, false, true}, {&Int{Size: 32}, &Int{Size: 32, Encoding: Signed}, false, true}, {&Fwd{Name: "a"}, &Fwd{Name: "a"}, true, true}, {&Fwd{Name: "a"}, &Fwd{Name: "b___new"}, false, true}, {&Fwd{Name: "a"}, &Fwd{Name: "a___new"}, true, true}, {&Fwd{Name: "a"}, &Struct{Name: "a___new"}, false, true}, {&Fwd{Name: "a"}, &Union{Name: "a___new"}, false, true}, {&Fwd{Name: "a", Kind: FwdStruct}, &Fwd{Name: "a___new", Kind: FwdUnion}, false, true}, {&Pointer{&Fwd{Name: "a", Kind: FwdStruct}}, 
&Pointer{&Struct{Name: "a___new"}}, true, true}, {&Pointer{&Fwd{Name: "a", Kind: FwdUnion}}, &Pointer{&Union{Name: "a___new"}}, true, true}, {&Pointer{&Fwd{Name: "a", Kind: FwdStruct}}, &Pointer{&Union{Name: "a___new"}}, false, true}, {&Struct{Name: "a___new"}, &Union{Name: "a___new"}, false, true}, {&Pointer{&Struct{Name: "a"}}, &Pointer{&Union{Name: "a___new"}}, false, true}, { &Struct{Name: "a", Members: []Member{ {Name: "foo", Type: &Int{}}, }}, &Struct{Name: "a___new", Members: []Member{ {Name: "foo", Type: &Int{}}, }}, true, true, }, { &Struct{Name: "a", Members: []Member{ {Name: "foo", Type: &Int{}}, }}, &Struct{Name: "a___new", Members: []Member{ {Name: "foo", Type: &Int{}}, {Name: "bar", Type: &Int{}}, }}, true, false, }, { &Struct{Name: "a", Members: []Member{ {Name: "foo", Type: &Int{}}, {Name: "bar", Type: &Int{}}, }}, &Struct{Name: "a___new", Members: []Member{ {Name: "foo", Type: &Int{}}, }}, false, false, }, { &Struct{Name: "a", Members: []Member{ {Name: "bar", Type: &Int{}}, }}, &Struct{Name: "a___new", Members: []Member{ {Name: "foo", Type: &Int{}}, }}, false, false, }, { &Struct{Name: "a", Members: []Member{ {Name: "foo", Type: &Int{Encoding: Signed}}, }}, &Struct{Name: "a___new", Members: []Member{ {Name: "foo", Type: &Int{}}, }}, false, false, }, { &Enum{Name: "a", Values: []EnumValue{ {"foo", 1}, }}, &Enum{Name: "a___new", Values: []EnumValue{ {"foo", 1}, }}, true, true, }, { &Enum{Name: "a", Values: []EnumValue{ {"foo", 1}, }}, &Enum{Name: "a___new", Values: []EnumValue{ {"foo", 1}, {"bar", 2}, }}, true, false, }, { &Enum{Name: "a", Values: []EnumValue{ {"foo", 1}, {"bar", 2}, }}, &Enum{Name: "a___new", Values: []EnumValue{ {"foo", 1}, }}, false, false, }, { &Enum{Name: "a", Values: []EnumValue{ {"foo", 1}, }}, &Enum{Name: "a___new", Values: []EnumValue{ {"bar", 1}, }}, false, false, }, { &Enum{Name: "a", Values: []EnumValue{ {"foo", 1}, }, Size: 1}, &Enum{Name: "a___new", Values: []EnumValue{ {"foo", 1}, }, Size: 2}, false, false, }, { 
&Array{Type: &Int{}, Nelems: 2}, &Array{Type: &Int{}, Nelems: 2}, true, true, }, { &Array{Type: &Int{}, Nelems: 3}, &Array{Type: &Int{}, Nelems: 2}, false, true, }, { &Array{Type: &Void{}, Nelems: 2}, &Array{Type: &Int{}, Nelems: 2}, false, true, }, { &FuncProto{Return: &Int{}, Params: []FuncParam{ {Name: "foo", Type: &Int{}}, }}, &FuncProto{Return: &Int{}, Params: []FuncParam{ {Name: "bar", Type: &Int{}}, }}, true, true, }, { &FuncProto{Return: &Int{}, Params: []FuncParam{ {Name: "foo", Type: &Int{}}, }}, &FuncProto{Return: &Int{}, Params: []FuncParam{ {Name: "bar", Type: &Int{}}, {Name: "baz", Type: &Int{}}, }}, false, true, }, { &FuncProto{Return: &Void{}, Params: []FuncParam{ {Name: "foo", Type: &Int{}}, }}, &FuncProto{Return: &Int{}, Params: []FuncParam{ {Name: "bar", Type: &Int{}}, }}, false, true, }, { &FuncProto{Return: &Void{}, Params: []FuncParam{ {Name: "bar", Type: &Int{Encoding: Signed}}, }}, &FuncProto{Return: &Int{}, Params: []FuncParam{ {Name: "bar", Type: &Int{}}, }}, false, true, }, } for _, test := range tests { err := coreTypesMatch(test.a, test.b, nil) if test.match { if err != nil { t.Errorf("Expected types to match: %s\na = %#v\nb = %#v", err, test.a, test.b) continue } } else { if !errors.Is(err, errIncompatibleTypes) { t.Errorf("Expected types to be incompatible: \na = %#v\nb = %#v", test.a, test.b) continue } } if test.reversible { err = coreTypesMatch(test.b, test.a, nil) if test.match { if err != nil { t.Errorf("Expected reversed types to match: %s\na = %#v\nb = %#v", err, test.a, test.b) } } else { if !errors.Is(err, errIncompatibleTypes) { t.Errorf("Expected reversed types to be incompatible: %s\na = %#v\nb = %#v", err, test.a, test.b) } } } } for _, invalid := range []Type{&Var{}, &Datasec{}} { err := coreTypesMatch(invalid, invalid, nil) if errors.Is(err, errIncompatibleTypes) { t.Errorf("Expected an error for %T, not errIncompatibleTypes", invalid) } else if err == nil { t.Errorf("Expected an error for %T", invalid) } } } // 
dummyTypeID returns 0, nil for any passed type. func dummyTypeID(Type) (TypeID, error) { return 0, nil } ================================================ FILE: btf/dedup.go ================================================ package btf import ( "errors" "fmt" "hash/maphash" "slices" ) // deduper deduplicates BTF types by finding all types in a Type graph that are // Equivalent and replaces them with a single instance. // // See doc comments in types.go to understand the various ways in which Types // can relate to each other and how they are compared for equality. We separate // Identity (same memory location), Equivalence (same shape/layout), and // Compatibility (CO-RE compatible) to be explicit about intent. // // This deduper opportunistically uses a combination of Identity and Equivalence // to find types that can be deduplicated. type deduper struct { visited map[Type]struct{} hashCache map[hashCacheKey]uint64 // Set of types that have been deduplicated. done map[Type]Type // Map of hash to types with that hash. hashed map[uint64][]Type eqCache map[typKey]bool seed maphash.Seed } func newDeduper() *deduper { return &deduper{ make(map[Type]struct{}), make(map[hashCacheKey]uint64), make(map[Type]Type), make(map[uint64][]Type), make(map[typKey]bool), maphash.MakeSeed(), } } func (d *deduper) deduplicate(t Type) (Type, error) { // If we have already attempted to deduplicate this exact type, return the // result. if done, ok := d.done[t]; ok { return done, nil } // Visit the subtree, if a type has children, attempt to replace it with a // deduplicated version of those children. for t := range postorder(t, d.visited) { for c := range children(t) { var err error *c, err = d.hashInsert(*c) if err != nil { return nil, err } } } // Finally, deduplicate the root type itself. return d.hashInsert(t) } // hashInsert attempts to deduplicate t by hashing it and comparing against // other types with the same hash. 
Returns the Type to be used as the common // substitute at this position in the graph. func (d *deduper) hashInsert(t Type) (Type, error) { // If we have deduplicated this type before, return the result of that // deduplication. if done, ok := d.done[t]; ok { return done, nil } // Compute the hash of this type. Types with the same hash are candidates for // deduplication. hash, err := d.hash(t, -1) if err != nil { return nil, err } // A hash collision is possible, so we need to compare against all candidates // with the same hash. for _, candidate := range d.hashed[hash] { // Pre-size the visited slice, experimentation on VMLinux shows a capacity // of 16 to give the best performance. const visitedCapacity = 16 err := d.typesEquivalent(candidate, t, make([]Type, 0, visitedCapacity)) if errors.Is(err, errNotEquivalent) { continue } if err != nil { return nil, err } // Found a Type that's both Equivalent and hashes to the same value, choose // it as the deduplicated version. d.done[t] = candidate return candidate, nil } d.hashed[hash] = append(d.hashed[hash], t) return t, nil } // The hash of a Type is the same given its pointer and depth budget. type hashCacheKey struct { t Type depthBudget int } // hash computes a hash for t. The produced hash is the same for Types which // are similar. The hash can collide such that two different Types may produce // the same hash, so equivalence must be checked explicitly. It will recurse // into children. The initial call should use a depthBudget of -1. func (d *deduper) hash(t Type, depthBudget int) (uint64, error) { if depthBudget == 0 { return 0, nil } h := &maphash.Hash{} h.SetSeed(d.seed) switch t := t.(type) { case *Void: maphash.WriteComparable(h, kindUnknown) case *Int: maphash.WriteComparable(h, kindInt) maphash.WriteComparable(h, *t) case *Pointer: maphash.WriteComparable(h, kindPointer) // If the depth budget is positive, decrement it every time we follow a // pointer. 
if depthBudget > 0 { depthBudget-- } // If this is the first time we are following a pointer, set the depth // budget. This limits amount of recursion we do when hashing pointers that // form cycles. This is cheaper than tracking visited types and works // because hash collisions are allowed. if depthBudget < 0 { depthBudget = 1 // Double pointers are common in C. However, with a depth budget of 1, all // double pointers would hash the same, causing a performance issue when // checking equivalence. So we give double pointers a bit more budget. if _, ok := t.Target.(*Pointer); ok { depthBudget = 2 } } sub, err := d.hash(t.Target, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Array: maphash.WriteComparable(h, kindArray) maphash.WriteComparable(h, t.Nelems) sub, err := d.hash(t.Index, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) _, err = d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Struct, *Union: // Check the cache to avoid recomputing the hash for this type and depth // budget. 
key := hashCacheKey{t, depthBudget} if cached, ok := d.hashCache[key]; ok { return cached, nil } var members []Member switch t := t.(type) { case *Struct: maphash.WriteComparable(h, kindStruct) maphash.WriteComparable(h, t.Name) maphash.WriteComparable(h, t.Size) members = t.Members case *Union: maphash.WriteComparable(h, kindUnion) maphash.WriteComparable(h, t.Name) maphash.WriteComparable(h, t.Size) members = t.Members } maphash.WriteComparable(h, len(members)) for _, m := range members { maphash.WriteComparable(h, m.Name) maphash.WriteComparable(h, m.Offset) sub, err := d.hash(m.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) } sum := h.Sum64() d.hashCache[key] = sum return sum, nil case *Enum: maphash.WriteComparable(h, kindEnum) maphash.WriteComparable(h, t.Name) maphash.WriteComparable(h, t.Size) maphash.WriteComparable(h, t.Signed) for _, v := range t.Values { maphash.WriteComparable(h, v) } case *Fwd: maphash.WriteComparable(h, kindForward) maphash.WriteComparable(h, *t) case *Typedef: maphash.WriteComparable(h, kindTypedef) maphash.WriteComparable(h, t.Name) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Volatile: maphash.WriteComparable(h, kindVolatile) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Const: maphash.WriteComparable(h, kindConst) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Restrict: maphash.WriteComparable(h, kindRestrict) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Func: maphash.WriteComparable(h, kindFunc) maphash.WriteComparable(h, t.Name) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *FuncProto: // It turns out that pointers to function prototypes are common in C code, // function 
pointers. Function prototypes frequently have similar patterns // of [ptr, ptr] -> int, or [ptr, ptr, ptr] -> int. Causing frequent hash // collisions, for the default depth budget of 1. So allow one additional // level of pointers when we encounter a function prototype. if depthBudget >= 0 { depthBudget++ } maphash.WriteComparable(h, kindFuncProto) for _, p := range t.Params { maphash.WriteComparable(h, p.Name) sub, err := d.hash(p.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) } sub, err := d.hash(t.Return, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Var: maphash.WriteComparable(h, kindVar) maphash.WriteComparable(h, t.Name) maphash.WriteComparable(h, t.Linkage) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Datasec: maphash.WriteComparable(h, kindDatasec) maphash.WriteComparable(h, t.Name) for _, v := range t.Vars { maphash.WriteComparable(h, v.Offset) maphash.WriteComparable(h, v.Size) sub, err := d.hash(v.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) } case *declTag: maphash.WriteComparable(h, kindDeclTag) maphash.WriteComparable(h, t.Value) maphash.WriteComparable(h, t.Index) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *TypeTag: maphash.WriteComparable(h, kindTypeTag) maphash.WriteComparable(h, t.Value) sub, err := d.hash(t.Type, depthBudget) if err != nil { return 0, err } maphash.WriteComparable(h, sub) case *Float: maphash.WriteComparable(h, kindFloat) maphash.WriteComparable(h, *t) default: return 0, fmt.Errorf("unsupported type for hashing: %T", t) } return h.Sum64(), nil } type typKey struct { a Type b Type } var errNotEquivalent = errors.New("types are not equivalent") // typesEquivalent checks if two types are Equivalent. 
func (d *deduper) typesEquivalent(ta, tb Type, visited []Type) error { // Fast path: if Types are Identical, they are also Equivalent. if ta == tb { return nil } switch a := ta.(type) { case *Void: if _, ok := tb.(*Void); ok { return nil } return errNotEquivalent case *Int: b, ok := tb.(*Int) if !ok { return errNotEquivalent } if a.Name != b.Name || a.Size != b.Size || a.Encoding != b.Encoding { return errNotEquivalent } return nil case *Enum: b, ok := tb.(*Enum) if !ok { return errNotEquivalent } if a.Name != b.Name || len(a.Values) != len(b.Values) { return errNotEquivalent } for i := range a.Values { if a.Values[i].Name != b.Values[i].Name || a.Values[i].Value != b.Values[i].Value { return errNotEquivalent } } return nil case *Fwd: b, ok := tb.(*Fwd) if !ok { return errNotEquivalent } if a.Name != b.Name || a.Kind != b.Kind { return errNotEquivalent } return nil case *Float: b, ok := tb.(*Float) if !ok { return errNotEquivalent } if a.Name != b.Name || a.Size != b.Size { return errNotEquivalent } return nil case *Array: b, ok := tb.(*Array) if !ok { return errNotEquivalent } if a.Nelems != b.Nelems { return errNotEquivalent } if err := d.typesEquivalent(a.Index, b.Index, visited); err != nil { return err } if err := d.typesEquivalent(a.Type, b.Type, visited); err != nil { return err } return nil case *Pointer: b, ok := tb.(*Pointer) if !ok { return errNotEquivalent } // Detect cycles by tracking visited types. Assume types are Equivalent if // we have already visited this type in the current Equivalence check. if slices.Contains(visited, ta) { return nil } visited = append(visited, ta) return d.typesEquivalent(a.Target, b.Target, visited) case *Struct, *Union: // Use a cache to avoid recomputation. We only do this for composite types // since they are where types fan out the most. For other types, the // overhead of the lookup and update outweighs performance benefits. 
cacheKey := typKey{a: ta, b: tb} if equal, ok := d.eqCache[cacheKey]; ok { if equal { return nil } return errNotEquivalent } compErr := d.compositeEquivalent(ta, tb, visited) d.eqCache[cacheKey] = compErr == nil return compErr case *Typedef: b, ok := tb.(*Typedef) if !ok { return errNotEquivalent } if a.Name != b.Name { return errNotEquivalent } return d.typesEquivalent(a.Type, b.Type, visited) case *Volatile: b, ok := tb.(*Volatile) if !ok { return errNotEquivalent } return d.typesEquivalent(a.Type, b.Type, visited) case *Const: b, ok := tb.(*Const) if !ok { return errNotEquivalent } return d.typesEquivalent(a.Type, b.Type, visited) case *Restrict: b, ok := tb.(*Restrict) if !ok { return errNotEquivalent } return d.typesEquivalent(a.Type, b.Type, visited) case *Func: b, ok := tb.(*Func) if !ok { return errNotEquivalent } if a.Name != b.Name { return errNotEquivalent } return d.typesEquivalent(a.Type, b.Type, visited) case *FuncProto: b, ok := tb.(*FuncProto) if !ok { return errNotEquivalent } if err := d.typesEquivalent(a.Return, b.Return, visited); err != nil { return err } if len(a.Params) != len(b.Params) { return errNotEquivalent } for i := range a.Params { if a.Params[i].Name != b.Params[i].Name { return errNotEquivalent } if err := d.typesEquivalent(a.Params[i].Type, b.Params[i].Type, visited); err != nil { return err } } return nil case *Var: b, ok := tb.(*Var) if !ok { return errNotEquivalent } if a.Name != b.Name { return errNotEquivalent } if err := d.typesEquivalent(a.Type, b.Type, visited); err != nil { return err } if a.Linkage != b.Linkage { return errNotEquivalent } return nil case *Datasec: b, ok := tb.(*Datasec) if !ok { return errNotEquivalent } if a.Name != b.Name || len(a.Vars) != len(b.Vars) { return errNotEquivalent } for i := range a.Vars { if a.Vars[i].Offset != b.Vars[i].Offset || a.Vars[i].Size != b.Vars[i].Size { return errNotEquivalent } if err := d.typesEquivalent(a.Vars[i].Type, b.Vars[i].Type, visited); err != nil { return err } } 
return nil case *declTag: b, ok := tb.(*declTag) if !ok { return errNotEquivalent } if a.Value != b.Value || a.Index != b.Index { return errNotEquivalent } return d.typesEquivalent(a.Type, b.Type, visited) case *TypeTag: b, ok := tb.(*TypeTag) if !ok { return errNotEquivalent } if a.Value != b.Value { return errNotEquivalent } if err := d.typesEquivalent(a.Type, b.Type, visited); err != nil { return err } return nil default: return fmt.Errorf("unsupported type for equivalence: %T", a) } } // compositeEquivalent checks if two composite types (Struct or Union) are // Equivalent. func (d *deduper) compositeEquivalent(at, bt Type, visited []Type) error { var ma, mb []Member switch a := at.(type) { case *Struct: b, ok := bt.(*Struct) if !ok { return errNotEquivalent } if a.Name != b.Name || a.Size != b.Size || len(a.Members) != len(b.Members) { return errNotEquivalent } ma = a.Members mb = b.Members case *Union: b, ok := bt.(*Union) if !ok { return errNotEquivalent } if a.Name != b.Name || a.Size != b.Size || len(a.Members) != len(b.Members) { return errNotEquivalent } ma = a.Members mb = b.Members } for i := range ma { if ma[i].Name != mb[i].Name || ma[i].Offset != mb[i].Offset { return errNotEquivalent } if err := d.typesEquivalent(ma[i].Type, mb[i].Type, visited); err != nil { return err } } return nil } ================================================ FILE: btf/dedup_test.go ================================================ package btf import ( "testing" "github.com/go-quicktest/qt" ) func countTypes(typs ...Type) int { i := 0 visited := make(map[Type]struct{}) for _, typ := range typs { for range postorder(typ, visited) { i++ } } return i } func TestDedupSKBuff(t *testing.T) { vmlinux := vmlinuxTestdataBytes(t) spec, err := loadRawSpec(vmlinux, nil) qt.Assert(t, qt.IsNil(err)) var skBuffOne *Struct qt.Assert(t, qt.IsNil(spec.TypeByName("sk_buff", &skBuffOne))) skbCount := countTypes(skBuffOne) spec = spec.Copy() var skBuffTwo *Struct qt.Assert(t, 
qt.IsNil(spec.TypeByName("sk_buff", &skBuffTwo))) deduper := newDeduper() types := []Type{skBuffOne, skBuffTwo} for i, typ := range types { types[i], err = deduper.deduplicate(typ) } qt.Assert(t, qt.IsNil(err)) dedupedCount := countTypes(types...) qt.Assert(t, qt.Equals(skbCount, dedupedCount), qt.Commentf("Expected deduplicated sk_buff to have same number of types as original")) } func TestDedupVmlinux(t *testing.T) { vmlinux := vmlinuxTestdataBytes(t) spec1, err := loadRawSpec(vmlinux, nil) qt.Assert(t, qt.IsNil(err)) spec2 := spec1.Copy() rootTypes := func(spec *Spec) []Type { refs := make(map[Type]int) for t := range spec.All() { refs[t] = 0 } for t := range spec.All() { for child := range children(t) { refs[*child]++ } } types := make([]Type, 0) for typ := range refs { if refs[typ] == 0 { types = append(types, typ) } } return types } spec1Roots := rootTypes(spec1) spec1TypeCount := countTypes(spec1Roots...) spec2Roots := rootTypes(spec2) types := append(spec1Roots, spec2Roots...) deduper := newDeduper() for i, typ := range types { types[i], err = deduper.deduplicate(typ) qt.Assert(t, qt.IsNil(err)) } qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(countTypes(types...), spec1TypeCount), qt.Commentf("Expected deduplicated vmlinux to have same number of types as original")) } func BenchmarkDeduplicateSKBuff(b *testing.B) { b.ReportAllocs() vmlinux := vmlinuxTestdataBytes(b) base, err := loadRawSpec(vmlinux, nil) qt.Assert(b, qt.IsNil(err)) // Obtain b.N unique copies of sk_buff. 
types := make([]Type, 0, b.N) for range b.N { var skb *Struct if err := base.Copy().TypeByName("sk_buff", &skb); err != nil { b.Fatal(err) } types = append(types, skb) } dedup := newDeduper() b.ResetTimer() for i := range b.N { if _, err := dedup.deduplicate(types[i]); err != nil { b.Fatal(err) } } } func BenchmarkDeduplicateVMLinux(b *testing.B) { b.ReportAllocs() vmlinux := vmlinuxTestdataBytes(b) base, err := loadRawSpec(vmlinux, nil) qt.Assert(b, qt.IsNil(err)) var types [][]Type for range b.N { var specTypes []Type for typ := range base.Copy().All() { specTypes = append(specTypes, typ) } types = append(types, specTypes) } dedup := newDeduper() b.ResetTimer() for i := range b.N { for _, typ := range types[i] { if _, err := dedup.deduplicate(typ); err != nil { b.Fatal(err) } } } } ================================================ FILE: btf/doc.go ================================================ // Package btf handles data encoded according to the BPF Type Format. // // The canonical documentation lives in the Linux kernel repository and is // available at https://www.kernel.org/doc/html/latest/bpf/btf.html package btf ================================================ FILE: btf/ext_info.go ================================================ package btf import ( "bytes" "encoding/binary" "errors" "fmt" "io" "math" "sort" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" ) // ExtInfos contains raw, per-section extended BTF metadata from the .BTF.ext // ELF section. type ExtInfos struct { Funcs map[string]FuncOffsets Lines map[string]LineOffsets CORERelos map[string]CORERelocationOffsets } // Section returns the FuncOffsets, LineOffsets and CORERelocationOffsets for // the given section name. Returns all nils if ExtInfos is nil, or individual // nils if there is no metadata of that type for the section. 
func (ei *ExtInfos) Section(name string) (FuncOffsets, LineOffsets, CORERelocationOffsets) { if ei == nil { return nil, nil, nil } return ei.Funcs[name], ei.Lines[name], ei.CORERelos[name] } // loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF. // // Returns an error wrapping ErrNotFound if no ext infos are present. func loadExtInfosFromELF(file *internal.SafeELFFile, spec *Spec) (*ExtInfos, error) { section := file.Section(".BTF.ext") if section == nil { return nil, fmt.Errorf("btf ext infos: %w", ErrNotFound) } if section.ReaderAt == nil { return nil, fmt.Errorf("compressed ext_info is not supported") } return loadExtInfos(section.ReaderAt, file.ByteOrder, spec) } // loadExtInfos parses bare ext infos. func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec) (*ExtInfos, error) { // Open unbuffered section reader. binary.Read() calls io.ReadFull on // the header structs, resulting in one syscall per header. headerRd := io.NewSectionReader(r, 0, math.MaxInt64) extHeader, err := parseBTFExtHeader(headerRd, bo) if err != nil { return nil, fmt.Errorf("parsing BTF extension header: %w", err) } coreHeader, err := parseBTFExtCOREHeader(headerRd, bo, extHeader) if err != nil { return nil, fmt.Errorf("parsing BTF CO-RE header: %w", err) } buf := internal.NewBufferedSectionReader(r, extHeader.funcInfoStart(), int64(extHeader.FuncInfoLen)) btfFuncInfos, err := parseFuncInfos(buf, bo, spec.strings) if err != nil { return nil, fmt.Errorf("parsing BTF function info: %w", err) } funcInfos := make(map[string]FuncOffsets, len(btfFuncInfos)) for section, bfis := range btfFuncInfos { funcInfos[section], err = newFuncOffsets(bfis, spec) if err != nil { return nil, fmt.Errorf("section %s: func infos: %w", section, err) } } buf = internal.NewBufferedSectionReader(r, extHeader.lineInfoStart(), int64(extHeader.LineInfoLen)) btfLineInfos, err := parseLineInfos(buf, bo, spec.strings) if err != nil { return nil, fmt.Errorf("parsing BTF line info: %w", err) } 
lineInfos := make(map[string]LineOffsets, len(btfLineInfos)) for section, blis := range btfLineInfos { lineInfos[section], err = newLineInfos(blis, spec.strings) if err != nil { return nil, fmt.Errorf("section %s: line infos: %w", section, err) } } if coreHeader == nil || coreHeader.COREReloLen == 0 { return &ExtInfos{funcInfos, lineInfos, nil}, nil } var btfCORERelos map[string][]bpfCORERelo buf = internal.NewBufferedSectionReader(r, extHeader.coreReloStart(coreHeader), int64(coreHeader.COREReloLen)) btfCORERelos, err = parseCORERelos(buf, bo, spec.strings) if err != nil { return nil, fmt.Errorf("parsing CO-RE relocation info: %w", err) } coreRelos := make(map[string]CORERelocationOffsets, len(btfCORERelos)) for section, brs := range btfCORERelos { coreRelos[section], err = newRelocationInfos(brs, spec, spec.strings) if err != nil { return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err) } } return &ExtInfos{funcInfos, lineInfos, coreRelos}, nil } // MarshalExtInfos encodes function and line info embedded in insns into kernel // wire format. // // If an instruction has an [asm.Comment], it will be synthesized into a mostly // empty line info. 
func MarshalExtInfos(insns asm.Instructions, b *Builder) (funcInfos, lineInfos []byte, _ error) { iter := insns.Iterate() for iter.Next() { if iter.Ins.Source() != nil || FuncMetadata(iter.Ins) != nil { goto marshal } } return nil, nil, nil marshal: var fiBuf, liBuf bytes.Buffer for { if fn := FuncMetadata(iter.Ins); fn != nil { fi := &FuncOffset{ Func: fn, Offset: iter.Offset, } if err := fi.marshal(&fiBuf, b); err != nil { return nil, nil, fmt.Errorf("write func info: %w", err) } } if source := iter.Ins.Source(); source != nil { var line *Line if l, ok := source.(*Line); ok { line = l } else { line = &Line{ line: source.String(), } } li := &LineOffset{ Offset: iter.Offset, Line: line, } if err := li.marshal(&liBuf, b); err != nil { return nil, nil, fmt.Errorf("write line info: %w", err) } } if !iter.Next() { break } } return fiBuf.Bytes(), liBuf.Bytes(), nil } // btfExtHeader is found at the start of the .BTF.ext section. type btfExtHeader struct { Magic uint16 Version uint8 Flags uint8 // HdrLen is larger than the size of struct btfExtHeader when it is // immediately followed by a btfExtCOREHeader. HdrLen uint32 FuncInfoOff uint32 FuncInfoLen uint32 LineInfoOff uint32 LineInfoLen uint32 } // parseBTFExtHeader parses the header of the .BTF.ext section. 
func parseBTFExtHeader(r io.Reader, bo binary.ByteOrder) (*btfExtHeader, error) { var header btfExtHeader if err := binary.Read(r, bo, &header); err != nil { return nil, fmt.Errorf("can't read header: %v", err) } if header.Magic != btfMagic { return nil, fmt.Errorf("incorrect magic value %v", header.Magic) } if header.Version != 1 { return nil, fmt.Errorf("unexpected version %v", header.Version) } if header.Flags != 0 { return nil, fmt.Errorf("unsupported flags %v", header.Flags) } if int64(header.HdrLen) < int64(binary.Size(&header)) { return nil, fmt.Errorf("header length shorter than btfExtHeader size") } return &header, nil } // funcInfoStart returns the offset from the beginning of the .BTF.ext section // to the start of its func_info entries. func (h *btfExtHeader) funcInfoStart() int64 { return int64(h.HdrLen + h.FuncInfoOff) } // lineInfoStart returns the offset from the beginning of the .BTF.ext section // to the start of its line_info entries. func (h *btfExtHeader) lineInfoStart() int64 { return int64(h.HdrLen + h.LineInfoOff) } // coreReloStart returns the offset from the beginning of the .BTF.ext section // to the start of its CO-RE relocation entries. func (h *btfExtHeader) coreReloStart(ch *btfExtCOREHeader) int64 { return int64(h.HdrLen + ch.COREReloOff) } // btfExtCOREHeader is found right after the btfExtHeader when its HdrLen // field is larger than its size. type btfExtCOREHeader struct { COREReloOff uint32 COREReloLen uint32 } // parseBTFExtCOREHeader parses the tail of the .BTF.ext header. If additional // header bytes are present, extHeader.HdrLen will be larger than the struct, // indicating the presence of a CO-RE extension header. 
func parseBTFExtCOREHeader(r io.Reader, bo binary.ByteOrder, extHeader *btfExtHeader) (*btfExtCOREHeader, error) { extHdrSize := int64(binary.Size(&extHeader)) remainder := int64(extHeader.HdrLen) - extHdrSize if remainder == 0 { return nil, nil } var coreHeader btfExtCOREHeader if err := binary.Read(r, bo, &coreHeader); err != nil { return nil, fmt.Errorf("can't read header: %v", err) } return &coreHeader, nil } type btfExtInfoSec struct { SecNameOff uint32 NumInfo uint32 } // parseExtInfoSec parses a btf_ext_info_sec header within .BTF.ext, // appearing within func_info and line_info sub-sections. // These headers appear once for each program section in the ELF and are // followed by one or more func/line_info records for the section. func parseExtInfoSec(r io.Reader, bo binary.ByteOrder, strings *stringTable) (string, *btfExtInfoSec, error) { var infoHeader btfExtInfoSec if err := binary.Read(r, bo, &infoHeader); err != nil { return "", nil, fmt.Errorf("read ext info header: %w", err) } secName, err := strings.Lookup(infoHeader.SecNameOff) if err != nil { return "", nil, fmt.Errorf("get section name: %w", err) } if secName == "" { return "", nil, fmt.Errorf("extinfo header refers to empty section name") } if infoHeader.NumInfo == 0 { return "", nil, fmt.Errorf("section %s has zero records", secName) } return secName, &infoHeader, nil } // parseExtInfoRecordSize parses the uint32 at the beginning of a func_infos // or line_infos segment that describes the length of all extInfoRecords in // that segment. func parseExtInfoRecordSize(r io.Reader, bo binary.ByteOrder) (uint32, error) { const maxRecordSize = 256 var recordSize uint32 if err := binary.Read(r, bo, &recordSize); err != nil { return 0, fmt.Errorf("can't read record size: %v", err) } if recordSize < 4 { // Need at least InsnOff worth of bytes per record. 
return 0, errors.New("record size too short") } if recordSize > maxRecordSize { return 0, fmt.Errorf("record size %v exceeds %v", recordSize, maxRecordSize) } return recordSize, nil } // FuncOffsets is a slice of FuncOffsets sorted by offset. type FuncOffsets = []FuncOffset // The size of a FuncInfo in BTF wire format. var FuncInfoSize = uint32(binary.Size(bpfFuncInfo{})) // FuncOffset represents a [btf.Func] and its raw instruction offset within a // BPF program. type FuncOffset struct { Offset asm.RawInstructionOffset Func *Func } type bpfFuncInfo struct { // Instruction offset of the function within an ELF section. InsnOff uint32 TypeID TypeID } func newFuncOffset(fi bpfFuncInfo, spec *Spec) (*FuncOffset, error) { typ, err := spec.TypeByID(fi.TypeID) if err != nil { return nil, err } fn, ok := typ.(*Func) if !ok { return nil, fmt.Errorf("type ID %d is a %T, but expected a Func", fi.TypeID, typ) } // C doesn't have anonymous functions, but check just in case. if fn.Name == "" { return nil, fmt.Errorf("func with type ID %d doesn't have a name", fi.TypeID) } return &FuncOffset{ asm.RawInstructionOffset(fi.InsnOff), fn, }, nil } func newFuncOffsets(bfis []bpfFuncInfo, spec *Spec) (FuncOffsets, error) { fos := make(FuncOffsets, 0, len(bfis)) for _, bfi := range bfis { fi, err := newFuncOffset(bfi, spec) if err != nil { return FuncOffsets{}, fmt.Errorf("offset %d: %w", bfi.InsnOff, err) } fos = append(fos, *fi) } sort.Slice(fos, func(i, j int) bool { return fos[i].Offset <= fos[j].Offset }) return fos, nil } // LoadFuncInfos parses BTF func info from kernel wire format into a // [FuncOffsets], a sorted slice of [btf.Func]s of (sub)programs within a BPF // program with their corresponding raw instruction offsets. 
func LoadFuncInfos(reader io.Reader, bo binary.ByteOrder, recordNum uint32, spec *Spec) (FuncOffsets, error) { fis, err := parseFuncInfoRecords( reader, bo, FuncInfoSize, recordNum, false, ) if err != nil { return FuncOffsets{}, fmt.Errorf("parsing BTF func info: %w", err) } return newFuncOffsets(fis, spec) } // marshal into the BTF wire format. func (fi *FuncOffset) marshal(w *bytes.Buffer, b *Builder) error { id, err := b.Add(fi.Func) if err != nil { return err } bfi := bpfFuncInfo{ InsnOff: uint32(fi.Offset), TypeID: id, } buf := make([]byte, FuncInfoSize) internal.NativeEndian.PutUint32(buf, bfi.InsnOff) internal.NativeEndian.PutUint32(buf[4:], uint32(bfi.TypeID)) _, err = w.Write(buf) return err } // parseFuncInfos parses a func_info sub-section within .BTF.ext ito a map of // func infos indexed by section name. func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) { recordSize, err := parseExtInfoRecordSize(r, bo) if err != nil { return nil, err } result := make(map[string][]bpfFuncInfo) for { secName, infoHeader, err := parseExtInfoSec(r, bo, strings) if errors.Is(err, io.EOF) { return result, nil } if err != nil { return nil, err } records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo, true) if err != nil { return nil, fmt.Errorf("section %v: %w", secName, err) } result[secName] = records } } // parseFuncInfoRecords parses a stream of func_infos into a funcInfos. // These records appear after a btf_ext_info_sec header in the func_info // sub-section of .BTF.ext. func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, offsetInBytes bool) ([]bpfFuncInfo, error) { var out []bpfFuncInfo var fi bpfFuncInfo if exp, got := FuncInfoSize, recordSize; exp != got { // BTF blob's record size is longer than we know how to parse. 
return nil, fmt.Errorf("expected FuncInfo record size %d, but BTF blob contains %d", exp, got) } for i := uint32(0); i < recordNum; i++ { if err := binary.Read(r, bo, &fi); err != nil { return nil, fmt.Errorf("can't read function info: %v", err) } if offsetInBytes { if fi.InsnOff%asm.InstructionSize != 0 { return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff) } // ELF tracks offset in bytes, the kernel expects raw BPF instructions. // Convert as early as possible. fi.InsnOff /= asm.InstructionSize } out = append(out, fi) } return out, nil } var LineInfoSize = uint32(binary.Size(bpfLineInfo{})) // Line represents the location and contents of a single line of source // code a BPF ELF was compiled from. type Line struct { fileName string line string lineNumber uint32 lineColumn uint32 } func (li *Line) FileName() string { return li.fileName } func (li *Line) Line() string { return li.line } func (li *Line) LineNumber() uint32 { return li.lineNumber } func (li *Line) LineColumn() uint32 { return li.lineColumn } func (li *Line) String() string { return li.line } // LineOffsets is a slice of LineOffsets sorted by offset. type LineOffsets = []LineOffset // LineOffset represents a line info and its raw instruction offset. type LineOffset struct { Offset asm.RawInstructionOffset Line *Line } // Constants for the format of bpfLineInfo.LineCol. const ( bpfLineShift = 10 bpfLineMax = (1 << (32 - bpfLineShift)) - 1 bpfColumnMax = (1 << bpfLineShift) - 1 ) type bpfLineInfo struct { // Instruction offset of the line within the whole instruction stream, in instructions. InsnOff uint32 FileNameOff uint32 LineOff uint32 LineCol uint32 } // LoadLineInfos parses BTF line info in kernel wire format. 
func LoadLineInfos(reader io.Reader, bo binary.ByteOrder, recordNum uint32, spec *Spec) (LineOffsets, error) { lis, err := parseLineInfoRecords( reader, bo, LineInfoSize, recordNum, false, ) if err != nil { return LineOffsets{}, fmt.Errorf("parsing BTF line info: %w", err) } return newLineInfos(lis, spec.strings) } func newLineInfo(li bpfLineInfo, strings *stringTable) (LineOffset, error) { line, err := strings.LookupCached(li.LineOff) if err != nil { return LineOffset{}, fmt.Errorf("lookup of line: %w", err) } fileName, err := strings.LookupCached(li.FileNameOff) if err != nil { return LineOffset{}, fmt.Errorf("lookup of filename: %w", err) } lineNumber := li.LineCol >> bpfLineShift lineColumn := li.LineCol & bpfColumnMax return LineOffset{ asm.RawInstructionOffset(li.InsnOff), &Line{ fileName, line, lineNumber, lineColumn, }, }, nil } func newLineInfos(blis []bpfLineInfo, strings *stringTable) (LineOffsets, error) { lis := make([]LineOffset, 0, len(blis)) for _, bli := range blis { li, err := newLineInfo(bli, strings) if err != nil { return LineOffsets{}, fmt.Errorf("offset %d: %w", bli.InsnOff, err) } lis = append(lis, li) } sort.Slice(lis, func(i, j int) bool { return lis[i].Offset <= lis[j].Offset }) return lis, nil } // marshal writes the binary representation of the LineInfo to w. 
func (li *LineOffset) marshal(w *bytes.Buffer, b *Builder) error { line := li.Line if line.lineNumber > bpfLineMax { return fmt.Errorf("line %d exceeds %d", line.lineNumber, bpfLineMax) } if line.lineColumn > bpfColumnMax { return fmt.Errorf("column %d exceeds %d", line.lineColumn, bpfColumnMax) } fileNameOff, err := b.addString(line.fileName) if err != nil { return fmt.Errorf("file name %q: %w", line.fileName, err) } lineOff, err := b.addString(line.line) if err != nil { return fmt.Errorf("line %q: %w", line.line, err) } bli := bpfLineInfo{ uint32(li.Offset), fileNameOff, lineOff, (line.lineNumber << bpfLineShift) | line.lineColumn, } buf := make([]byte, LineInfoSize) internal.NativeEndian.PutUint32(buf, bli.InsnOff) internal.NativeEndian.PutUint32(buf[4:], bli.FileNameOff) internal.NativeEndian.PutUint32(buf[8:], bli.LineOff) internal.NativeEndian.PutUint32(buf[12:], bli.LineCol) _, err = w.Write(buf) return err } // parseLineInfos parses a line_info sub-section within .BTF.ext ito a map of // line infos indexed by section name. func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfLineInfo, error) { recordSize, err := parseExtInfoRecordSize(r, bo) if err != nil { return nil, err } result := make(map[string][]bpfLineInfo) for { secName, infoHeader, err := parseExtInfoSec(r, bo, strings) if errors.Is(err, io.EOF) { return result, nil } if err != nil { return nil, err } records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo, true) if err != nil { return nil, fmt.Errorf("section %v: %w", secName, err) } result[secName] = records } } // parseLineInfoRecords parses a stream of line_infos into a lineInfos. // These records appear after a btf_ext_info_sec header in the line_info // sub-section of .BTF.ext. 
func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, offsetInBytes bool) ([]bpfLineInfo, error) { if exp, got := uint32(binary.Size(bpfLineInfo{})), recordSize; exp != got { // BTF blob's record size is longer than we know how to parse. return nil, fmt.Errorf("expected LineInfo record size %d, but BTF blob contains %d", exp, got) } out := make([]bpfLineInfo, recordNum) if err := binary.Read(r, bo, out); err != nil { return nil, fmt.Errorf("can't read line info: %v", err) } if offsetInBytes { for i := range out { li := &out[i] if li.InsnOff%asm.InstructionSize != 0 { return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff) } // ELF tracks offset in bytes, the kernel expects raw BPF instructions. // Convert as early as possible. li.InsnOff /= asm.InstructionSize } } return out, nil } // bpfCORERelo matches the kernel's struct bpf_core_relo. type bpfCORERelo struct { InsnOff uint32 TypeID TypeID AccessStrOff uint32 Kind coreKind } type CORERelocation struct { // The local type of the relocation, stripped of typedefs and qualifiers. typ Type accessor coreAccessor kind coreKind // The ID of the local type in the source BTF. id TypeID } func (cr *CORERelocation) String() string { return fmt.Sprintf("CORERelocation(%s, %s[%s], local_id=%d)", cr.kind, cr.typ, cr.accessor, cr.id) } type coreRelocationMeta struct{} // CORERelocationMetadata returns the CORERelocation associated with ins. func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation { relo, _ := ins.Metadata.Get(coreRelocationMeta{}).(*CORERelocation) return relo } // WithCORERelocationMetadata associates a CORERelocation with ins and returns // the modified Instruction. func WithCORERelocationMetadata(ins asm.Instruction, relo *CORERelocation) asm.Instruction { ins.Metadata.Set(coreRelocationMeta{}, relo) return ins } // CORERelocationOffsets is a slice of CORERelocationOffsets sorted by offset. 
type CORERelocationOffsets = []CORERelocationOffset // CORERelocationOffset represents a CO-RE relocation and an offset at which it // should be applied. type CORERelocationOffset struct { Relo *CORERelocation Offset asm.RawInstructionOffset } func newRelocationInfo(relo bpfCORERelo, spec *Spec, strings *stringTable) (*CORERelocationOffset, error) { typ, err := spec.TypeByID(relo.TypeID) if err != nil { return nil, err } accessorStr, err := strings.Lookup(relo.AccessStrOff) if err != nil { return nil, err } accessor, err := parseCOREAccessor(accessorStr) if err != nil { return nil, fmt.Errorf("accessor %q: %s", accessorStr, err) } return &CORERelocationOffset{ &CORERelocation{ typ, accessor, relo.Kind, relo.TypeID, }, asm.RawInstructionOffset(relo.InsnOff), }, nil } func newRelocationInfos(brs []bpfCORERelo, spec *Spec, strings *stringTable) (CORERelocationOffsets, error) { rs := make(CORERelocationOffsets, 0, len(brs)) for _, br := range brs { relo, err := newRelocationInfo(br, spec, strings) if err != nil { return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err) } rs = append(rs, *relo) } sort.Slice(rs, func(i, j int) bool { return rs[i].Offset < rs[j].Offset }) return rs, nil } var extInfoReloSize = binary.Size(bpfCORERelo{}) // parseCORERelos parses a core_relos sub-section within .BTF.ext ito a map of // CO-RE relocations indexed by section name. 
func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfCORERelo, error) { recordSize, err := parseExtInfoRecordSize(r, bo) if err != nil { return nil, err } if recordSize != uint32(extInfoReloSize) { return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize) } result := make(map[string][]bpfCORERelo) for { secName, infoHeader, err := parseExtInfoSec(r, bo, strings) if errors.Is(err, io.EOF) { return result, nil } if err != nil { return nil, err } records, err := parseCOREReloRecords(r, bo, infoHeader.NumInfo) if err != nil { return nil, fmt.Errorf("section %v: %w", secName, err) } result[secName] = records } } // parseCOREReloRecords parses a stream of CO-RE relocation entries into a // coreRelos. These records appear after a btf_ext_info_sec header in the // core_relos sub-section of .BTF.ext. func parseCOREReloRecords(r io.Reader, bo binary.ByteOrder, recordNum uint32) ([]bpfCORERelo, error) { var out []bpfCORERelo var relo bpfCORERelo for i := uint32(0); i < recordNum; i++ { if err := binary.Read(r, bo, &relo); err != nil { return nil, fmt.Errorf("can't read CO-RE relocation: %v", err) } if relo.InsnOff%asm.InstructionSize != 0 { return nil, fmt.Errorf("offset %v is not aligned with instruction size", relo.InsnOff) } // ELF tracks offset in bytes, the kernel expects raw BPF instructions. // Convert as early as possible. 
relo.InsnOff /= asm.InstructionSize out = append(out, relo) } return out, nil } ================================================ FILE: btf/ext_info_test.go ================================================ package btf import ( "bytes" "encoding/binary" "strings" "testing" "github.com/cilium/ebpf/internal" "github.com/go-quicktest/qt" ) func TestParseExtInfoBigRecordSize(t *testing.T) { rd := strings.NewReader("\xff\xff\xff\xff\x00\x00\x00\x000709171295166016") table, err := readStringTable(bytes.NewReader([]byte{0}), nil) if err != nil { t.Fatal(err) } if _, err := parseFuncInfos(rd, internal.NativeEndian, table); err == nil { t.Error("Parsing func info with large record size doesn't return an error") } if _, err := parseLineInfos(rd, internal.NativeEndian, table); err == nil { t.Error("Parsing line info with large record size doesn't return an error") } } func BenchmarkParseLineInfoRecords(b *testing.B) { size := uint32(binary.Size(bpfLineInfo{})) count := uint32(4096) buf := make([]byte, size*count) b.ReportAllocs() for b.Loop() { parseLineInfoRecords(bytes.NewReader(buf), internal.NativeEndian, size, count, true) } } func TestParseLineInfoRecordsAllocations(t *testing.T) { size := uint32(binary.Size(bpfLineInfo{})) count := uint32(4096) buf := make([]byte, size*count) allocs := testing.AllocsPerRun(5, func() { parseLineInfoRecords(bytes.NewReader(buf), internal.NativeEndian, size, count, true) }) // 7 is the number of allocations on go 1.22 // what we want to test is that we are not allocating // once per record qt.Assert(t, qt.IsTrue(allocs <= 7)) } ================================================ FILE: btf/feature.go ================================================ package btf import ( "errors" "math" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // haveBTF attempts to load a BTF blob containing an Int. It should pass on any // kernel that supports BPF_BTF_LOAD. 
var haveBTF = internal.NewFeatureTest("BTF", func() error { // 0-length anonymous integer err := probeBTF(&Int{}) if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { return internal.ErrNotSupported } return err }, "4.18") // haveMapBTF attempts to load a minimal BTF blob containing a Var. It is // used as a proxy for .bss, .data and .rodata map support, which generally // come with a Var and Datasec. These were introduced in Linux 5.2. var haveMapBTF = internal.NewFeatureTest("Map BTF (Var/Datasec)", func() error { if err := haveBTF(); err != nil { return err } v := &Var{ Name: "a", Type: &Pointer{(*Void)(nil)}, } err := probeBTF(v) if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { // Treat both EINVAL and EPERM as not supported: creating the map may still // succeed without Btf* attrs. return internal.ErrNotSupported } return err }, "5.2") // haveProgBTF attempts to load a BTF blob containing a Func and FuncProto. It // is used as a proxy for ext_info (func_info) support, which depends on // Func(Proto) by definition. 
var haveProgBTF = internal.NewFeatureTest("Program BTF (func/line_info)", func() error { if err := haveBTF(); err != nil { return err } fn := &Func{ Name: "a", Type: &FuncProto{Return: (*Void)(nil)}, } err := probeBTF(fn) if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { return internal.ErrNotSupported } return err }, "5.0") var haveFuncLinkage = internal.NewFeatureTest("BTF func linkage", func() error { if err := haveProgBTF(); err != nil { return err } fn := &Func{ Name: "a", Type: &FuncProto{Return: (*Void)(nil)}, Linkage: GlobalFunc, } err := probeBTF(fn) if errors.Is(err, unix.EINVAL) { return internal.ErrNotSupported } return err }, "5.6") var haveDeclTags = internal.NewFeatureTest("BTF decl tags", func() error { if err := haveBTF(); err != nil { return err } t := &Typedef{ Name: "a", Type: &Int{}, Tags: []string{"a"}, } err := probeBTF(t) if errors.Is(err, unix.EINVAL) { return internal.ErrNotSupported } return err }, "5.16") var haveTypeTags = internal.NewFeatureTest("BTF type tags", func() error { if err := haveBTF(); err != nil { return err } t := &TypeTag{ Type: &Int{}, Value: "a", } err := probeBTF(t) if errors.Is(err, unix.EINVAL) { return internal.ErrNotSupported } return err }, "5.17") var haveEnum64 = internal.NewFeatureTest("ENUM64", func() error { if err := haveBTF(); err != nil { return err } enum := &Enum{ Size: 8, Values: []EnumValue{ {"TEST", math.MaxUint32 + 1}, }, } err := probeBTF(enum) if errors.Is(err, unix.EINVAL) { return internal.ErrNotSupported } return err }, "6.0") func probeBTF(typ Type) error { b, err := NewBuilder([]Type{typ}, nil) if err != nil { return err } buf, err := b.Marshal(nil, nil) if err != nil { return err } fd, err := sys.BtfLoad(&sys.BtfLoadAttr{ Btf: sys.SlicePointer(buf), BtfSize: uint32(len(buf)), }) if err == nil { fd.Close() } return err } ================================================ FILE: btf/feature_test.go ================================================ package btf import ( "testing" 
"github.com/cilium/ebpf/internal/testutils" ) func TestHaveBTF(t *testing.T) { testutils.CheckFeatureTest(t, haveBTF) } func TestHaveMapBTF(t *testing.T) { testutils.CheckFeatureTest(t, haveMapBTF) } func TestHaveProgBTF(t *testing.T) { testutils.CheckFeatureTest(t, haveProgBTF) } func TestHaveFuncLinkage(t *testing.T) { testutils.CheckFeatureTest(t, haveFuncLinkage) } func TestHaveDeclTags(t *testing.T) { testutils.CheckFeatureTest(t, haveDeclTags) } func TestHaveTypeTags(t *testing.T) { testutils.CheckFeatureTest(t, haveTypeTags) } func TestHaveEnum64(t *testing.T) { testutils.CheckFeatureTest(t, haveEnum64) } ================================================ FILE: btf/format.go ================================================ package btf import ( "errors" "fmt" "strings" ) var errNestedTooDeep = errors.New("nested too deep") // GoFormatter converts a Type to Go syntax. // // A zero GoFormatter is valid to use. type GoFormatter struct { w strings.Builder // Types present in this map are referred to using the given name if they // are encountered when outputting another type. Names map[Type]string // Identifier is called for each field of struct-like types. By default the // field name is used as is. Identifier func(string) string // EnumIdentifier is called for each element of an enum. By default the // name of the enum type is concatenated with Identifier(element). EnumIdentifier func(name, element string) string } // TypeDeclaration generates a Go type declaration for a BTF type. 
func (gf *GoFormatter) TypeDeclaration(name string, typ Type) (string, error) { gf.w.Reset() if err := gf.writeTypeDecl(name, typ); err != nil { return "", err } return gf.w.String(), nil } func (gf *GoFormatter) identifier(s string) string { if gf.Identifier != nil { return gf.Identifier(s) } return s } func (gf *GoFormatter) enumIdentifier(name, element string) string { if gf.EnumIdentifier != nil { return gf.EnumIdentifier(name, element) } return name + gf.identifier(element) } // writeTypeDecl outputs a declaration of the given type. // // It encodes https://golang.org/ref/spec#Type_declarations: // // type foo struct { _ structs.HostLayout; bar uint32; } // type bar int32 func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error { if name == "" { return fmt.Errorf("need a name for type %s", typ) } typ = skipQualifiers(typ) fmt.Fprintf(&gf.w, "type %s ", name) if err := gf.writeTypeLit(typ, 0); err != nil { return err } e, ok := typ.(*Enum) if !ok || len(e.Values) == 0 { return nil } gf.w.WriteString("; const ( ") for _, ev := range e.Values { id := gf.enumIdentifier(name, ev.Name) var value any if e.Signed { value = int64(ev.Value) } else { value = ev.Value } fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, value) } gf.w.WriteString(")") return nil } // writeType outputs the name of a named type or a literal describing the type. // // It encodes https://golang.org/ref/spec#Types. // // foo (if foo is a named type) // uint32 func (gf *GoFormatter) writeType(typ Type, depth int) error { typ = skipQualifiers(typ) name := gf.Names[typ] if name != "" { gf.w.WriteString(name) return nil } return gf.writeTypeLit(typ, depth) } // writeTypeLit outputs a literal describing the type. // // The function ignores named types. // // It encodes https://golang.org/ref/spec#TypeLit. 
// // struct { _ structs.HostLayout; bar uint32; } // uint32 func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error { depth++ if depth > maxResolveDepth { return errNestedTooDeep } var err error switch v := skipQualifiers(typ).(type) { case *Int: err = gf.writeIntLit(v) case *Enum: if !v.Signed { gf.w.WriteRune('u') } switch v.Size { case 1: gf.w.WriteString("int8") case 2: gf.w.WriteString("int16") case 4: gf.w.WriteString("int32") case 8: gf.w.WriteString("int64") default: err = fmt.Errorf("invalid enum size %d", v.Size) } case *Typedef: err = gf.writeType(v.Type, depth) case *Array: fmt.Fprintf(&gf.w, "[%d]", v.Nelems) err = gf.writeType(v.Type, depth) case *Struct: err = gf.writeStructLit(v.Size, v.Members, depth) case *Union: // Always choose the first member to represent the union in Go. err = gf.writeStructLit(v.Size, v.Members[:1], depth) case *Datasec: err = gf.writeDatasecLit(v, depth) case *Var: err = gf.writeTypeLit(v.Type, depth) default: return fmt.Errorf("type %T: %w", v, ErrNotSupported) } if err != nil { return fmt.Errorf("%s: %w", typ, err) } return nil } func (gf *GoFormatter) writeIntLit(i *Int) error { bits := i.Size * 8 switch i.Encoding { case Bool: if i.Size != 1 { return fmt.Errorf("bool with size %d", i.Size) } gf.w.WriteString("bool") case Char: if i.Size != 1 { return fmt.Errorf("char with size %d", i.Size) } // BTF doesn't have a way to specify the signedness of a char. Assume // we are dealing with unsigned, since this works nicely with []byte // in Go code. 
fallthrough case Unsigned, Signed: stem := "uint" if i.Encoding == Signed { stem = "int" } if i.Size > 8 { fmt.Fprintf(&gf.w, "[%d]byte /* %s%d */", i.Size, stem, i.Size*8) } else { fmt.Fprintf(&gf.w, "%s%d", stem, bits) } default: return fmt.Errorf("can't encode %s", i.Encoding) } return nil } func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) error { gf.w.WriteString("struct { _ structs.HostLayout; ") prevOffset := uint32(0) skippedBitfield := false for i, m := range members { if m.BitfieldSize > 0 { skippedBitfield = true continue } offset := m.Offset.Bytes() if n := offset - prevOffset; skippedBitfield && n > 0 { fmt.Fprintf(&gf.w, "_ [%d]byte /* unsupported bitfield */; ", n) } else { gf.writePadding(n) } fieldSize, err := Sizeof(m.Type) if err != nil { return fmt.Errorf("field %d: %w", i, err) } prevOffset = offset + uint32(fieldSize) if prevOffset > size { return fmt.Errorf("field %d of size %d exceeds type size %d", i, fieldSize, size) } if err := gf.writeStructField(m, depth); err != nil { return fmt.Errorf("field %d: %w", i, err) } } gf.writePadding(size - prevOffset) gf.w.WriteString("}") return nil } func (gf *GoFormatter) writeStructField(m Member, depth int) error { if m.BitfieldSize > 0 { return fmt.Errorf("bitfields are not supported") } if m.Offset%8 != 0 { return fmt.Errorf("unsupported offset %d", m.Offset) } if m.Name == "" { // Special case a nested anonymous union like // struct foo { union { int bar; int baz }; } // by replacing the whole union with its first member. 
union, ok := m.Type.(*Union) if !ok { return fmt.Errorf("anonymous fields are not supported") } if len(union.Members) == 0 { return errors.New("empty anonymous union") } depth++ if depth > maxResolveDepth { return errNestedTooDeep } m := union.Members[0] size, err := Sizeof(m.Type) if err != nil { return err } if err := gf.writeStructField(m, depth); err != nil { return err } gf.writePadding(union.Size - uint32(size)) return nil } fmt.Fprintf(&gf.w, "%s ", gf.identifier(m.Name)) if err := gf.writeType(m.Type, depth); err != nil { return err } gf.w.WriteString("; ") return nil } func (gf *GoFormatter) writeDatasecLit(ds *Datasec, depth int) error { gf.w.WriteString("struct { _ structs.HostLayout; ") prevOffset := uint32(0) for i, vsi := range ds.Vars { v, ok := vsi.Type.(*Var) if !ok { return fmt.Errorf("can't format %s as part of data section", vsi.Type) } if v.Linkage != GlobalVar { // Ignore static, extern, etc. for now. continue } if v.Name == "" { return fmt.Errorf("variable %d: empty name", i) } gf.writePadding(vsi.Offset - prevOffset) prevOffset = vsi.Offset + vsi.Size fmt.Fprintf(&gf.w, "%s ", gf.identifier(v.Name)) if err := gf.writeType(v.Type, depth); err != nil { return fmt.Errorf("variable %d: %w", i, err) } gf.w.WriteString("; ") } gf.writePadding(ds.Size - prevOffset) gf.w.WriteString("}") return nil } func (gf *GoFormatter) writePadding(bytes uint32) { if bytes > 0 { fmt.Fprintf(&gf.w, "_ [%d]byte; ", bytes) } } func skipQualifiers(typ Type) Type { result := typ for depth := 0; depth <= maxResolveDepth; depth++ { switch v := (result).(type) { case qualifier: result = v.qualify() default: return result } } return &cycle{typ} } ================================================ FILE: btf/format_test.go ================================================ package btf import ( "errors" "fmt" "go/format" "math" "strings" "testing" ) func TestGoTypeDeclaration(t *testing.T) { tests := []struct { typ Type output string }{ {&Int{Size: 1}, "type t uint8"}, 
{&Int{Size: 1, Encoding: Bool}, "type t bool"}, {&Int{Size: 1, Encoding: Char}, "type t uint8"}, {&Int{Size: 2, Encoding: Signed}, "type t int16"}, {&Int{Size: 4, Encoding: Signed}, "type t int32"}, {&Int{Size: 8}, "type t uint64"}, {&Typedef{Name: "frob", Type: &Int{Size: 8}}, "type t uint64"}, {&Int{Size: 16}, "type t [16]byte /* uint128 */"}, {&Enum{Values: []EnumValue{{"FOO", 32}}, Size: 4}, "type t uint32; const ( tFOO t = 32; )"}, { &Enum{ Values: []EnumValue{ {"MINUS_ONE", math.MaxUint64}, {"MINUS_TWO", math.MaxUint64 - 1}, }, Size: 1, Signed: true, }, "type t int8; const ( tMINUS_ONE t = -1; tMINUS_TWO t = -2; )", }, { &Struct{ Name: "enum literals", Size: 2, Members: []Member{ {Name: "enum", Type: &Enum{Values: []EnumValue{{"BAR", 1}}, Size: 2}, Offset: 0}, }, }, "type t struct { _ structs.HostLayout; enum uint16; }", }, {&Array{Nelems: 2, Type: &Int{Size: 1}}, "type t [2]uint8"}, { &Union{ Size: 8, Members: []Member{ {Name: "a", Type: &Int{Size: 4}}, {Name: "b", Type: &Int{Size: 8}}, }, }, "type t struct { _ structs.HostLayout; a uint32; _ [4]byte; }", }, { &Struct{ Name: "field padding", Size: 16, Members: []Member{ {Name: "frob", Type: &Int{Size: 4}, Offset: 0}, {Name: "foo", Type: &Int{Size: 8}, Offset: 8 * 8}, }, }, "type t struct { _ structs.HostLayout; frob uint32; _ [4]byte; foo uint64; }", }, { &Struct{ Name: "end padding", Size: 16, Members: []Member{ {Name: "foo", Type: &Int{Size: 8}, Offset: 0}, {Name: "frob", Type: &Int{Size: 4}, Offset: 8 * 8}, }, }, "type t struct { _ structs.HostLayout; foo uint64; frob uint32; _ [4]byte; }", }, { &Struct{ Name: "bitfield", Size: 8, Members: []Member{ {Name: "foo", Type: &Int{Size: 4}, Offset: 0, BitfieldSize: 1}, {Name: "frob", Type: &Int{Size: 4}, Offset: 4 * 8}, }, }, "type t struct { _ structs.HostLayout; _ [4]byte /* unsupported bitfield */; frob uint32; }", }, { &Struct{ Name: "nested", Size: 8, Members: []Member{ { Name: "foo", Type: &Struct{ Size: 4, Members: []Member{ {Name: "bar", Type: &Int{Size: 
4}, Offset: 0}, }, }, }, {Name: "frob", Type: &Int{Size: 4}, Offset: 4 * 8}, }, }, "type t struct { _ structs.HostLayout; foo struct { _ structs.HostLayout; bar uint32; }; frob uint32; }", }, { &Struct{ Name: "nested anon union", Size: 8, Members: []Member{ { Name: "", Type: &Union{ Size: 4, Members: []Member{ {Name: "foo", Type: &Int{Size: 4}, Offset: 0}, {Name: "bar", Type: &Int{Size: 4}, Offset: 0}, }, }, }, }, }, "type t struct { _ structs.HostLayout; foo uint32; _ [4]byte; }", }, { &Datasec{ Size: 16, Vars: []VarSecinfo{ {&Var{Name: "s", Type: &Int{Size: 2}, Linkage: StaticVar}, 0, 2}, {&Var{Name: "g", Type: &Int{Size: 4}, Linkage: GlobalVar}, 4, 4}, {&Var{Name: "e", Type: &Int{Size: 8}, Linkage: ExternVar}, 8, 8}, }, }, "type t struct { _ structs.HostLayout; _ [4]byte; g uint32; _ [8]byte; }", }, {&Var{Type: &Int{Size: 4}}, "type t uint32"}, } for _, test := range tests { t.Run(fmt.Sprint(test.typ), func(t *testing.T) { have := mustGoTypeDeclaration(t, test.typ, nil, nil) if have != test.output { t.Errorf("Unexpected output:\n\t-%s\n\t+%s", test.output, have) } }) } } func TestGoTypeDeclarationNamed(t *testing.T) { e1 := &Enum{Name: "e1", Size: 4} s1 := &Struct{ Name: "s1", Size: 4, Members: []Member{ {Name: "frob", Type: e1}, }, } s2 := &Struct{ Name: "s2", Size: 4, Members: []Member{ {Name: "frood", Type: s1}, }, } td := &Typedef{Name: "td", Type: e1} arr := &Array{Nelems: 1, Type: td} tests := []struct { typ Type named []Type output string }{ {e1, []Type{e1}, "type t uint32"}, {s1, []Type{e1, s1}, "type t struct { _ structs.HostLayout; frob E1; }"}, {s2, []Type{e1}, "type t struct { _ structs.HostLayout; frood struct { _ structs.HostLayout; frob E1; }; }"}, {s2, []Type{e1, s1}, "type t struct { _ structs.HostLayout; frood S1; }"}, {td, nil, "type t uint32"}, {td, []Type{td}, "type t uint32"}, {arr, []Type{td}, "type t [1]TD"}, } for _, test := range tests { t.Run(fmt.Sprint(test.typ), func(t *testing.T) { names := make(map[Type]string) for _, t := range 
test.named { names[t] = strings.ToUpper(t.TypeName()) } have := mustGoTypeDeclaration(t, test.typ, names, nil) if have != test.output { t.Errorf("Unexpected output:\n\t-%s\n\t+%s", test.output, have) } }) } } func TestGoTypeDeclarationQualifiers(t *testing.T) { i := &Int{Size: 4} want := mustGoTypeDeclaration(t, i, nil, nil) tests := []struct { typ Type }{ {&Volatile{Type: i}}, {&Const{Type: i}}, {&Restrict{Type: i}}, } for _, test := range tests { t.Run(fmt.Sprint(test.typ), func(t *testing.T) { have := mustGoTypeDeclaration(t, test.typ, nil, nil) if have != want { t.Errorf("Unexpected output:\n\t-%s\n\t+%s", want, have) } }) } } func TestGoTypeDeclarationCycle(t *testing.T) { s := &Struct{Name: "cycle"} s.Members = []Member{{Name: "f", Type: s}} var gf GoFormatter _, err := gf.TypeDeclaration("t", s) if !errors.Is(err, errNestedTooDeep) { t.Fatal("Expected errNestedTooDeep, got", err) } } func TestRejectBogusTypes(t *testing.T) { tests := []struct { typ Type }{ {&Struct{ Size: 1, Members: []Member{ {Name: "foo", Type: &Int{Size: 2}, Offset: 0}, }, }}, {&Int{Size: 2, Encoding: Bool}}, {&Int{Size: 1, Encoding: Char | Signed}}, {&Int{Size: 2, Encoding: Char}}, } for _, test := range tests { t.Run(fmt.Sprint(test.typ), func(t *testing.T) { var gf GoFormatter _, err := gf.TypeDeclaration("t", test.typ) if err == nil { t.Fatal("TypeDeclaration does not reject bogus type") } }) } } func mustGoTypeDeclaration(tb testing.TB, typ Type, names map[Type]string, id func(string) string) string { tb.Helper() gf := GoFormatter{ Names: names, Identifier: id, } have, err := gf.TypeDeclaration("t", typ) if err != nil { tb.Fatal(err) } _, err = format.Source([]byte(have)) if err != nil { tb.Fatalf("Output can't be formatted: %s\n%s", err, have) } return have } ================================================ FILE: btf/fuzz_test.go ================================================ package btf import ( "bytes" "encoding/binary" "fmt" "io" "testing" "github.com/go-quicktest/qt" 
"github.com/cilium/ebpf/internal" ) func FuzzSpec(f *testing.F) { f.Add(mustBTFHeader(f)) f.Fuzz(func(t *testing.T, data []byte) { if len(data) < binary.Size(btfHeader{}) { t.Skip("data is too short") } spec, err := loadRawSpec(data, nil) if err != nil { if spec != nil { t.Fatal("spec is not nil") } return } if spec == nil { t.Fatal("spec is nil") } for typ, err := range spec.All() { if err == nil { fmt.Fprintf(io.Discard, "%+10v", typ) } } }) } func FuzzExtInfo(f *testing.F) { f.Add(mustBTFHeader(f), []byte("\x00foo\x00barfoo\x00")) f.Fuzz(func(t *testing.T, data, strings []byte) { if len(data) < binary.Size(btfExtHeader{}) { t.Skip("data is too short") } table, err := readStringTable(bytes.NewReader(strings), nil) if err != nil { t.Skip("invalid string table") } emptySpec := specFromTypes(t, nil) emptySpec.strings = table info, err := loadExtInfos(bytes.NewReader(data), internal.NativeEndian, emptySpec) if err != nil { if info != nil { t.Fatal("info is not nil") } } else if info == nil { t.Fatal("info is nil") } }) } func mustBTFHeader(f *testing.F) []byte { buf, err := binary.Append(nil, internal.NativeEndian, &btfHeader{ Magic: btfMagic, Version: 1, HdrLen: uint32(binary.Size(btfHeader{})), }) qt.Assert(f, qt.IsNil(err)) return buf } ================================================ FILE: btf/handle.go ================================================ package btf import ( "errors" "fmt" "math" "os" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // Handle is a reference to BTF loaded into the kernel. type Handle struct { fd *sys.FD // Size of the raw BTF in bytes. size uint32 needsKernelBase bool } // NewHandle loads the contents of a [Builder] into the kernel. // // Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF. 
func NewHandle(b *Builder) (*Handle, error) {
	// Marshal into a scratch buffer obtained from getByteSlice. The buffer is
	// handed back via putByteSlice when the function returns.
	small := getByteSlice()
	defer putByteSlice(small)

	buf, err := b.Marshal(*small, KernelMarshalOptions())
	if err != nil {
		return nil, fmt.Errorf("marshal BTF: %w", err)
	}

	return NewHandleFromRawBTF(buf)
}

// NewHandleFromRawBTF loads raw BTF into the kernel.
//
// If loading fails, the load is retried with a log buffer so that the
// returned error can include the log produced by the kernel.
//
// Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF.
func NewHandleFromRawBTF(btf []byte) (*Handle, error) {
	// Size of the log buffer used for the first retry.
	const minLogSize = 64 * 1024

	if platform.IsWindows {
		return nil, fmt.Errorf("btf: handle: %w", internal.ErrNotSupportedOnOS)
	}

	// BtfSize is a uint32, larger blobs can't be described to the kernel.
	if uint64(len(btf)) > math.MaxUint32 {
		return nil, errors.New("BTF exceeds the maximum size")
	}

	attr := &sys.BtfLoadAttr{
		Btf:     sys.SlicePointer(btf),
		BtfSize: uint32(len(btf)),
	}

	var (
		logBuf []byte
		err    error
	)
	// The first iteration loads without a log buffer. Subsequent iterations
	// retry with a log buffer of increasing size.
	for {
		var fd *sys.FD
		fd, err = sys.BtfLoad(attr)
		if err == nil {
			return &Handle{fd, attr.BtfSize, false}, nil
		}

		if attr.BtfLogTrueSize != 0 && attr.BtfLogSize >= attr.BtfLogTrueSize {
			// The log buffer already has the correct size.
			break
		}

		if attr.BtfLogSize != 0 && !errors.Is(err, unix.ENOSPC) {
			// Up until at least kernel 6.0, the BTF verifier does not return ENOSPC
			// if there are other verification errors. ENOSPC is only returned when
			// the BTF blob is correct, a log was requested, and the provided buffer
			// is too small. We're therefore not sure whether we got the full
			// log or not.
			break
		}

		// Make an educated guess how large the buffer should be. Start
		// at a reasonable minimum and then double the size.
		logSize := uint32(max(len(logBuf)*2, minLogSize))
		// A truncated size that is smaller than the current buffer means the
		// conversion to uint32 has overflowed.
		if int(logSize) < len(logBuf) {
			return nil, errors.New("overflow while probing log buffer size")
		}

		if attr.BtfLogTrueSize != 0 {
			// The kernel has given us a hint how large the log buffer has to be.
			logSize = attr.BtfLogTrueSize
		}

		logBuf = make([]byte, logSize)
		attr.BtfLogSize = logSize
		attr.BtfLogBuf = sys.SlicePointer(logBuf)
		attr.BtfLogLevel = 1
	}

	// Loading failed. Report missing BTF support in preference to the load
	// error itself.
	if err := haveBTF(); err != nil {
		return nil, err
	}

	return nil, internal.ErrorWithLog("load btf", err, logBuf)
}

// NewHandleFromID returns the BTF handle for a given id.
//
// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible.
//
// Returns ErrNotExist, if there is no BTF with the given id.
//
// Requires CAP_SYS_ADMIN.
func NewHandleFromID(id ID) (*Handle, error) {
	if platform.IsWindows {
		return nil, fmt.Errorf("btf: handle: %w", internal.ErrNotSupportedOnOS)
	}

	fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{
		Id: uint32(id),
	})
	if err != nil {
		return nil, fmt.Errorf("get FD for ID %d: %w", id, err)
	}

	info, err := newHandleInfoFromFD(fd)
	if err != nil {
		// The fd isn't handed to the caller, so close it here. The info error
		// is the one that gets reported.
		_ = fd.Close()
		return nil, err
	}

	// BTF of a kernel module requires a base Spec when it is parsed, see Spec.
	return &Handle{fd, info.size, info.IsModule()}, nil
}

// Spec parses the kernel BTF into Go types.
//
// base must contain type information for vmlinux if the handle is for
// a kernel module. It may be nil otherwise.
func (h *Handle) Spec(base *Spec) (*Spec, error) {
	// Fetch the raw BTF into a buffer of the size recorded in the handle.
	var btfInfo sys.BtfInfo
	btfBuffer := make([]byte, h.size)
	btfInfo.Btf = sys.SlicePointer(btfBuffer)
	btfInfo.BtfSize = uint32(len(btfBuffer))

	if err := sys.ObjInfo(h.fd, &btfInfo); err != nil {
		return nil, err
	}

	if h.needsKernelBase && base == nil {
		return nil, fmt.Errorf("missing base types")
	}

	return loadRawSpec(btfBuffer, base)
}

// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
//
// Calling Close on a nil Handle is a no-op.
func (h *Handle) Close() error {
	if h == nil {
		return nil
	}

	return h.fd.Close()
}

// FD returns the file descriptor for the handle.
func (h *Handle) FD() int {
	return h.fd.Int()
}

// Info returns metadata about the handle.
func (h *Handle) Info() (*HandleInfo, error) {
	return newHandleInfoFromFD(h.fd)
}

// HandleInfo describes a Handle.
type HandleInfo struct {
	// ID of this handle in the kernel. The ID is only valid as long as the
	// associated handle is kept alive.
	ID ID

	// Name is an identifying name for the BTF, currently only used by the
	// kernel.
	Name string

	// IsKernel is true if the BTF originated with the kernel and not
	// userspace.
	IsKernel bool

	// Size of the raw BTF in bytes.
	size uint32
}

// newHandleInfoFromFD queries the kernel for information about the BTF
// referred to by fd. The raw BTF itself is not retrieved, only its size.
func newHandleInfoFromFD(fd *sys.FD) (*HandleInfo, error) {
	// We invoke the syscall once with a empty BTF and name buffers to get size
	// information to allocate buffers. Then we invoke it a second time with
	// buffers to receive the data.
	var btfInfo sys.BtfInfo
	if err := sys.ObjInfo(fd, &btfInfo); err != nil {
		return nil, fmt.Errorf("get BTF info for fd %s: %w", fd, err)
	}

	if btfInfo.NameLen > 0 {
		// NameLen doesn't account for the terminating NUL.
		btfInfo.NameLen++
	}

	// Don't pull raw BTF by default, since it may be quite large.
	btfSize := btfInfo.BtfSize
	btfInfo.BtfSize = 0

	nameBuffer := make([]byte, btfInfo.NameLen)
	btfInfo.Name = sys.SlicePointer(nameBuffer)
	btfInfo.NameLen = uint32(len(nameBuffer))
	if err := sys.ObjInfo(fd, &btfInfo); err != nil {
		return nil, err
	}

	return &HandleInfo{
		ID:       ID(btfInfo.Id),
		Name:     unix.ByteSliceToString(nameBuffer),
		IsKernel: btfInfo.KernelBtf != 0,
		size:     btfSize,
	}, nil
}

// IsVmlinux returns true if the BTF is for the kernel itself.
func (i *HandleInfo) IsVmlinux() bool {
	return i.IsKernel && i.Name == "vmlinux"
}

// IsModule returns true if the BTF is for a kernel module.
func (i *HandleInfo) IsModule() bool {
	return i.IsKernel && i.Name != "vmlinux"
}

// HandleIterator allows enumerating BTF blobs loaded into the kernel.
type HandleIterator struct {
	// The ID of the current handle. Only valid after a call to Next.
	ID ID
	// The current Handle. Only valid until a call to Next.
	// See Take if you want to retain the handle.
	Handle *Handle
	err    error
}

// Next retrieves a handle for the next BTF object.
//
// Returns true if another BTF object was found. Call [HandleIterator.Err] after
// the function returns false.
func (it *HandleIterator) Next() bool { if platform.IsWindows { it.err = fmt.Errorf("btf: %w", internal.ErrNotSupportedOnOS) return false } id := it.ID for { attr := &sys.BtfGetNextIdAttr{Id: id} err := sys.BtfGetNextId(attr) if errors.Is(err, os.ErrNotExist) { // There are no more BTF objects. break } else if err != nil { it.err = fmt.Errorf("get next BTF ID: %w", err) break } id = attr.NextId handle, err := NewHandleFromID(id) if errors.Is(err, os.ErrNotExist) { // Try again with the next ID. continue } else if err != nil { it.err = fmt.Errorf("retrieve handle for ID %d: %w", id, err) break } it.Handle.Close() it.ID, it.Handle = id, handle return true } // No more handles or we encountered an error. it.Handle.Close() it.Handle = nil return false } // Take the ownership of the current handle. // // It's the callers responsibility to close the handle. func (it *HandleIterator) Take() *Handle { handle := it.Handle it.Handle = nil return handle } // Err returns an error if iteration failed for some reason. func (it *HandleIterator) Err() error { return it.err } // FindHandle returns the first handle for which predicate returns true. // // Requires CAP_SYS_ADMIN. // // Returns an error wrapping ErrNotFound if predicate never returns true or if // there is no BTF loaded into the kernel. 
func FindHandle(predicate func(info *HandleInfo) bool) (*Handle, error) { it := new(HandleIterator) defer it.Handle.Close() for it.Next() { info, err := it.Handle.Info() if err != nil { return nil, fmt.Errorf("info for ID %d: %w", it.ID, err) } if predicate(info) { return it.Take(), nil } } if err := it.Err(); err != nil { return nil, fmt.Errorf("iterate handles: %w", err) } return nil, fmt.Errorf("find handle: %w", ErrNotFound) } ================================================ FILE: btf/handle_test.go ================================================ package btf_test import ( "fmt" "testing" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/testutils" ) func TestHandleIterator(t *testing.T) { // There is no guarantee that there is a BTF ID allocated, but loading a module // triggers loading vmlinux. // See https://github.com/torvalds/linux/commit/5329722057d41aebc31e391907a501feaa42f7d9 testutils.SkipOnOldKernel(t, "5.11", "vmlinux BTF ID") it := new(btf.HandleIterator) defer it.Handle.Close() if !it.Next() { testutils.SkipIfNotSupportedOnOS(t, it.Err()) t.Fatalf("No BTF loaded") } if it.Handle == nil { t.Fatal("Next doesn't assign handle") } prev := it.ID for it.Next() { // Iterate all loaded BTF. if it.Handle == nil { t.Fatal("Next doesn't assign handle") } if it.ID == prev { t.Fatal("Iterator doesn't advance ID") } prev = it.ID } if err := it.Err(); err != nil { t.Fatal("Iteration returned an error:", err) } if it.Handle != nil { t.Fatal("Next doesn't clean up handle on last iteration") } if prev != it.ID { t.Fatal("Next changes ID on last iteration") } } func TestParseModuleSplitSpec(t *testing.T) { // See TestNewHandleFromID for reasoning. 
testutils.SkipOnOldKernel(t, "5.11", "vmlinux BTF ID") module, err := btf.FindHandle(func(info *btf.HandleInfo) bool { if info.IsModule() { t.Log("Using module", info.Name) return true } return false }) testutils.SkipIfNotSupportedOnOS(t, err) if err != nil { t.Fatal(err) } defer module.Close() vmlinux, err := btf.FindHandle(func(info *btf.HandleInfo) bool { return info.IsVmlinux() }) if err != nil { t.Fatal(err) } defer vmlinux.Close() base, err := vmlinux.Spec(nil) if err != nil { t.Fatal(err) } _, err = module.Spec(base) if err != nil { t.Fatal("Parse module BTF:", err) } } func ExampleHandleIterator() { it := new(btf.HandleIterator) defer it.Handle.Close() for it.Next() { info, err := it.Handle.Info() if err != nil { panic(err) } fmt.Printf("Found handle with ID %d and name %s\n", it.ID, info.Name) } if err := it.Err(); err != nil { panic(err) } } ================================================ FILE: btf/kernel.go ================================================ package btf import ( "errors" "fmt" "os" "path/filepath" "runtime" "slices" "sort" "sync" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/linux" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/unix" ) // globalCache amortises decoding BTF across all users of the library. var globalCache = struct { sync.RWMutex kernel *Spec modules map[string]*Spec }{ modules: make(map[string]*Spec), } // FlushKernelSpec removes any cached kernel type information. func FlushKernelSpec() { globalCache.Lock() defer globalCache.Unlock() globalCache.kernel = nil globalCache.modules = make(map[string]*Spec) } // LoadKernelSpec returns the current kernel's BTF information. // // Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system // for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled. // // Consider using [Cache] instead. 
func LoadKernelSpec() (*Spec, error) { spec, err := loadCachedKernelSpec() return spec.Copy(), err } // load (and cache) the kernel spec. // // Does not copy Spec. func loadCachedKernelSpec() (*Spec, error) { globalCache.RLock() spec := globalCache.kernel globalCache.RUnlock() if spec != nil { return spec, nil } globalCache.Lock() defer globalCache.Unlock() // check again, to prevent race between multiple callers if globalCache.kernel != nil { return globalCache.kernel, nil } spec, err := loadKernelSpec() if err != nil { return nil, err } globalCache.kernel = spec return spec, nil } // LoadKernelModuleSpec returns the BTF information for the named kernel module. // // Using [Cache.Module] is faster when loading BTF for more than one module. // // Defaults to /sys/kernel/btf/. // Returns an error wrapping ErrNotSupported if BTF is not enabled. // Returns an error wrapping fs.ErrNotExist if BTF for the specific module doesn't exist. func LoadKernelModuleSpec(module string) (*Spec, error) { spec, err := loadCachedKernelModuleSpec(module) return spec.Copy(), err } // load (and cache) a module spec. // // Does not copy Spec. func loadCachedKernelModuleSpec(module string) (*Spec, error) { globalCache.RLock() spec := globalCache.modules[module] globalCache.RUnlock() if spec != nil { return spec, nil } base, err := loadCachedKernelSpec() if err != nil { return nil, err } // NB: This only allows a single module to be parsed at a time. Not sure // it makes a difference. 
globalCache.Lock() defer globalCache.Unlock() // check again, to prevent race between multiple callers if spec := globalCache.modules[module]; spec != nil { return spec, nil } spec, err = loadKernelModuleSpec(module, base) if err != nil { return nil, err } globalCache.modules[module] = spec return spec, nil } func loadKernelSpec() (*Spec, error) { if platform.IsWindows { return nil, internal.ErrNotSupportedOnOS } fh, err := os.Open("/sys/kernel/btf/vmlinux") if err == nil { defer fh.Close() info, err := fh.Stat() if err != nil { return nil, fmt.Errorf("stat vmlinux: %w", err) } // NB: It's not safe to mmap arbitrary files because mmap(2) doesn't // guarantee that changes made after mmap are not visible in the mapping. // // This is not a problem for vmlinux, since it is always a read-only file. raw, err := unix.Mmap(int(fh.Fd()), 0, int(info.Size()), unix.PROT_READ, unix.MAP_PRIVATE) if err != nil { return LoadSplitSpecFromReader(fh, nil) } spec, err := loadRawSpec(raw, nil) if err != nil { _ = unix.Munmap(raw) return nil, fmt.Errorf("load vmlinux: %w", err) } runtime.AddCleanup(spec.decoder.sharedBuf, func(b []byte) { _ = unix.Munmap(b) }, raw) return spec, nil } file, err := findVMLinux() if err != nil { return nil, err } defer file.Close() spec, err := LoadSpecFromReader(file) return spec, err } func loadKernelModuleSpec(module string, base *Spec) (*Spec, error) { if platform.IsWindows { return nil, internal.ErrNotSupportedOnOS } dir, file := filepath.Split(module) if dir != "" || filepath.Ext(file) != "" { return nil, fmt.Errorf("invalid module name %q", module) } fh, err := os.Open(filepath.Join("/sys/kernel/btf", module)) if err != nil { return nil, err } defer fh.Close() return LoadSplitSpecFromReader(fh, base) } // findVMLinux scans multiple well-known paths for vmlinux kernel images. 
func findVMLinux() (*os.File, error) { if platform.IsWindows { return nil, fmt.Errorf("find vmlinux: %w", internal.ErrNotSupportedOnOS) } release, err := linux.KernelRelease() if err != nil { return nil, err } // use same list of locations as libbpf // https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122 locations := []string{ "/boot/vmlinux-%s", "/lib/modules/%s/vmlinux-%[1]s", "/lib/modules/%s/build/vmlinux", "/usr/lib/modules/%s/kernel/vmlinux", "/usr/lib/debug/boot/vmlinux-%s", "/usr/lib/debug/boot/vmlinux-%s.debug", "/usr/lib/debug/lib/modules/%s/vmlinux", } for _, loc := range locations { file, err := os.Open(fmt.Sprintf(loc, release)) if errors.Is(err, os.ErrNotExist) { continue } return file, err } return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported) } // Cache allows to amortise the cost of decoding BTF across multiple call-sites. // // It is not safe for concurrent use. type Cache struct { kernelTypes *Spec moduleTypes map[string]*Spec loadedModules []string } // NewCache creates a new Cache. // // Opportunistically reuses a global cache if possible. func NewCache() *Cache { globalCache.RLock() defer globalCache.RUnlock() // This copy is either a no-op or very cheap, since the spec won't contain // any inflated types. kernel := globalCache.kernel.Copy() if kernel == nil { return &Cache{} } modules := make(map[string]*Spec, len(globalCache.modules)) for name, spec := range globalCache.modules { decoder, _ := rebaseDecoder(spec.decoder, kernel.decoder) // NB: Kernel module BTF can't contain ELF fixups because it is always // read from sysfs. modules[name] = &Spec{decoder: decoder} } if len(modules) == 0 { return &Cache{kernel, nil, nil} } return &Cache{kernel, modules, nil} } // Kernel is equivalent to [LoadKernelSpec], except that repeated calls do // not copy the Spec. 
func (c *Cache) Kernel() (*Spec, error) {
	if c.kernelTypes != nil {
		return c.kernelTypes, nil
	}

	// Remember the result so that later calls return the same Spec.
	var err error
	c.kernelTypes, err = LoadKernelSpec()
	return c.kernelTypes, err
}

// Module is equivalent to [LoadKernelModuleSpec], except that repeated calls do
// not copy the spec.
//
// All modules also share the return value of [Kernel] as their base.
func (c *Cache) Module(name string) (*Spec, error) {
	if spec := c.moduleTypes[name]; spec != nil {
		return spec, nil
	}

	// Writing to a nil map panics, so allocate it on first use.
	if c.moduleTypes == nil {
		c.moduleTypes = make(map[string]*Spec)
	}

	base, err := c.Kernel()
	if err != nil {
		return nil, err
	}

	spec, err := loadCachedKernelModuleSpec(name)
	if err != nil {
		return nil, err
	}

	// Important: base is shared between modules. This allows inflating common
	// types only once.
	decoder, err := rebaseDecoder(spec.decoder, base.decoder)
	if err != nil {
		return nil, err
	}

	spec = &Spec{decoder: decoder}
	c.moduleTypes[name] = spec
	return spec, err
}

// Modules returns a sorted list of all loaded modules.
//
// The names are the entries of /sys/kernel/btf with "vmlinux" removed. The
// list is stored in the Cache after the first successful call.
func (c *Cache) Modules() ([]string, error) {
	if c.loadedModules != nil {
		return c.loadedModules, nil
	}

	btfDir, err := os.Open("/sys/kernel/btf")
	if err != nil {
		return nil, err
	}
	defer btfDir.Close()

	entries, err := btfDir.Readdirnames(-1)
	if err != nil {
		return nil, err
	}

	// vmlinux is the kernel itself, not a module.
	entries = slices.DeleteFunc(entries, func(s string) bool {
		return s == "vmlinux"
	})

	sort.Strings(entries)
	c.loadedModules = entries
	return entries, nil
}

================================================
FILE: btf/kernel_test.go
================================================
package btf

import (
	"os"
	"runtime"
	"testing"

	"github.com/cilium/ebpf/internal/testutils"
	"github.com/go-quicktest/qt"
)

// TestLoadKernelSpec loads the kernel spec and, unless the kernel version is
// below 6.16, checks that /sys/kernel/btf/vmlinux shows up in the process'
// memory mappings.
func TestLoadKernelSpec(t *testing.T) {
	if _, err := os.Stat("/sys/kernel/btf/vmlinux"); os.IsNotExist(err) {
		t.Skip("/sys/kernel/btf/vmlinux not present")
	}

	spec, err := LoadKernelSpec()
	if err != nil {
		t.Fatal("Can't load kernel spec:", err)
	}

	if !testutils.IsVersionLessThan(t, "linux:6.16") {
		maps, err := os.ReadFile("/proc/self/maps")
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.StringContains(string(maps), " /sys/kernel/btf/vmlinux\n"))
	}

	// Prevent finalizer from unmapping vmlinux.
	runtime.KeepAlive(spec)
}

// TestLoadKernelModuleSpec loads the BTF of bpf_testmod if the module is
// present in sysfs.
func TestLoadKernelModuleSpec(t *testing.T) {
	if _, err := os.Stat("/sys/kernel/btf/bpf_testmod"); os.IsNotExist(err) {
		t.Skip("/sys/kernel/btf/bpf_testmod not present")
	}

	_, err := LoadKernelModuleSpec("bpf_testmod")
	qt.Assert(t, qt.IsNil(err))
}

// TestCache checks that a Cache hands out the same Spec on repeated calls and
// that NewCache picks up specs from the global cache.
func TestCache(t *testing.T) {
	// Start from an empty global cache so that NewCache has nothing to reuse.
	FlushKernelSpec()
	c := NewCache()

	qt.Assert(t, qt.IsNil(c.kernelTypes))
	qt.Assert(t, qt.HasLen(c.moduleTypes, 0))
	qt.Assert(t, qt.IsNil(c.loadedModules))

	// Test that Kernel() creates only one copy
	spec1, err := c.Kernel()
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsNotNil(spec1))

	spec2, err := c.Kernel()
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsNotNil(spec2))

	qt.Assert(t, qt.Equals(spec1, spec2))

	// Test that Module() creates only one copy
	mod1, err := c.Module("bpf_testmod")
	if !os.IsNotExist(err) {
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.IsNotNil(mod1))

		mod2, err := c.Module("bpf_testmod")
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.IsNotNil(mod2))

		qt.Assert(t, qt.Equals(mod1, mod2))
	}

	// Pre-populate global cache
	vmlinux, err := LoadKernelSpec()
	qt.Assert(t, qt.IsNil(err))

	testmod, err := LoadKernelModuleSpec("bpf_testmod")
	if !os.IsNotExist(err) {
		qt.Assert(t, qt.IsNil(err))
	}

	// Test that NewCache populates from global cache
	c = NewCache()
	qt.Assert(t, qt.IsNotNil(c.kernelTypes))
	qt.Assert(t, qt.Not(qt.Equals(c.kernelTypes, vmlinux)))
	if testmod != nil {
		qt.Assert(t, qt.IsNotNil(c.moduleTypes["bpf_testmod"]))
		qt.Assert(t, qt.Not(qt.Equals(c.moduleTypes["bpf_testmod"], testmod)))
	}

	// Test that Modules only reads modules once.
	_, err = c.Modules()
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsNotNil(c.loadedModules))
}

================================================
FILE: btf/marshal.go
================================================
package btf

import (
	"encoding/binary"
	"errors"
	"fmt"
	"maps"
	"math"
	"slices"
	"sync"
	"unsafe"

	"github.com/cilium/ebpf/internal"
)

// MarshalOptions controls how types are encoded into BTF wire format.
type MarshalOptions struct {
	// Target byte order. Defaults to the system's native endianness.
	Order binary.ByteOrder
	// Remove function linkage information for compatibility with <5.6 kernels.
	StripFuncLinkage bool
	// Replace decl tags with a placeholder for compatibility with <5.16 kernels.
	ReplaceDeclTags bool
	// Replace TypeTags with a placeholder for compatibility with <5.17 kernels.
	ReplaceTypeTags bool
	// Replace Enum64 with a placeholder for compatibility with <6.0 kernels.
	ReplaceEnum64 bool
	// Prevent the "No type found" error when loading BTF without any types.
	PreventNoTypeFound bool
}

// KernelMarshalOptions will generate BTF suitable for the current kernel.
//
// Each compatibility option is enabled when the corresponding feature probe
// returns an error.
func KernelMarshalOptions() *MarshalOptions {
	return &MarshalOptions{
		Order:              internal.NativeEndian,
		StripFuncLinkage:   haveFuncLinkage() != nil,
		ReplaceDeclTags:    haveDeclTags() != nil,
		ReplaceTypeTags:    haveTypeTags() != nil,
		ReplaceEnum64:      haveEnum64() != nil,
		PreventNoTypeFound: true, // All current kernels require this.
	}
}

// encoder turns Types into raw BTF.
type encoder struct {
	MarshalOptions

	// Types which have an ID but haven't been written out yet.
	pending internal.Deque[Type]
	// String table of the BTF being built.
	strings *stringTableBuilder
	// IDs assigned so far, including the stable IDs copied from the Builder.
	ids map[Type]TypeID
	// Types already seen by the postorder traversal in allocateIDs.
	visited map[Type]struct{}
	// The most recently allocated ID.
	lastID TypeID
}

// bufferPool holds reusable byte slices, handed out as pointers to slices.
var bufferPool = sync.Pool{
	New: func() any {
		buf := make([]byte, btfHeaderLen+128)
		return &buf
	},
}

// getByteSlice takes a buffer out of bufferPool.
func getByteSlice() *[]byte {
	return bufferPool.Get().(*[]byte)
}

// putByteSlice truncates buf to zero length, keeping its capacity, and
// returns it to bufferPool.
func putByteSlice(buf *[]byte) {
	*buf = (*buf)[:0]
	bufferPool.Put(buf)
}

// Builder turns Types into raw BTF.
//
// The default value may be used and represents an empty BTF blob. Void is
// added implicitly if necessary.
type Builder struct {
	// Explicitly added types.
	types []Type
	// IDs for all added types which the user knows about.
	stableIDs map[Type]TypeID
	// Explicitly added strings.
	strings *stringTableBuilder
	// Deduplication data structure.
	deduper *deduper
}

// BuilderOptions configures a Builder created by NewBuilder.
type BuilderOptions struct {
	// Deduplicate enables type deduplication.
	Deduplicate bool
}

// NewBuilder creates a Builder from a list of types.
//
// It is more efficient than calling [Add] individually.
//
// opts may be nil, in which case deduplication is disabled.
//
// Returns an error if adding any of the types fails.
func NewBuilder(types []Type, opts *BuilderOptions) (*Builder, error) {
	if opts == nil {
		opts = &BuilderOptions{}
	}

	// Size the slice and the ID map up front, one slot per type to be added.
	b := &Builder{
		make([]Type, 0, len(types)),
		make(map[Type]TypeID, len(types)),
		nil,
		nil,
	}

	if opts.Deduplicate {
		b.deduper = newDeduper()
	}

	for _, typ := range types {
		_, err := b.Add(typ)
		if err != nil {
			return nil, fmt.Errorf("add %s: %w", typ, err)
		}
	}

	return b, nil
}

// Empty returns true if neither types nor strings have been added.
func (b *Builder) Empty() bool {
	return len(b.types) == 0 && (b.strings == nil || b.strings.Length() == 0)
}

// Add a Type and allocate a stable ID for it.
//
// Adding the identical Type multiple times is valid and will return the same ID.
// Void always maps to ID 0 and is never stored.
//
// See [Type] for details on identity.
func (b *Builder) Add(typ Type) (TypeID, error) {
	if _, ok := typ.(*Void); ok {
		// Equality is weird for void, since it is a zero sized type.
		return 0, nil
	}

	if err := internal.IsNil(typ); err != nil {
		return 0, fmt.Errorf("invalid type: %w", err)
	}

	// The zero value of Builder has no map yet.
	if b.stableIDs == nil {
		b.stableIDs = make(map[Type]TypeID)
	}

	// With deduplication enabled, continue with whatever Type the deduper
	// returns for typ.
	if b.deduper != nil {
		var err error
		typ, err = b.deduper.deduplicate(typ)
		if err != nil {
			return 0, err
		}
	}

	if ds, ok := typ.(*Datasec); ok {
		if err := datasecResolveWorkaround(b, ds); err != nil {
			return 0, err
		}
	}

	id, ok := b.stableIDs[typ]
	if ok {
		return id, nil
	}

	// IDs are one-based positions in b.types, since ID 0 is Void.
	b.types = append(b.types, typ)

	// Detect truncation when converting the length to a TypeID.
	id = TypeID(len(b.types))
	if int(id) != len(b.types) {
		return 0, fmt.Errorf("no more type IDs")
	}

	b.stableIDs[typ] = id
	return id, nil
}

// Spec marshals the Builder's types and returns a new Spec to query them.
//
// The resulting Spec does not share any state with the Builder, subsequent
// additions to the Builder will not affect the Spec.
func (b *Builder) Spec() (*Spec, error) {
	buf, err := b.Marshal(make([]byte, 0), nil)
	if err != nil {
		return nil, err
	}

	return loadRawSpec(buf, nil)
}

// Marshal encodes all types in the Builder into BTF wire format.
//
// The header is written to the first btfHeaderLen bytes of buf, followed by
// the encoded types and the string table. The Builder itself is not modified.
//
// opts may be nil, in which case native endianness is used and no
// compatibility options are enabled.
func (b *Builder) Marshal(buf []byte, opts *MarshalOptions) ([]byte, error) {
	stb := b.strings
	if stb == nil {
		// Assume that most types are named. This makes encoding large BTF like
		// vmlinux a lot cheaper.
		stb = newStringTableBuilder(len(b.types))
	} else {
		// Avoid modifying the Builder's string table.
		stb = b.strings.Copy()
	}

	if opts == nil {
		opts = &MarshalOptions{Order: internal.NativeEndian}
	}

	// Reserve space for the BTF header.
	buf = slices.Grow(buf, btfHeaderLen)[:btfHeaderLen]

	// IDs for types reachable from the added ones are handed out starting
	// after the last stable ID. The ID map is cloned so that the Builder's
	// own map stays untouched.
	e := encoder{
		MarshalOptions: *opts,
		strings:        stb,
		lastID:         TypeID(len(b.types)),
		visited:        make(map[Type]struct{}, len(b.types)),
		ids:            maps.Clone(b.stableIDs),
	}

	// stableIDs is nil for a Builder which never had a type added.
	if e.ids == nil {
		e.ids = make(map[Type]TypeID)
	}

	types := b.types
	if len(types) == 0 && stb.Length() > 0 && opts.PreventNoTypeFound {
		// We have strings that need to be written out,
		// but no types (besides the implicit Void).
		// Kernels as recent as v6.7 refuse to load such BTF
		// with a "No type found" error in the log.
		// Fix this by adding a dummy type.
		types = []Type{&Int{Size: 0}}
	}

	// Ensure that types are marshaled in the exact order they were Add()ed.
	// Otherwise the ID returned from Add() won't match.
	e.pending.Grow(len(types))
	for _, typ := range types {
		e.pending.Push(typ)
	}

	buf, err := e.deflatePending(buf)
	if err != nil {
		return nil, err
	}

	// Everything after the header up to this point is type data, the string
	// table is appended right behind it.
	length := len(buf)
	typeLen := uint32(length - btfHeaderLen)

	stringLen := e.strings.Length()
	buf = e.strings.AppendEncoded(buf)

	// Fill out the header, and write it out.
	header := &btfHeader{
		Magic:     btfMagic,
		Version:   1,
		Flags:     0,
		HdrLen:    uint32(btfHeaderLen),
		TypeOff:   0,
		TypeLen:   typeLen,
		StringOff: typeLen,
		StringLen: uint32(stringLen),
	}

	_, err = binary.Encode(buf[:btfHeaderLen], e.Order, header)
	if err != nil {
		return nil, fmt.Errorf("write header: %v", err)
	}

	return buf, nil
}

// addString adds a string to the resulting BTF.
//
// Adding the same string multiple times will return the same result.
//
// Returns an identifier into the string table or an error if the string
// contains invalid characters.
func (b *Builder) addString(str string) (uint32, error) {
	if b.strings == nil {
		b.strings = newStringTableBuilder(0)
	}

	return b.strings.Add(str)
}

// allocateIDs assigns an ID to every type yielded by the postorder traversal
// of root which doesn't have one yet, and queues those types for encoding.
//
// Void is skipped. Returns an error if the ID counter would wrap around.
func (e *encoder) allocateIDs(root Type) error {
	for typ := range postorder(root, e.visited) {
		if _, ok := typ.(*Void); ok {
			continue
		}

		if _, ok := e.ids[typ]; ok {
			continue
		}

		// A wrapped-around counter yields an ID smaller than the last one.
		id := e.lastID + 1
		if id < e.lastID {
			return errors.New("type ID overflow")
		}

		e.pending.Push(typ)
		e.ids[typ] = id
		e.lastID = id
	}

	return nil
}

// id returns the ID for the given type or panics with an error.
//
// Void always has ID 0.
func (e *encoder) id(typ Type) TypeID {
	if _, ok := typ.(*Void); ok {
		return 0
	}

	id, ok := e.ids[typ]
	if !ok {
		panic(fmt.Errorf("no ID for type %v", typ))
	}

	return id
}

// deflatePending encodes queued types into buf until the queue is empty.
//
// Encoding a type may queue further types, see allocateIDs.
func (e *encoder) deflatePending(buf []byte) ([]byte, error) {
	// Declare root outside of the loop to avoid repeated heap allocations.
	var root Type

	for !e.pending.Empty() {
		root = e.pending.Shift()

		// Allocate IDs for all children of typ, including transitive dependencies.
		err := e.allocateIDs(root)
		if err != nil {
			return nil, err
		}

		buf, err = e.deflateType(buf, root)
		if err != nil {
			id := e.ids[root]
			return nil, fmt.Errorf("deflate %v with ID %d: %w", root, id, err)
		}
	}

	return buf, nil
}

// deflateType appends the wire representation of typ to buf: a btfType
// header followed by whatever kind specific data the type requires.
//
// A panic carrying an error value, such as the one raised by encoder.id for a
// type without an ID, is recovered and returned as err. Any other panic is
// re-raised.
func (e *encoder) deflateType(buf []byte, typ Type) (_ []byte, err error) {
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			err, ok = r.(error)
			if !ok {
				panic(r)
			}
		}
	}()

	var raw btfType
	raw.NameOff, err = e.strings.Add(typ.TypeName())
	if err != nil {
		return nil, err
	}

	// Reserve space for the btfType header.
	start := len(buf)
	buf = append(buf, make([]byte, unsafe.Sizeof(raw))...)

	// Fill in raw and append kind specific data behind the reserved header.
	switch v := typ.(type) {
	case *Void:
		return nil, errors.New("Void is implicit in BTF wire format")

	case *Int:
		buf, err = e.deflateInt(buf, &raw, v)

	case *Pointer:
		raw.SetKind(kindPointer)
		raw.SetType(e.id(v.Target))

	case *Array:
		raw.SetKind(kindArray)
		buf, err = binary.Append(buf, e.Order, &btfArray{
			e.id(v.Type),
			e.id(v.Index),
			v.Nelems,
		})

	case *Struct:
		raw.SetKind(kindStruct)
		raw.SetSize(v.Size)
		buf, err = e.deflateMembers(buf, &raw, v.Members)

	case *Union:
		buf, err = e.deflateUnion(buf, &raw, v)

	case *Enum:
		// Enums of size 8 are encoded as ENUM64, or as its replacement.
		if v.Size == 8 {
			buf, err = e.deflateEnum64(buf, &raw, v)
		} else {
			buf, err = e.deflateEnum(buf, &raw, v)
		}

	case *Fwd:
		raw.SetKind(kindForward)
		raw.SetFwdKind(v.Kind)

	case *Typedef:
		raw.SetKind(kindTypedef)
		raw.SetType(e.id(v.Type))

	case *Volatile:
		raw.SetKind(kindVolatile)
		raw.SetType(e.id(v.Type))

	case *Const:
		e.deflateConst(&raw, v)

	case *Restrict:
		raw.SetKind(kindRestrict)
		raw.SetType(e.id(v.Type))

	case *Func:
		raw.SetKind(kindFunc)
		raw.SetType(e.id(v.Type))
		if !e.StripFuncLinkage {
			raw.SetLinkage(v.Linkage)
		}

	case *FuncProto:
		raw.SetKind(kindFuncProto)
		raw.SetType(e.id(v.Return))
		raw.SetVlen(len(v.Params))
		buf, err = e.deflateFuncParams(buf, v.Params)

	case *Var:
		raw.SetKind(kindVar)
		raw.SetType(e.id(v.Type))
		buf, err = binary.Append(buf, e.Order, btfVariable{uint32(v.Linkage)})

	case *Datasec:
		raw.SetKind(kindDatasec)
		raw.SetSize(v.Size)
		raw.SetVlen(len(v.Vars))
		buf, err = e.deflateVarSecinfos(buf, v.Vars)

	case *Float:
		raw.SetKind(kindFloat)
		raw.SetSize(v.Size)

	case *declTag:
		buf, err = e.deflateDeclTag(buf, &raw, v)

	case *TypeTag:
		err = e.deflateTypeTag(&raw, v)

	default:
		return nil, fmt.Errorf("don't know how to deflate %T", v)
	}

	if err != nil {
		return nil, err
	}

	// Now that raw is complete, encode it into the space reserved earlier.
	header := buf[start : start+int(unsafe.Sizeof(raw))]
	if _, err = raw.Encode(header, e.Order); err != nil {
		return nil, err
	}

	return buf, nil
}

// deflateInt fills in raw for an Int and appends the btfInt describing its
// encoding and width in bits to buf.
func (e *encoder) deflateInt(buf []byte, raw *btfType, i *Int) ([]byte, error) {
	raw.SetKind(kindInt)
	raw.SetSize(i.Size)

	var bi btfInt
	bi.SetEncoding(i.Encoding)
	// We need to set bits in addition to size, since btf_type_int_is_regular
	// otherwise flags this as a bitfield.
	bi.SetBits(byte(i.Size) * 8)
	return binary.Append(buf, e.Order, bi)
}

// deflateDeclTag fills in raw for a decl tag and appends its btfDeclTag to
// buf. With ReplaceDeclTags set, an Int named "decl_tag_placeholder" is
// encoded instead.
func (e *encoder) deflateDeclTag(buf []byte, raw *btfType, tag *declTag) ([]byte, error) {
	// Replace a decl tag with an integer for compatibility with <5.16 kernels,
	// following libbpf behaviour.
	if e.ReplaceDeclTags {
		typ := &Int{"decl_tag_placeholder", 1, Unsigned}
		buf, err := e.deflateInt(buf, raw, typ)
		if err != nil {
			return nil, err
		}

		// Add the placeholder type name to the string table. The encoder added the
		// original type name before this call.
		raw.NameOff, err = e.strings.Add(typ.TypeName())
		return buf, err
	}

	var err error
	raw.SetKind(kindDeclTag)
	raw.SetType(e.id(tag.Type))
	// The name of a decl tag on the wire is its value.
	raw.NameOff, err = e.strings.Add(tag.Value)
	if err != nil {
		return nil, err
	}

	return binary.Append(buf, e.Order, btfDeclTag{uint32(tag.Index)})
}

// deflateConst fills in raw for a Const qualifier. No additional data
// follows the header.
func (e *encoder) deflateConst(raw *btfType, c *Const) {
	raw.SetKind(kindConst)
	raw.SetType(e.id(c.Type))
}

// deflateTypeTag fills in raw for a TypeTag, using the tag's value as the
// name. With ReplaceTypeTags set, a Const of the tagged type is encoded
// instead.
func (e *encoder) deflateTypeTag(raw *btfType, tag *TypeTag) (err error) {
	// Replace a type tag with a const qualifier for compatibility with <5.17
	// kernels, following libbpf behaviour.
	if e.ReplaceTypeTags {
		e.deflateConst(raw, &Const{tag.Type})
		return nil
	}

	raw.SetKind(kindTypeTag)
	raw.SetType(e.id(tag.Type))
	raw.NameOff, err = e.strings.Add(tag.Value)
	return
}

// deflateUnion fills in raw for a Union and appends its members to buf.
func (e *encoder) deflateUnion(buf []byte, raw *btfType, union *Union) ([]byte, error) {
	raw.SetKind(kindUnion)
	raw.SetSize(union.Size)
	return e.deflateMembers(buf, raw, union.Members)
}

// deflateMembers appends one btfMember per member to buf and records the
// member count and the bitfield flag in header.
func (e *encoder) deflateMembers(buf []byte, header *btfType, members []Member) ([]byte, error) {
	var bm btfMember
	isBitfield := false

	buf = slices.Grow(buf, len(members)*int(unsafe.Sizeof(bm)))
	for _, member := range members {
		// Sticky: stays true for every member after the first bitfield.
		isBitfield = isBitfield || member.BitfieldSize > 0

		// In bitfield mode the bitfield size goes into the top 8 bits and
		// the offset into the lower 24 bits.
		offset := member.Offset
		if isBitfield {
			offset = member.BitfieldSize<<24 | (member.Offset & 0xffffff)
		}

		nameOff, err := e.strings.Add(member.Name)
		if err != nil {
			return nil, err
		}

		bm = btfMember{
			nameOff,
			e.id(member.Type),
			uint32(offset),
		}

		buf, err = binary.Append(buf, e.Order, &bm)
		if err != nil {
			return nil, err
		}
	}

	header.SetVlen(len(members))
	header.SetBitfield(isBitfield)
	return buf, nil
}

// deflateEnum fills in raw for an Enum and appends its values to buf as
// 32 bit entries.
func (e *encoder) deflateEnum(buf []byte, raw *btfType, enum *Enum) ([]byte, error) {
	raw.SetKind(kindEnum)
	raw.SetSize(enum.Size)
	raw.SetVlen(len(enum.Values))
	// Signedness appeared together with ENUM64 support.
	raw.SetSigned(enum.Signed && !e.ReplaceEnum64)
	return e.deflateEnumValues(buf, enum)
}

// deflateEnumValues appends one btfEnum per value of enum to buf.
//
// Returns an error if a value doesn't fit into 32 bits, taking the
// signedness of the enum into account.
func (e *encoder) deflateEnumValues(buf []byte, enum *Enum) ([]byte, error) {
	var be btfEnum
	buf = slices.Grow(buf, len(enum.Values)*int(unsafe.Sizeof(be)))
	for _, value := range enum.Values {
		nameOff, err := e.strings.Add(value.Name)
		if err != nil {
			return nil, err
		}

		if enum.Signed {
			if signedValue := int64(value.Value); signedValue < math.MinInt32 || signedValue > math.MaxInt32 {
				return nil, fmt.Errorf("value %d of enum %q exceeds 32 bits", signedValue, value.Name)
			}
		} else {
			if value.Value > math.MaxUint32 {
				return nil, fmt.Errorf("value %d of enum %q exceeds 32 bits", value.Value, value.Name)
			}
		}

		be = btfEnum{
			nameOff,
			uint32(value.Value),
		}

		buf, err = binary.Append(buf, e.Order, &be)
		if err != nil {
			return nil, err
		}
	}

	return buf, nil
}

// deflateEnum64 fills in raw for an Enum of size 8 and appends its values to
// buf as 64 bit entries. With ReplaceEnum64 set, a union with one member per
// enum value is encoded instead.
func (e *encoder) deflateEnum64(buf []byte, raw *btfType, enum *Enum) ([]byte, error) {
	if e.ReplaceEnum64 {
		// Replace the ENUM64 with a union of fields with the correct size.
		// This matches libbpf behaviour on purpose.
		placeholder := &Int{
			"enum64_placeholder",
			enum.Size,
			Unsigned,
		}
		if enum.Signed {
			placeholder.Encoding = Signed
		}
		// The placeholder wasn't reachable from any type so far, so it needs
		// an ID and has to be queued for encoding.
		if err := e.allocateIDs(placeholder); err != nil {
			return nil, fmt.Errorf("add enum64 placeholder: %w", err)
		}

		members := make([]Member, 0, len(enum.Values))
		for _, v := range enum.Values {
			members = append(members, Member{
				Name: v.Name,
				Type: placeholder,
			})
		}

		return e.deflateUnion(buf, raw, &Union{enum.Name, enum.Size, members, nil})
	}

	raw.SetKind(kindEnum64)
	raw.SetSize(enum.Size)
	raw.SetVlen(len(enum.Values))
	raw.SetSigned(enum.Signed)
	return e.deflateEnum64Values(buf, enum.Values)
}

// deflateEnum64Values appends one btfEnum64 per value to buf, splitting each
// value into its lower and upper 32 bits.
func (e *encoder) deflateEnum64Values(buf []byte, values []EnumValue) ([]byte, error) {
	var be btfEnum64
	buf = slices.Grow(buf, len(values)*int(unsafe.Sizeof(be)))
	for _, value := range values {
		nameOff, err := e.strings.Add(value.Name)
		if err != nil {
			return nil, err
		}

		be = btfEnum64{
			nameOff,
			uint32(value.Value),
			uint32(value.Value >> 32),
		}

		buf, err = binary.Append(buf, e.Order, &be)
		if err != nil {
			return nil, err
		}
	}

	return buf, nil
}

// deflateFuncParams appends one btfParam per function parameter to buf.
func (e *encoder) deflateFuncParams(buf []byte, params []FuncParam) ([]byte, error) {
	var bp btfParam
	buf = slices.Grow(buf, len(params)*int(unsafe.Sizeof(bp)))
	for _, param := range params {
		nameOff, err := e.strings.Add(param.Name)
		if err != nil {
			return nil, err
		}

		bp = btfParam{
			nameOff,
			e.id(param.Type),
		}

		buf, err = binary.Append(buf, e.Order, &bp)
		if err != nil {
			return nil, err
		}
	}
	return buf, nil
}

// deflateVarSecinfos appends one btfVarSecinfo per entry of vars to buf.
func (e *encoder) deflateVarSecinfos(buf []byte, vars []VarSecinfo) ([]byte, error) {
	var vsi btfVarSecinfo
	var err error
	buf = slices.Grow(buf, len(vars)*int(unsafe.Sizeof(vsi)))
	for _, v := range vars {
		vsi = btfVarSecinfo{
			e.id(v.Type),
			v.Offset,
			v.Size,
		}

		buf, err = binary.Append(buf, e.Order, vsi)
		if err != nil {
			return nil, err
		}
	}
	return buf, nil
}

// MarshalMapKV creates a BTF object containing a map key and value.
//
// The function is intended for the use of the ebpf package and may be removed
// at any point in time.
func MarshalMapKV(key, value Type) (_ *Handle, keyID, valueID TypeID, err error) { var b Builder if key != nil { keyID, err = b.Add(key) if err != nil { return nil, 0, 0, fmt.Errorf("add key type: %w", err) } } if value != nil { valueID, err = b.Add(value) if err != nil { return nil, 0, 0, fmt.Errorf("add value type: %w", err) } } handle, err := NewHandle(&b) if err != nil { // Check for 'full' map BTF support, since kernels between 4.18 and 5.2 // already support BTF blobs for maps without Var or Datasec just fine. if err := haveMapBTF(); err != nil { return nil, 0, 0, err } } return handle, keyID, valueID, err } ================================================ FILE: btf/marshal_test.go ================================================ package btf import ( "math" "testing" "github.com/go-quicktest/qt" "github.com/google/go-cmp/cmp" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" ) func TestBuilderMarshal(t *testing.T) { typ := &Int{ Name: "foo", Size: 2, Encoding: Signed | Char, } want := []Type{ (*Void)(nil), typ, &Pointer{typ}, &Typedef{"baz", typ, nil}, } b, err := NewBuilder(want, nil) qt.Assert(t, qt.IsNil(err)) cpy := *b buf, err := b.Marshal(nil, &MarshalOptions{Order: internal.NativeEndian}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.CmpEquals(b, &cpy, cmp.AllowUnexported(*b)), qt.Commentf("Marshaling should not change Builder state")) have, err := loadRawSpec(buf, nil) qt.Assert(t, qt.IsNil(err), qt.Commentf("Couldn't parse BTF")) qt.Assert(t, qt.DeepEquals(typesFromSpec(t, have), want)) } func TestBuilderAdd(t *testing.T) { i := &Int{ Name: "foo", Size: 2, Encoding: Signed | Char, } pi := &Pointer{i} var b Builder id, err := b.Add(pi) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(id, TypeID(1)), qt.Commentf("First non-void type doesn't get id 1")) id, err = b.Add(pi) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(id, TypeID(1))) id, err = b.Add(i) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(id, TypeID(2)), 
qt.Commentf("Second type doesn't get id 2")) id, err = b.Add(i) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(id, TypeID(2)), qt.Commentf("Adding a type twice returns different ids")) id, err = b.Add(&Typedef{"baz", i, nil}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(id, TypeID(3))) _, err = b.Add(nil) qt.Assert(t, qt.IsNotNil(err)) t.Log(err) _, err = b.Add((*Int)(nil)) qt.Assert(t, qt.IsNotNil(err)) t.Log(err) } func TestBuilderSpec(t *testing.T) { b, err := NewBuilder([]Type{ &Int{Name: "foo", Size: 2}, &Int{Name: "foo", Size: 2}, }, &BuilderOptions{Deduplicate: true}) qt.Assert(t, qt.IsNil(err)) spec, err := b.Spec() qt.Assert(t, qt.IsNil(err)) // With deduplication enabled, both ints should be merged into one, // allowing queries with AnyTypeByName. _, err = spec.AnyTypeByName("foo") qt.Assert(t, qt.IsNil(err)) } func TestRoundtripVMlinux(t *testing.T) { types := typesFromSpec(t, vmlinuxSpec(t)) // Randomize the order to force different permutations of walking the type // graph. Keep Void at index 0. testutils.Rand(t).Shuffle(len(types[1:]), func(i, j int) { types[i+1], types[j+1] = types[j+1], types[i+1] }) visited := make(map[Type]struct{}) limitTypes: for i, typ := range types { for range postorder(typ, visited) { } if len(visited) >= math.MaxInt16 { // IDs exceeding math.MaxUint16 can trigger a bug when loading BTF. // This can be removed once the patch lands. 
// See https://lore.kernel.org/bpf/20220909092107.3035-1-oss@lmb.io/ types = types[:i] break limitTypes } } b, err := NewBuilder(types, nil) qt.Assert(t, qt.IsNil(err)) buf, err := b.Marshal(nil, KernelMarshalOptions()) qt.Assert(t, qt.IsNil(err)) rebuilt, err := loadRawSpec(buf, nil) qt.Assert(t, qt.IsNil(err), qt.Commentf("round tripping BTF failed")) if n := len(rebuilt.offsets); n > math.MaxUint16 { t.Logf("Rebuilt BTF contains %d types which exceeds uint16, test may fail on older kernels", n) } h, err := NewHandleFromRawBTF(buf) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err), qt.Commentf("loading rebuilt BTF failed")) h.Close() } func TestMarshalEnum64(t *testing.T) { enum := &Enum{ Name: "enum64", Size: 8, Signed: true, Values: []EnumValue{ {"A", 0}, {"B", 1}, }, } b, err := NewBuilder([]Type{enum}, nil) qt.Assert(t, qt.IsNil(err)) buf, err := b.Marshal(nil, &MarshalOptions{ Order: internal.NativeEndian, ReplaceEnum64: true, }) qt.Assert(t, qt.IsNil(err)) spec, err := loadRawSpec(buf, nil) qt.Assert(t, qt.IsNil(err)) var have *Union err = spec.TypeByName("enum64", &have) qt.Assert(t, qt.IsNil(err)) placeholder := &Int{Name: "enum64_placeholder", Size: 8, Encoding: Signed} qt.Assert(t, qt.DeepEquals(have, &Union{ Name: "enum64", Size: 8, Members: []Member{ {Name: "A", Type: placeholder}, {Name: "B", Type: placeholder}, }, })) } func TestMarshalDeclTags(t *testing.T) { types := []Type{ // Instead of an adjacent declTag, this will receive a placeholder Int. 
&Typedef{ Name: "decl tag typedef", Tags: []string{"decl tag"}, Type: &Int{Name: "decl tag target"}, }, } b, err := NewBuilder(types, nil) qt.Assert(t, qt.IsNil(err)) buf, err := b.Marshal(nil, &MarshalOptions{ Order: internal.NativeEndian, ReplaceDeclTags: true, }) qt.Assert(t, qt.IsNil(err)) spec, err := loadRawSpec(buf, nil) qt.Assert(t, qt.IsNil(err)) var td *Typedef qt.Assert(t, qt.IsNil(spec.TypeByName("decl tag typedef", &td))) var ti *Int qt.Assert(t, qt.IsNil(spec.TypeByName("decl_tag_placeholder", &ti))) } func TestMarshalTypeTags(t *testing.T) { types := []Type{ // Instead of pointing to a TypeTag, this will point to an intermediary Const. &Typedef{ Name: "type tag typedef", Type: &TypeTag{ Value: "type tag", Type: &Pointer{ Target: &Int{Name: "type tag target"}, }, }, }, } b, err := NewBuilder(types, nil) qt.Assert(t, qt.IsNil(err)) buf, err := b.Marshal(nil, &MarshalOptions{ Order: internal.NativeEndian, ReplaceTypeTags: true, }) qt.Assert(t, qt.IsNil(err)) spec, err := loadRawSpec(buf, nil) qt.Assert(t, qt.IsNil(err)) var td *Typedef qt.Assert(t, qt.IsNil(spec.TypeByName("type tag typedef", &td))) qt.Assert(t, qt.Satisfies(td.Type, func(typ Type) bool { _, ok := typ.(*Const) return ok })) } func BenchmarkMarshaler(b *testing.B) { types := typesFromSpec(b, vmlinuxTestdataSpec(b))[:100] b.ReportAllocs() for b.Loop() { var b Builder for _, typ := range types { _, _ = b.Add(typ) } _, _ = b.Marshal(nil, nil) } } func BenchmarkBuildVmlinux(b *testing.B) { types := typesFromSpec(b, vmlinuxTestdataSpec(b)) b.ReportAllocs() for b.Loop() { var b Builder for _, typ := range types { _, _ = b.Add(typ) } _, _ = b.Marshal(nil, nil) } } func marshalNativeEndian(tb testing.TB, types []Type) []byte { tb.Helper() b, err := NewBuilder(types, nil) qt.Assert(tb, qt.IsNil(err)) buf, err := b.Marshal(nil, nil) qt.Assert(tb, qt.IsNil(err)) return buf } func specFromTypes(tb testing.TB, types []Type) *Spec { tb.Helper() btf := marshalNativeEndian(tb, types) spec, err := 
loadRawSpec(btf, nil) qt.Assert(tb, qt.IsNil(err)) return spec } func typesFromSpec(tb testing.TB, spec *Spec) []Type { tb.Helper() types := make([]Type, 0, len(spec.offsets)) for typ, err := range spec.All() { qt.Assert(tb, qt.IsNil(err)) types = append(types, typ) } return types } ================================================ FILE: btf/strings.go ================================================ package btf import ( "bytes" "errors" "fmt" "io" "maps" "strings" "sync" ) // stringTable contains a sequence of null-terminated strings. // // It is safe for concurrent use. type stringTable struct { base *stringTable bytes []byte mu sync.Mutex cache map[uint32]string } // sizedReader is implemented by bytes.Reader, io.SectionReader, strings.Reader, etc. type sizedReader interface { io.Reader Size() int64 } func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) { bytes := make([]byte, r.Size()) if _, err := io.ReadFull(r, bytes); err != nil { return nil, err } return newStringTable(bytes, base) } func newStringTable(bytes []byte, base *stringTable) (*stringTable, error) { // When parsing split BTF's string table, the first entry offset is derived // from the last entry offset of the base BTF. firstStringOffset := uint32(0) if base != nil { firstStringOffset = uint32(len(base.bytes)) } if len(bytes) > 0 { if bytes[len(bytes)-1] != 0 { return nil, errors.New("string table isn't null terminated") } if firstStringOffset == 0 && bytes[0] != 0 { return nil, errors.New("first item in string table is non-empty") } } return &stringTable{base: base, bytes: bytes}, nil } func (st *stringTable) Lookup(offset uint32) (string, error) { // Fast path: zero offset is the empty string, looked up frequently. if offset == 0 { return "", nil } b, err := st.lookupSlow(offset) return string(b), err } func (st *stringTable) LookupBytes(offset uint32) ([]byte, error) { // Fast path: zero offset is the empty string, looked up frequently. 
if offset == 0 {
		return nil, nil
	}
	return st.lookupSlow(offset)
}

// lookupSlow resolves offset to the string starting there and returns its
// bytes without the null terminator.
//
// If the table has a base, offsets below the length of the base table are
// delegated to the base and larger offsets are rebased onto this table. The
// returned slice aliases the table's backing array.
//
// Returns an error if offset is out of bounds, doesn't point at the beginning
// of a string, or the string isn't null terminated.
func (st *stringTable) lookupSlow(offset uint32) ([]byte, error) {
	if st.base != nil {
		n := uint32(len(st.base.bytes))
		if offset < n {
			return st.base.lookupSlow(offset)
		}
		offset -= n
	}

	// offset == len(st.bytes) points one past the final terminator. It has to
	// be rejected as well: the byte before it is a terminator, so the check
	// below would pass, the search for the next terminator would come up empty
	// and the slice expression would panic.
	if offset >= uint32(len(st.bytes)) {
		return nil, fmt.Errorf("offset %d is out of bounds of string table", offset)
	}

	if offset > 0 && st.bytes[offset-1] != 0 {
		return nil, fmt.Errorf("offset %d is not the beginning of a string", offset)
	}

	i := bytes.IndexByte(st.bytes[offset:], 0)
	if i < 0 {
		// newStringTable rejects tables without a trailing terminator, so this
		// only triggers for tables constructed without that validation.
		return nil, fmt.Errorf("string at offset %d is not null terminated", offset)
	}
	return st.bytes[offset : offset+uint32(i)], nil
}

// LookupCached returns the string at the given offset, caching the result
// for future lookups.
func (cst *stringTable) LookupCached(offset uint32) (string, error) {
	// Fast path: zero offset is the empty string, looked up frequently.
	if offset == 0 {
		return "", nil
	}

	cst.mu.Lock()
	defer cst.mu.Unlock()

	if str, ok := cst.cache[offset]; ok {
		return str, nil
	}

	str, err := cst.Lookup(offset)
	if err != nil {
		return "", err
	}

	// The cache is allocated lazily on the first successful lookup.
	if cst.cache == nil {
		cst.cache = make(map[uint32]string)
	}
	cst.cache[offset] = str
	return str, nil
}

// stringTableBuilder builds BTF string tables.
type stringTableBuilder struct {
	// length is the size of the encoded table in bytes, including the null
	// terminator of every string.
	length uint32
	// strings maps each stored string to its offset in the encoded table.
	strings map[string]uint32
}

// newStringTableBuilder creates a builder with the given capacity.
//
// capacity may be zero.
func newStringTableBuilder(capacity int) *stringTableBuilder {
	var stb stringTableBuilder

	if capacity == 0 {
		// Use the runtime's small default size.
		stb.strings = make(map[string]uint32)
	} else {
		stb.strings = make(map[string]uint32, capacity)
	}

	// Ensure that the empty string is at index 0.
	stb.append("")

	return &stb
}

// Add a string to the table.
//
// Adding the same string multiple times will only store it once.
func (stb *stringTableBuilder) Add(str string) (uint32, error) { if strings.IndexByte(str, 0) != -1 { return 0, fmt.Errorf("string contains null: %q", str) } offset, ok := stb.strings[str] if ok { return offset, nil } return stb.append(str), nil } func (stb *stringTableBuilder) append(str string) uint32 { offset := stb.length stb.length += uint32(len(str)) + 1 stb.strings[str] = offset return offset } // Lookup finds the offset of a string in the table. // // Returns an error if str hasn't been added yet. func (stb *stringTableBuilder) Lookup(str string) (uint32, error) { offset, ok := stb.strings[str] if !ok { return 0, fmt.Errorf("string %q is not in table", str) } return offset, nil } // Length returns the length in bytes. func (stb *stringTableBuilder) Length() int { return int(stb.length) } // AppendEncoded appends the string table to the end of the provided buffer. func (stb *stringTableBuilder) AppendEncoded(buf []byte) []byte { n := len(buf) buf = append(buf, make([]byte, stb.Length())...) strings := buf[n:] for str, offset := range stb.strings { copy(strings[offset:], str) } return buf } // Copy the string table builder. 
func (stb *stringTableBuilder) Copy() *stringTableBuilder { return &stringTableBuilder{ stb.length, maps.Clone(stb.strings), } } ================================================ FILE: btf/strings_test.go ================================================ package btf import ( "bytes" "strings" "testing" "github.com/go-quicktest/qt" ) func TestStringTable(t *testing.T) { const in = "\x00one\x00two\x00" const splitIn = "three\x00four\x00" st, err := readStringTable(strings.NewReader(in), nil) if err != nil { t.Fatal(err) } // Parse string table of split BTF split, err := readStringTable(strings.NewReader(splitIn), st) if err != nil { t.Fatal(err) } testcases := []struct { offset uint32 want string }{ {0, ""}, {1, "one"}, {5, "two"}, {9, "three"}, {15, "four"}, } for _, tc := range testcases { have, err := split.Lookup(tc.offset) if err != nil { t.Errorf("Offset %d: %s", tc.offset, err) continue } if have != tc.want { t.Errorf("Offset %d: want %s but have %s", tc.offset, tc.want, have) } } if _, err := st.Lookup(2); err == nil { t.Error("No error when using offset pointing into middle of string") } // Make sure we reject bogus tables _, err = readStringTable(strings.NewReader("\x00one"), nil) if err == nil { t.Fatal("Accepted non-terminated string") } _, err = readStringTable(strings.NewReader("one\x00"), nil) if err == nil { t.Fatal("Accepted non-empty first item") } } func TestEmptyStringTable(t *testing.T) { empty, err := newStringTable(nil, nil) qt.Assert(t, qt.IsNil(err)) str, err := empty.Lookup(0) qt.Assert(t, qt.IsNil(err), qt.Commentf("Can't lookup empty string")) qt.Assert(t, qt.Equals(str, ""), qt.Commentf("Empty string lookup returned %q", str)) _, err = empty.Lookup(1) qt.Assert(t, qt.IsNotNil(err)) } func TestStringTableBuilder(t *testing.T) { stb := newStringTableBuilder(0) _, err := readStringTable(bytes.NewReader(stb.AppendEncoded(nil)), nil) qt.Assert(t, qt.IsNil(err), qt.Commentf("Can't parse string table")) _, err = stb.Add("foo\x00bar") qt.Assert(t, 
qt.IsNotNil(err)) empty, err := stb.Add("") qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(empty, 0), qt.Commentf("The empty string is not at index 0")) foo1, _ := stb.Add("foo") foo2, _ := stb.Add("foo") qt.Assert(t, qt.Equals(foo1, foo2), qt.Commentf("Adding the same string returns different offsets")) table := stb.AppendEncoded(nil) if n := bytes.Count(table, []byte("foo")); n != 1 { t.Fatalf("Marshalled string table contains foo %d times instead of once", n) } _, err = readStringTable(bytes.NewReader(table), nil) qt.Assert(t, qt.IsNil(err), qt.Commentf("Can't parse string table")) } func BenchmarkStringTableZeroLookup(b *testing.B) { strings := vmlinuxTestdataSpec(b).strings for b.Loop() { s, err := strings.Lookup(0) if err != nil { b.Fatal(err) } if s != "" { b.Fatal("0 is not the empty string") } } } ================================================ FILE: btf/testdata/bpf_core_read.h ================================================ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_CORE_READ_H__ #define __BPF_CORE_READ_H__ /* * enum bpf_field_info_kind is passed as a second argument into * __builtin_preserve_field_info() built-in to get a specific aspect of * a field, captured as a first argument. __builtin_preserve_field_info(field, * info_kind) returns __u32 integer and produces BTF field relocation, which * is understood and processed by libbpf during BPF object loading. See * selftests/bpf for examples. 
*/ enum bpf_field_info_kind { BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ BPF_FIELD_BYTE_SIZE = 1, BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ BPF_FIELD_SIGNED = 3, BPF_FIELD_LSHIFT_U64 = 4, BPF_FIELD_RSHIFT_U64 = 5, }; /* second argument to __builtin_btf_type_id() built-in */ enum bpf_type_id_kind { BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */ BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */ }; /* second argument to __builtin_preserve_type_info() built-in */ enum bpf_type_info_kind { BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ BPF_TYPE_SIZE = 1, /* type size in target kernel */ BPF_TYPE_MATCHES = 2, /* type match in target kernel */ }; /* second argument to __builtin_preserve_enum_value() built-in */ enum bpf_enum_value_kind { BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */ BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */ }; #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) #if __BYTE_ORDER == __LITTLE_ENDIAN #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ (void *)dst, \ __CORE_RELO(src, fld, BYTE_SIZE), \ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else /* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so * for big-endian we need to adjust destination pointer accordingly, based on * field byte size */ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ __CORE_RELO(src, fld, BYTE_SIZE), \ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #endif /* * Extract bitfield, identified by s->field, and return its value as u64. * All this is done in relocatable manner, so bitfield changes such as * signedness, bit size, offset changes, this will be handled automatically. * This version of macro is using bpf_probe_read_kernel() to read underlying * integer storage. 
Macro functions as an expression and its return type is * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error. */ #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ unsigned long long val = 0; \ \ __CORE_BITFIELD_PROBE_READ(&val, s, field); \ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ if (__CORE_RELO(s, field, SIGNED)) \ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ else \ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ val; \ }) /* * Extract bitfield, identified by s->field, and return its value as u64. * This version of macro is using direct memory reads and should be used from * BPF program types that support such functionality (e.g., typed raw * tracepoints). */ #define BPF_CORE_READ_BITFIELD(s, field) ({ \ const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ unsigned long long val; \ \ /* This is a so-called barrier_var() operation that makes specified \ * variable "a black box" for optimizing compiler. \ * It forces compiler to perform BYTE_OFFSET relocation on p and use \ * its calculated value in the switch below, instead of applying \ * the same relocation 4 times for each individual memory load. \ */ \ asm volatile("" : "=r"(p) : "0"(p)); \ \ switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ case 1: val = *(const unsigned char *)p; break; \ case 2: val = *(const unsigned short *)p; break; \ case 4: val = *(const unsigned int *)p; break; \ case 8: val = *(const unsigned long long *)p; break; \ } \ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ if (__CORE_RELO(s, field, SIGNED)) \ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ else \ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ val; \ }) /* * Convenience macro to check that field actually exists in target kernel's. * Returns: * 1, if matching field is present in target kernel; * 0, if no matching field found. 
*/ #define bpf_core_field_exists(field) \ __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) /* * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. */ #define bpf_core_field_size(field) \ __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) /* * Convenience macro to get BTF type ID of a specified type, using a local BTF * information. Return 32-bit unsigned integer with type ID from program's own * BTF. Always succeeds. */ #define bpf_core_type_id_local(type) \ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) /* * Convenience macro to get BTF type ID of a target kernel's type that matches * specified local type. * Returns: * - valid 32-bit unsigned type ID in kernel BTF; * - 0, if no matching type was found in a target kernel BTF. */ #define bpf_core_type_id_kernel(type) \ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) /* * Convenience macro to check that provided named type * (struct/union/enum/typedef) exists in a target kernel. * Returns: * 1, if such type is present in target kernel's BTF; * 0, if no matching type is found. */ #define bpf_core_type_exists(type) \ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) /* * Convenience macro to check that provided named type * (struct/union/enum/typedef) "matches" that in a target kernel. * Returns: * 1, if the type matches in the target kernel's BTF; * 0, if the type does not match any in the target kernel */ #define bpf_core_type_matches(type) \ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES) /* * Convenience macro to get the byte size of a provided named type * (struct/union/enum/typedef) in a target kernel. * Returns: * >= 0 size (in bytes), if type is present in target kernel's BTF; * 0, if no matching type is found. 
*/ #define bpf_core_type_size(type) \ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) /* * Convenience macro to check that provided enumerator value is defined in * a target kernel. * Returns: * 1, if specified enum type and its enumerator value are present in target * kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ #define bpf_core_enum_value_exists(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) /* * Convenience macro to get the integer value of an enumerator value in * a target kernel. * Returns: * 64-bit value, if specified enum type and its enumerator value are * present in target kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ #define bpf_core_enum_value(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures * offset relocation for source address using __builtin_preserve_access_index() * built-in, provided by Clang. * * __builtin_preserve_access_index() takes as an argument an expression of * taking an address of a field within struct/union. It makes compiler emit * a relocation, which records BTF type ID describing root struct/union and an * accessor string which describes exact embedded field that was used to take * an address. See detailed description of this relocation format and * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. * * This relocation allows libbpf to adjust BPF instruction to use correct * actual field offset, based on target kernel BTF type that matches original * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. 
*/ #define bpf_core_read_user(dst, sz, src) \ bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() * additionally emitting BPF CO-RE field relocation for specified source * argument. */ #define bpf_core_read_str(dst, sz, src) \ bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ #define bpf_core_read_user_str(dst, sz, src) \ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) #define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N /* * return number of provided arguments; used for switch-based variadic macro * definitions (see ___last, ___arrow, etc below) */ #define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) /* * return 0 if no arguments are passed, N - otherwise; used for * recursively-defined macros to specify termination (0) case, and generic * (N) case (e.g., ___read_ptrs, ___core_read) */ #define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) #define ___last1(x) x #define ___last2(a, x) x #define ___last3(a, b, x) x #define ___last4(a, b, c, x) x #define ___last5(a, b, c, d, x) x #define ___last6(a, b, c, d, e, x) x #define ___last7(a, b, c, d, e, f, x) x #define ___last8(a, b, c, d, e, f, g, x) x #define ___last9(a, b, c, d, e, f, g, h, x) x #define ___last10(a, b, c, d, e, f, g, h, i, x) x #define ___last(...) 
___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) #define ___nolast2(a, _) a #define ___nolast3(a, b, _) a, b #define ___nolast4(a, b, c, _) a, b, c #define ___nolast5(a, b, c, d, _) a, b, c, d #define ___nolast6(a, b, c, d, e, _) a, b, c, d, e #define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f #define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g #define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h #define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i #define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) #define ___arrow1(a) a #define ___arrow2(a, b) a->b #define ___arrow3(a, b, c) a->b->c #define ___arrow4(a, b, c, d) a->b->c->d #define ___arrow5(a, b, c, d, e) a->b->c->d->e #define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f #define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g #define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h #define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) #define ___type(...) typeof(___arrow(__VA_ARGS__)) #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) /* "recursively" read a sequence of inner pointers using local __t var */ #define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a); #define ___rd_last(fn, ...) \ ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); #define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__) #define ___rd_p2(fn, ...) ___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p5(fn, ...) 
___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) #define ___read_ptrs(fn, src, ...) \ ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__) #define ___core_read0(fn, fn_ptr, dst, src, a) \ ___read(fn, dst, ___type(src), src, a); #define ___core_readN(fn, fn_ptr, dst, src, ...) \ ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ ___last(__VA_ARGS__)); #define ___core_read(fn, fn_ptr, dst, src, a, ...) \ ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \ src, a, ##__VA_ARGS__) /* * BPF_CORE_READ_INTO() is a more performance-conscious variant of * BPF_CORE_READ(), in which final field is read into user-provided storage. * See BPF_CORE_READ() below for more details on general usage. */ #define BPF_CORE_READ_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_core_read, bpf_core_read, \ dst, (src), a, ##__VA_ARGS__) \ }) /* * Variant of BPF_CORE_READ_INTO() for reading from user-space memory. * * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ #define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_core_read_user, bpf_core_read_user, \ dst, (src), a, ##__VA_ARGS__) \ }) /* Non-CO-RE variant of BPF_CORE_READ_INTO() */ #define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_probe_read, bpf_probe_read, \ dst, (src), a, ##__VA_ARGS__) \ }) /* Non-CO-RE variant of BPF_CORE_READ_USER_INTO(). * * As no CO-RE relocations are emitted, source types can be arbitrary and are * not restricted to kernel types only. */ #define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) 
({ \ ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ dst, (src), a, ##__VA_ARGS__) \ }) /* * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as * BPF_CORE_READ() for intermediate pointers, but then executes (and returns * corresponding error code) bpf_core_read_str() for final string read. */ #define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_core_read_str, bpf_core_read, \ dst, (src), a, ##__VA_ARGS__) \ }) /* * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory. * * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ #define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ dst, (src), a, ##__VA_ARGS__) \ }) /* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ #define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_probe_read_str, bpf_probe_read, \ dst, (src), a, ##__VA_ARGS__) \ }) /* * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO(). * * As no CO-RE relocations are emitted, source types can be arbitrary and are * not restricted to kernel types only. */ #define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ dst, (src), a, ##__VA_ARGS__) \ }) /* * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially * when there are few pointer chasing steps. * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: * int x = s->a.b.c->d.e->f->g; * can be succinctly achieved using BPF_CORE_READ as: * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); * * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically * equivalent to: * 1. const void *__t = s->a.b.c; * 2. __t = __t->d.e; * 3. __t = __t->f; * 4. 
return __t->g; * * Equivalence is logical, because there is a heavy type casting/preservation * involved, as well as all the reads are happening through * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to * emit CO-RE relocations. * * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. */ #define BPF_CORE_READ(src, a, ...) ({ \ ___type((src), a, ##__VA_ARGS__) __r; \ BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) /* * Variant of BPF_CORE_READ() for reading from user-space memory. * * NOTE: all the source types involved are still *kernel types* and need to * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will * fail. Custom user types are not relocatable with CO-RE. * The typical situation in which BPF_CORE_READ_USER() might be used is to * read kernel UAPI types from the user-space memory passed in as a syscall * input argument. */ #define BPF_CORE_READ_USER(src, a, ...) ({ \ ___type((src), a, ##__VA_ARGS__) __r; \ BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) /* Non-CO-RE variant of BPF_CORE_READ() */ #define BPF_PROBE_READ(src, a, ...) ({ \ ___type((src), a, ##__VA_ARGS__) __r; \ BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) /* * Non-CO-RE variant of BPF_CORE_READ_USER(). * * As no CO-RE relocations are emitted, source types can be arbitrary and are * not restricted to kernel types only. */ #define BPF_PROBE_READ_USER(src, a, ...) 
({ \ ___type((src), a, ##__VA_ARGS__) __r; \ BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) #endif ================================================ FILE: btf/testdata/fuzz/FuzzExtInfo/50a33736610b4a0945179db4c8a88e8247b05fbb25f50ed81e5393baf29bc5bc ================================================ go test fuzz v1 []byte("\x9f\xeb\x01\x00\x1c\x00\x00\x00\x00\x00\x00\x00000\x10\x00\x00\x00\x000000\xf3\xff\xff\xff0\x00\x00\x00") []byte("0") ================================================ FILE: btf/testdata/fuzz/FuzzExtInfo/72534f53bd90cb52a017013499b11511535c1295bf0e22f856148c02454c323e ================================================ go test fuzz v1 []byte("\x9f\xeb\x01\x00\x18\x00\x00\x00\x00\x00\x00\x000000000000000\x00\x00\x000000") []byte("0") ================================================ FILE: btf/testdata/fuzz/FuzzExtInfo/a87a26efa64ed50b598ae8e333301d57d5f234527730f042d68ccc736e90c9fa ================================================ go test fuzz v1 []byte("\x9f\xeb\x01\x00\x1c\x00\x00\x00\x00\x00\x00\x000000\xe8\xff\xff\xff000000000\x00\x00\x00") []byte("0") ================================================ FILE: btf/testdata/relocs.c ================================================ #include "../../testdata/common.h" #include "bpf_core_read.h" enum e { ZERO = 0, ONE, TWO, }; enum e64 { LARGE = 0x1ffffffff, }; typedef enum e e_t; struct s { int _1; char _2; unsigned int _3; }; typedef struct s s_t; union u { int *_1; char *_2; unsigned int *_3; }; typedef union u u_t; #define local_id_not_zero(expr) \ ({ \ if (bpf_core_type_id_local(expr) == 0) { \ return __LINE__; \ } \ }) #define target_and_local_id_dont_match(expr) \ ({ \ if (bpf_core_type_id_kernel(expr) == bpf_core_type_id_local(expr)) { \ return __LINE__; \ } \ }) __section("socket/type_ids") int type_ids() { local_id_not_zero(int); local_id_not_zero(struct { int frob; }); local_id_not_zero(enum {FRAP}); local_id_not_zero(union { char bar; }); local_id_not_zero(struct s); 
local_id_not_zero(s_t); local_id_not_zero(const s_t); local_id_not_zero(volatile s_t); local_id_not_zero(enum e); local_id_not_zero(e_t); local_id_not_zero(const e_t); local_id_not_zero(volatile e_t); local_id_not_zero(union u); local_id_not_zero(u_t); local_id_not_zero(const u_t); local_id_not_zero(volatile u_t); // In this context, target is the BTF generated by clang. local is // generated on the fly by the library. There is a low chance that // the order on both is the same, so we assert this to make sure that // CO-RE uses the IDs from the dynamic BTF. // Qualifiers on types crash clang. target_and_local_id_dont_match(struct s); target_and_local_id_dont_match(s_t); // target_and_local_id_dont_match(const s_t); // target_and_local_id_dont_match(volatile s_t); target_and_local_id_dont_match(enum e); target_and_local_id_dont_match(e_t); // target_and_local_id_dont_match(const e_t); // target_and_local_id_dont_match(volatile e_t); target_and_local_id_dont_match(union u); target_and_local_id_dont_match(u_t); // target_and_local_id_dont_match(const u_t); // target_and_local_id_dont_match(volatile u_t); return 0; } #define type_exists(expr) \ ({ \ if (!bpf_core_type_exists(expr)) { \ return __LINE__; \ } \ }) #define type_size_matches(expr) \ ({ \ if (bpf_core_type_size(expr) != sizeof(expr)) { \ return __LINE__; \ } \ }) #define type_matches(expr) \ ({ \ if (!bpf_core_type_matches(expr)) { \ return __LINE__; \ } \ }) __section("socket/types") int types() { type_exists(struct s); type_exists(s_t); type_exists(const s_t); type_exists(volatile s_t); type_exists(enum e); type_exists(e_t); type_exists(const e_t); type_exists(volatile e_t); type_exists(union u); type_exists(u_t); type_exists(const u_t); type_exists(volatile u_t); // TODO: Check non-existence. 
type_size_matches(struct s); type_size_matches(s_t); type_size_matches(const s_t); type_size_matches(volatile s_t); type_size_matches(enum e); type_size_matches(e_t); type_size_matches(const e_t); type_size_matches(volatile e_t); type_size_matches(union u); type_size_matches(u_t); type_size_matches(const u_t); type_size_matches(volatile u_t); type_matches(struct s); type_matches(s_t); type_matches(const s_t); type_matches(volatile s_t); type_matches(enum e); type_matches(e_t); type_matches(const e_t); type_matches(volatile e_t); type_matches(union u); type_matches(u_t); type_matches(const u_t); type_matches(volatile u_t); return 0; } #define enum_value_exists(t, v) \ ({ \ if (!bpf_core_enum_value_exists(t, v)) { \ return __LINE__; \ } \ }) #define enum_value_matches(t, v) \ ({ \ if (v != bpf_core_enum_value(t, v)) { \ return __LINE__; \ } \ }) __section("socket/enums") int enums() { enum_value_exists(enum e, ONE); enum_value_exists(volatile enum e, ONE); enum_value_exists(const enum e, ONE); enum_value_exists(e_t, TWO); enum_value_exists(enum e64, LARGE); // TODO: Check non-existence. 
// fields exercises the field-based CO-RE relocations (existence, signedness,
// size and byte offset), first on direct struct/union members and then on
// members reached through anonymous aggregates, an array and a pointer.
// Every check macro returns __LINE__ on failure; 0 means all checks passed.
__section("socket/fields") int fields() {
	field_exists((struct s){}._1);
	field_exists((s_t){}._2);
	field_exists((union u){}._1);
	field_exists((u_t){}._2);

	field_is_signed((struct s){}._1);
	field_is_unsigned((struct s){}._3);
	// unions crash clang-14.
	// field_is_signed((union u){}._1);
	// field_is_unsigned((union u){}._3);

	field_size_matches((struct s){}._1);
	field_size_matches((s_t){}._2);
	field_size_matches((union u){}._1);
	field_size_matches((u_t){}._2);

	field_offset_matches(struct s, _1);
	field_offset_matches(s_t, _2);
	field_offset_matches(union u, _1);
	field_offset_matches(u_t, _2);

	// Wrap the same types in anonymous members so that accessors have to
	// walk through an anonymous union/struct, an array index and a pointer.
	struct t {
		union {
			s_t s[10];
		};
		struct {
			union u u;
		};
	} bar, *barp = &bar;

	field_exists(bar.s[2]._1);
	field_exists(bar.s[1]._2);
	field_exists(bar.u._1);
	field_exists(bar.u._2);
	field_exists(barp[1].u._2);

	field_is_signed(bar.s[2]._1);
	field_is_unsigned(bar.s[2]._3);
	// unions crash clang-14.
	// NOTE(review): the second disabled check used to read field_is_signed;
	// it now matches the disabled union check earlier in this function and the
	// struct check on _3 - confirm when re-enabling.
	// field_is_signed(bar.u._1);
	// field_is_unsigned(bar.u._3);

	field_size_matches(bar.s[2]._1);
	field_size_matches(bar.s[1]._2);
	field_size_matches(bar.u._1);
	field_size_matches(bar.u._2);
	field_size_matches(barp[1].u._2);

	field_offset_matches(struct t, s[2]._1);
	field_offset_matches(struct t, s[1]._2);
	field_offset_matches(struct t, u._1);
	field_offset_matches(struct t, u._2);

	return 0;
}
// reads is the program placed in the "socket" section. It only forwards the
// result of read_subprog(): 0 when every check there passed, otherwise the
// non-zero value (a __LINE__) that read_subprog() returned.
__section("socket") int reads() {
	int ret = read_subprog();
	if (ret)
		return ret;

	return 0;
}
================================================ FILE: btf/testdata/relocs_read_tgt.c ================================================ /* This file exists to emit ELFs with specific BTF types to use as target BTF in tests. It can be made redundant when btf.Spec can be handcrafted and passed as a CO-RE target in the future. */ struct s { char a; char b; }; struct s *unused_s __attribute__((unused)); typedef unsigned int my_u32; typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long u64; struct bits { /*int x;*/ u8 b : 2, a : 4; /* a was before b */ my_u32 d : 2; /* was 'unsigned int' */ u16 c : 1; /* was before d */ enum { ZERO = 0, ONE = 1 } e : 1; u16 f; /* was: u64 f:16 */ u32 g : 30; /* was: u64 g:30 */ }; struct bits *unused_bits __attribute__((unused)); ================================================ FILE: btf/testdata/tags.c ================================================ #include "../../testdata/common.h" #define tagA __attribute__((btf_decl_tag("a"))) #define tagB __attribute__((btf_decl_tag("b"))) #define tagC __attribute__((btf_decl_tag("c"))) #define tagD __attribute__((btf_decl_tag("d"))) #define tagE __attribute__((btf_decl_tag("e"))) struct s { char tagA foo; char tagB bar; } tagC; union u { char tagA foo; char tagB bar; } tagC; typedef tagB char td; struct s tagD s1; union u tagE u1; td tagA t1; int tagA tagB fwdDecl(char tagC x, char tagD y); int tagE normalDecl1(char tagB x, char tagC y) { return fwdDecl(x, y); } int tagE normalDecl2(char tagB x, char tagC y) { return fwdDecl(x, y); } __section("syscall") int prog(char *ctx) { return normalDecl1(ctx[0], ctx[1]) + normalDecl2(ctx[2], ctx[3]); } ================================================ FILE: btf/traversal.go ================================================ package btf import ( "fmt" "iter" ) // Functions to traverse a cyclic graph of types. 
// children yields all direct descendants of typ.
//
// Descendants stored in typ are yielded as pointers to the field holding
// them, so a caller may replace a child in place by assigning through the
// pointer. Decl tags are the exception: they are synthesised on the fly from
// the Tags / ParamTags strings, so the yielded pointer refers to a temporary.
//
// For each composite member (or function parameter) the member type is
// yielded first, followed by that member's tags. Tags attached to typ itself
// are yielded last, with an index of -1.
//
// Panics if typ is of a kind that is not handled by the switch below.
func children(typ Type) iter.Seq[*Type] {
	return func(yield func(*Type) bool) {
		// Explicitly type switch on the most common types to allow the inliner to
		// do its work. This avoids allocating intermediate slices from walk() on
		// the heap.
		var tags []string
		switch v := typ.(type) {
		case *Void, *Int, *Enum, *Fwd, *Float, *declTag:
			// No children to traverse.
			// declTags is declared as a leaf type since it's parsed into .Tags fields of other types
			// during unmarshaling.
		case *Pointer:
			if !yield(&v.Target) {
				return
			}
		case *Array:
			if !yield(&v.Index) {
				return
			}
			if !yield(&v.Type) {
				return
			}
		case *Struct:
			for i := range v.Members {
				if !yield(&v.Members[i].Type) {
					return
				}
				// Member tags carry the member's index.
				for _, t := range v.Members[i].Tags {
					var tag Type = &declTag{v, t, i}
					if !yield(&tag) {
						return
					}
				}
			}
			tags = v.Tags
		case *Union:
			for i := range v.Members {
				if !yield(&v.Members[i].Type) {
					return
				}
				// Member tags carry the member's index.
				for _, t := range v.Members[i].Tags {
					var tag Type = &declTag{v, t, i}
					if !yield(&tag) {
						return
					}
				}
			}
			tags = v.Tags
		case *Typedef:
			if !yield(&v.Type) {
				return
			}
			tags = v.Tags
		case *Volatile:
			if !yield(&v.Type) {
				return
			}
		case *Const:
			if !yield(&v.Type) {
				return
			}
		case *Restrict:
			if !yield(&v.Type) {
				return
			}
		case *Func:
			if !yield(&v.Type) {
				return
			}
			// Parameter tags are only emitted when the function's type is a
			// FuncProto, and only for parameters that have a ParamTags entry.
			if fp, ok := v.Type.(*FuncProto); ok {
				for i := range fp.Params {
					if len(v.ParamTags) <= i {
						continue
					}
					for _, t := range v.ParamTags[i] {
						var tag Type = &declTag{v, t, i}
						if !yield(&tag) {
							return
						}
					}
				}
			}
			tags = v.Tags
		case *FuncProto:
			if !yield(&v.Return) {
				return
			}
			for i := range v.Params {
				if !yield(&v.Params[i].Type) {
					return
				}
			}
		case *Var:
			if !yield(&v.Type) {
				return
			}
			tags = v.Tags
		case *Datasec:
			for i := range v.Vars {
				if !yield(&v.Vars[i].Type) {
					return
				}
			}
		case *TypeTag:
			if !yield(&v.Type) {
				return
			}
		case *cycle:
			// cycle has children, but we ignore them deliberately.
		default:
			panic(fmt.Sprintf("don't know how to walk Type %T", v))
		}

		// Tags on the type as a whole come last and use index -1.
		for _, t := range tags {
			var tag Type = &declTag{typ, t, -1}
			if !yield(&tag) {
				return
			}
		}
	}
}
func BenchmarkPostorderTraversal(b *testing.B) { spec := vmlinuxTestdataSpec(b) var fn *Func err := spec.TypeByName("gov_update_cpu_data", &fn) if err != nil { b.Fatal(err) } for _, test := range []struct { name string typ Type }{ {"single type", &Int{}}, {"cycle(1)", newCyclicalType(1)}, {"cycle(10)", newCyclicalType(10)}, {"gov_update_cpu_data", fn}, } { b.Run(test.name, func(b *testing.B) { b.ReportAllocs() for b.Loop() { for range postorder(test.typ, nil) { } } }) } } ================================================ FILE: btf/types.go ================================================ package btf import ( "errors" "fmt" "io" "math" "strings" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" ) // Mirrors MAX_RESOLVE_DEPTH in libbpf. // https://github.com/libbpf/libbpf/blob/e26b84dc330c9644c07428c271ab491b0f01f4e1/src/btf.c#L761 const maxResolveDepth = 32 // TypeID identifies a type in a BTF section. type TypeID = sys.TypeID // Type represents a type described by BTF. // // A Type has three properties where compared to other Types. // // Identity: follows the [Go specification], two Types are considered identical // if they have the same concrete type and the same dynamic value, aka they point // at the same location in memory. This means that the following Types are // considered distinct even though they have the same "shape". // // a := &Int{Size: 1} // b := &Int{Size: 1} // a != b // // Equivalence: two Types are considered equivalent if they have the same shape // and thus are functionally interchangeable, even if they are located at different // memory addresses. The above two Int types are equivalent. // // Compatibility: two Types are considered compatible according to the rules of CO-RE // see [coreAreTypesCompatible] for details. This is a non-commutative property, // so A may be compatible with B, but B not compatible with A. 
// // [Go specification]: https://go.dev/ref/spec#Comparison_operators type Type interface { // Type can be formatted using the %s and %v verbs. %s outputs only the // identity of the type, without any detail. %v outputs additional detail. // // Use the '+' flag to include the address of the type. // // Use the width to specify how many levels of detail to output, for example // %1v will output detail for the root type and a short description of its // children. %2v would output details of the root type and its children // as well as a short description of the grandchildren. fmt.Formatter // Name of the type, empty for anonymous types and types that cannot // carry a name, like Void and Pointer. TypeName() string // Make a copy of the type, without copying Type members. copy() Type // New implementations must update children, deduper.typeHash, and typesEquivalent. } var ( _ Type = (*Int)(nil) _ Type = (*Struct)(nil) _ Type = (*Union)(nil) _ Type = (*Enum)(nil) _ Type = (*Fwd)(nil) _ Type = (*Func)(nil) _ Type = (*Typedef)(nil) _ Type = (*Var)(nil) _ Type = (*Datasec)(nil) _ Type = (*Float)(nil) _ Type = (*declTag)(nil) _ Type = (*TypeTag)(nil) _ Type = (*cycle)(nil) ) // Void is the unit type of BTF. type Void struct{} func (v *Void) Format(fs fmt.State, verb rune) { formatType(fs, verb, v) } func (v *Void) TypeName() string { return "" } func (v *Void) size() uint32 { return 0 } func (v *Void) copy() Type { return (*Void)(nil) } type IntEncoding byte // Valid IntEncodings. // // These may look like they are flags, but they aren't. const ( Unsigned IntEncoding = 0 Signed IntEncoding = 1 Char IntEncoding = 2 Bool IntEncoding = 4 ) func (ie IntEncoding) String() string { switch ie { case Char: // NB: There is no way to determine signedness for char. return "char" case Bool: return "bool" case Signed: return "signed" case Unsigned: return "unsigned" default: return fmt.Sprintf("IntEncoding(%d)", byte(ie)) } } // Int is an integer of a given length. 
// Array is an array with a fixed number of elements.
type Array struct {
	// Index is the type associated with the array's index.
	// NOTE(review): presumably mirrors the index type recorded by BTF for
	// arrays - confirm against the BTF documentation.
	Index Type
	// Type is the element type. Sizeof multiplies its size by Nelems.
	Type Type
	// Nelems is the number of elements. Sizeof accepts zero-length arrays.
	Nelems uint32
}
// // For example, %1v will print only the first member name followed by '...': // // Struct:"struct"[fields=3 fieldNames=[a ...]] func (s *Struct) Format(fs fmt.State, verb rune) { max, _ := fs.Width() formatType(fs, verb, s, "fields=", len(s.Members), "fieldNames=", memberNames(s.Members, max)) } func (s *Struct) TypeName() string { return s.Name } func (s *Struct) size() uint32 { return s.Size } func (s *Struct) copy() Type { cpy := *s cpy.Members = copyMembers(s.Members) cpy.Tags = copyTags(cpy.Tags) return &cpy } func (s *Struct) members() []Member { return s.Members } // Union is a compound type where members occupy the same memory. type Union struct { Name string // The size of the union including padding, in bytes. Size uint32 Members []Member Tags []string } // Format supports the width option to %v to limit or extend the number of // union member names printed (default 5). // // For example, %1v will print only the first member name followed by '...': // // Union:"union"[fields=3 fieldNames=[a ...]] func (u *Union) Format(fs fmt.State, verb rune) { max, _ := fs.Width() formatType(fs, verb, u, "fields=", len(u.Members), "fieldNames=", memberNames(u.Members, max)) } func (u *Union) TypeName() string { return u.Name } func (u *Union) size() uint32 { return u.Size } func (u *Union) copy() Type { cpy := *u cpy.Members = copyMembers(u.Members) cpy.Tags = copyTags(cpy.Tags) return &cpy } func (u *Union) members() []Member { return u.Members } // memberNames returns the names of members, or its index in the list of members // if the name is empty. // // Returns up to max entries (default 5), followed by '...'. func memberNames(members []Member, max int) []string { if max <= 0 { max = 5 } names := make([]string, min(len(members), max+1)) for i, m := range members { if i >= max { names[i] = "..." 
break } if m.Name == "" { names[i] = fmt.Sprintf("<%d>", i) continue } names[i] = m.Name } return names } func copyMembers(orig []Member) []Member { cpy := make([]Member, len(orig)) copy(cpy, orig) for i, member := range cpy { cpy[i].Tags = copyTags(member.Tags) } return cpy } func copyTags(orig []string) []string { if orig == nil { // preserve nil vs zero-len slice distinction return nil } cpy := make([]string, len(orig)) copy(cpy, orig) return cpy } type composite interface { Type members() []Member } var ( _ composite = (*Struct)(nil) _ composite = (*Union)(nil) ) // A value in bits. type Bits uint32 // Bytes converts a bit value into bytes. func (b Bits) Bytes() uint32 { return uint32(b / 8) } // Member is part of a Struct or Union. // // It is not a valid Type. type Member struct { Name string Type Type Offset Bits BitfieldSize Bits Tags []string } // Enum lists possible values. type Enum struct { Name string // Size of the enum value in bytes. Size uint32 // True if the values should be interpreted as signed integers. Signed bool Values []EnumValue } func (e *Enum) Format(fs fmt.State, verb rune) { formatType(fs, verb, e, "size=", e.Size, "values=", len(e.Values)) } func (e *Enum) TypeName() string { return e.Name } // EnumValue is part of an Enum // // Is is not a valid Type type EnumValue struct { Name string Value uint64 } func (e *Enum) size() uint32 { return e.Size } func (e *Enum) copy() Type { cpy := *e cpy.Values = make([]EnumValue, len(e.Values)) copy(cpy.Values, e.Values) return &cpy } // FwdKind is the type of forward declaration. type FwdKind int // Valid types of forward declaration. const ( FwdStruct FwdKind = iota FwdUnion ) func (fk FwdKind) String() string { switch fk { case FwdStruct: return "struct" case FwdUnion: return "union" default: return fmt.Sprintf("%T(%d)", fk, int(fk)) } } // Fwd is a forward declaration of a Type. 
type Fwd struct { Name string Kind FwdKind } func (f *Fwd) Format(fs fmt.State, verb rune) { formatType(fs, verb, f, f.Kind) } func (f *Fwd) TypeName() string { return f.Name } func (f *Fwd) copy() Type { cpy := *f return &cpy } func (f *Fwd) matches(typ Type) bool { if _, ok := As[*Struct](typ); ok && f.Kind == FwdStruct { return true } if _, ok := As[*Union](typ); ok && f.Kind == FwdUnion { return true } return false } // Typedef is an alias of a Type. type Typedef struct { Name string Type Type Tags []string } func (td *Typedef) Format(fs fmt.State, verb rune) { formatType(fs, verb, td, td.Type) } func (td *Typedef) TypeName() string { return td.Name } func (td *Typedef) copy() Type { cpy := *td cpy.Tags = copyTags(td.Tags) return &cpy } // Volatile is a qualifier. type Volatile struct { Type Type } func (v *Volatile) Format(fs fmt.State, verb rune) { formatType(fs, verb, v, v.Type) } func (v *Volatile) TypeName() string { return "" } func (v *Volatile) qualify() Type { return v.Type } func (v *Volatile) copy() Type { cpy := *v return &cpy } // Const is a qualifier. type Const struct { Type Type } func (c *Const) Format(fs fmt.State, verb rune) { formatType(fs, verb, c, c.Type) } func (c *Const) TypeName() string { return "" } func (c *Const) qualify() Type { return c.Type } func (c *Const) copy() Type { cpy := *c return &cpy } // Restrict is a qualifier. type Restrict struct { Type Type } func (r *Restrict) Format(fs fmt.State, verb rune) { formatType(fs, verb, r, r.Type) } func (r *Restrict) TypeName() string { return "" } func (r *Restrict) qualify() Type { return r.Type } func (r *Restrict) copy() Type { cpy := *r return &cpy } // Func is a function definition. type Func struct { Name string Type Type Linkage FuncLinkage Tags []string // ParamTags holds a list of tags for each parameter of the FuncProto to which `Type` points. // If no tags are present for any param, the outer slice will be nil/len(ParamTags)==0. 
// If at least 1 param has a tag, the outer slice will have the same length as the number of params. // The inner slice contains the tags and may be nil/len(ParamTags[i])==0 if no tags are present for that param. ParamTags [][]string } type funcInfoMeta struct{} func FuncMetadata(ins *asm.Instruction) *Func { fn, _ := ins.Metadata.Get(funcInfoMeta{}).(*Func) return fn } // WithFuncMetadata adds a btf.Func to the Metadata of asm.Instruction. func WithFuncMetadata(ins asm.Instruction, fn *Func) asm.Instruction { ins.Metadata.Set(funcInfoMeta{}, fn) return ins } func (f *Func) Format(fs fmt.State, verb rune) { formatType(fs, verb, f, f.Linkage, "proto=", f.Type) } func (f *Func) TypeName() string { return f.Name } func (f *Func) copy() Type { cpy := *f cpy.Tags = copyTags(f.Tags) if f.ParamTags != nil { // preserve nil vs zero-len slice distinction ptCopy := make([][]string, len(f.ParamTags)) for i, tags := range f.ParamTags { ptCopy[i] = copyTags(tags) } cpy.ParamTags = ptCopy } return &cpy } // FuncProto is a function declaration. type FuncProto struct { Return Type Params []FuncParam } func (fp *FuncProto) Format(fs fmt.State, verb rune) { formatType(fs, verb, fp, "args=", len(fp.Params), "return=", fp.Return) } func (fp *FuncProto) TypeName() string { return "" } func (fp *FuncProto) copy() Type { cpy := *fp cpy.Params = make([]FuncParam, len(fp.Params)) copy(cpy.Params, fp.Params) return &cpy } type FuncParam struct { Name string Type Type } // Var is a global variable. type Var struct { Name string Type Type Linkage VarLinkage Tags []string } func (v *Var) Format(fs fmt.State, verb rune) { formatType(fs, verb, v, v.Linkage) } func (v *Var) TypeName() string { return v.Name } func (v *Var) copy() Type { cpy := *v cpy.Tags = copyTags(v.Tags) return &cpy } // Datasec is a global program section containing data. 
type Datasec struct { Name string Size uint32 Vars []VarSecinfo } func (ds *Datasec) Format(fs fmt.State, verb rune) { formatType(fs, verb, ds) } func (ds *Datasec) TypeName() string { return ds.Name } func (ds *Datasec) size() uint32 { return ds.Size } func (ds *Datasec) copy() Type { cpy := *ds cpy.Vars = make([]VarSecinfo, len(ds.Vars)) copy(cpy.Vars, ds.Vars) return &cpy } // VarSecinfo describes variable in a Datasec. // // It is not a valid Type. type VarSecinfo struct { // Var or Func. Type Type Offset uint32 Size uint32 } // Float is a float of a given length. type Float struct { Name string // The size of the float in bytes. Size uint32 } func (f *Float) Format(fs fmt.State, verb rune) { formatType(fs, verb, f, "size=", f.Size*8) } func (f *Float) TypeName() string { return f.Name } func (f *Float) size() uint32 { return f.Size } func (f *Float) copy() Type { cpy := *f return &cpy } // declTag associates metadata with a declaration. type declTag struct { Type Type Value string // The index this tag refers to in the target type. For composite types, // a value of -1 indicates that the tag refers to the whole type. Otherwise // it indicates which member or argument the tag applies to. Index int } func (dt *declTag) Format(fs fmt.State, verb rune) { formatType(fs, verb, dt, "type=", dt.Type, "value=", dt.Value, "index=", dt.Index) } func (dt *declTag) TypeName() string { return "" } func (dt *declTag) copy() Type { cpy := *dt return &cpy } // TypeTag associates metadata with a pointer type. Tag types act as a custom // modifier(const, restrict, volatile) for the target type. Unlike declTags, // TypeTags are ordered so the order in which they are added matters. // // One of their uses is to mark pointers as `__kptr` meaning a pointer points // to kernel memory. Adding a `__kptr` to pointers in map values allows you // to store pointers to kernel memory in maps. 
type TypeTag struct { Type Type Value string } func (tt *TypeTag) Format(fs fmt.State, verb rune) { formatType(fs, verb, tt, "type=", tt.Type, "value=", tt.Value) } func (tt *TypeTag) TypeName() string { return "" } func (tt *TypeTag) qualify() Type { return tt.Type } func (tt *TypeTag) copy() Type { cpy := *tt return &cpy } // cycle is a type which had to be elided since it exceeded maxTypeDepth. type cycle struct { root Type } func (c *cycle) ID() TypeID { return math.MaxUint32 } func (c *cycle) Format(fs fmt.State, verb rune) { formatType(fs, verb, c, "root=", c.root) } func (c *cycle) TypeName() string { return "" } func (c *cycle) copy() Type { cpy := *c return &cpy } type sizer interface { size() uint32 } var ( _ sizer = (*Int)(nil) _ sizer = (*Pointer)(nil) _ sizer = (*Struct)(nil) _ sizer = (*Union)(nil) _ sizer = (*Enum)(nil) _ sizer = (*Datasec)(nil) ) type qualifier interface { qualify() Type } var ( _ qualifier = (*Const)(nil) _ qualifier = (*Restrict)(nil) _ qualifier = (*Volatile)(nil) _ qualifier = (*TypeTag)(nil) ) var errUnsizedType = errors.New("type is unsized") // Sizeof returns the size of a type in bytes. // // Returns an error if the size can't be computed. func Sizeof(typ Type) (int, error) { var ( n = int64(1) elem int64 ) for i := 0; i < maxResolveDepth; i++ { switch v := typ.(type) { case *Array: if n > 0 && int64(v.Nelems) > math.MaxInt64/n { return 0, fmt.Errorf("type %s: overflow", typ) } // Arrays may be of zero length, which allows // n to be zero as well. 
n *= int64(v.Nelems) typ = v.Type continue case sizer: elem = int64(v.size()) case *Typedef: typ = v.Type continue case qualifier: typ = v.qualify() continue default: return 0, fmt.Errorf("type %T: %w", typ, errUnsizedType) } if n > 0 && elem > math.MaxInt64/n { return 0, fmt.Errorf("type %s: overflow", typ) } size := n * elem if int64(int(size)) != size { return 0, fmt.Errorf("type %s: overflow", typ) } return int(size), nil } return 0, fmt.Errorf("type %s: exceeded type depth", typ) } // alignof returns the alignment of a type. // // Returns an error if the Type can't be aligned, like an integer with an uneven // size. Currently only supports the subset of types necessary for bitfield // relocations. func alignof(typ Type) (int, error) { var n int switch t := UnderlyingType(typ).(type) { case *Enum: n = int(t.size()) case *Int: n = int(t.Size) case *Array: return alignof(t.Type) default: return 0, fmt.Errorf("can't calculate alignment of %T", t) } if !internal.IsPow(n) { return 0, fmt.Errorf("alignment value %d is not a power of two", n) } return n, nil } // Copy a Type recursively. // // typ may form a cycle. func Copy(typ Type) Type { return copyType(typ, nil, make(map[Type]Type), nil) } func copyType(typ Type, ids map[Type]TypeID, copies map[Type]Type, copiedIDs map[Type]TypeID) Type { if typ == nil { return nil } cpy, ok := copies[typ] if ok { // This has been copied previously, no need to continue. return cpy } cpy = typ.copy() copies[typ] = cpy if id, ok := ids[typ]; ok { copiedIDs[cpy] = id } for child := range children(cpy) { *child = copyType(*child, ids, copies, copiedIDs) } return cpy } type typeDeque = internal.Deque[*Type] // essentialName represents the name of a BTF type stripped of any flavor // suffixes after a ___ delimiter. type essentialName string // newEssentialName returns name without a ___ suffix. // // CO-RE has the concept of 'struct flavors', which are used to deal with // changes in kernel data structures. 
Anything after three underscores // in a type name is ignored for the purpose of finding a candidate type // in the kernel's BTF. func newEssentialName(name string) essentialName { if name == "" { return "" } lastIdx := strings.LastIndex(name, "___") if lastIdx > 0 { return essentialName(name[:lastIdx]) } return essentialName(name) } // UnderlyingType skips qualifiers and Typedefs. func UnderlyingType(typ Type) Type { result := typ for depth := 0; depth <= maxResolveDepth; depth++ { switch v := (result).(type) { case qualifier: result = v.qualify() case *Typedef: result = v.Type default: return result } } return &cycle{typ} } // QualifiedType returns the type with all qualifiers removed. func QualifiedType(typ Type) Type { result := typ for depth := 0; depth <= maxResolveDepth; depth++ { switch v := (result).(type) { case qualifier: result = v.qualify() default: return result } } return &cycle{typ} } // As returns typ if is of type T. Otherwise it peels qualifiers and Typedefs // until it finds a T. // // Returns the zero value and false if there is no T or if the type is nested // too deeply. func As[T Type](typ Type) (T, bool) { // NB: We can't make this function return (*T) since then // we can't assert that a type matches an interface which // embeds Type: as[composite](T). for depth := 0; depth <= maxResolveDepth; depth++ { switch v := (typ).(type) { case T: return v, true case qualifier: typ = v.qualify() case *Typedef: typ = v.Type default: goto notFound } } notFound: var zero T return zero, false } type formatState struct { fmt.State depth int } // formattableType is a subset of Type, to ease unit testing of formatType. type formattableType interface { fmt.Formatter TypeName() string } // formatType formats a type in a canonical form. // // Handles cyclical types by only printing cycles up to a certain depth. Elements // in extra are separated by spaces unless the preceding element is a string // ending in '='. 
func formatType(f fmt.State, verb rune, t formattableType, extra ...interface{}) { if verb != 'v' && verb != 's' { fmt.Fprintf(f, "{UNRECOGNIZED: %c}", verb) return } _, _ = io.WriteString(f, internal.GoTypeName(t)) if name := t.TypeName(); name != "" { // Output BTF type name if present. fmt.Fprintf(f, ":%q", name) } if f.Flag('+') { // Output address if requested. fmt.Fprintf(f, ":%#p", t) } if verb == 's' { // %s omits details. return } var depth int if ps, ok := f.(*formatState); ok { depth = ps.depth f = ps.State } maxDepth, ok := f.Width() if !ok { maxDepth = 0 } if depth > maxDepth { // We've reached the maximum depth. This avoids infinite recursion even // for cyclical types. return } if len(extra) == 0 { return } wantSpace := false _, _ = io.WriteString(f, "[") for _, arg := range extra { if wantSpace { _, _ = io.WriteString(f, " ") } switch v := arg.(type) { case string: _, _ = io.WriteString(f, v) wantSpace = len(v) > 0 && v[len(v)-1] != '=' continue case formattableType: v.Format(&formatState{f, depth + 1}, verb) default: fmt.Fprint(f, arg) } wantSpace = true } _, _ = io.WriteString(f, "]") } ================================================ FILE: btf/types_test.go ================================================ package btf import ( "encoding/binary" "fmt" "reflect" "testing" "github.com/go-quicktest/qt" "github.com/google/go-cmp/cmp" "github.com/cilium/ebpf/internal/testutils" ) func TestSizeof(t *testing.T) { testcases := []struct { size int typ Type }{ {0, (*Void)(nil)}, {1, &Int{Size: 1}}, {8, &Enum{Size: 8}}, {0, &Array{Type: &Pointer{Target: (*Void)(nil)}, Nelems: 0}}, {12, &Array{Type: &Enum{Size: 4}, Nelems: 3}}, } for _, tc := range testcases { name := fmt.Sprint(tc.typ) t.Run(name, func(t *testing.T) { have, err := Sizeof(tc.typ) if err != nil { t.Fatal("Can't calculate size:", err) } if have != tc.size { t.Errorf("Expected size %d, got %d", tc.size, have) } }) } } func TestCopy(t *testing.T) { i := &Int{Size: 4} tags := []string{"bar:foo"} 
got := Copy(&Struct{ Members: []Member{ {Name: "a", Type: i}, {Name: "b", Type: i}, }, }) members := got.(*Struct).Members qt.Check(t, qt.Equals(members[0].Type.(*Int), members[1].Type.(*Int)), qt.Commentf("identity should be preserved")) for _, test := range []struct { name string typ Type }{ {"nil", nil}, {"void", (*Void)(nil)}, {"int", i}, {"cyclical", newCyclicalType(2)}, {"struct tags", &Struct{Tags: tags, Members: []Member{{Tags: tags}}}}, {"union tags", &Union{Tags: tags, Members: []Member{{Tags: tags}}}}, {"typedef tags", &Typedef{Type: i, Tags: tags}}, {"var tags", &Var{Type: i, Tags: tags}}, {"func tags", &Func{Tags: tags, ParamTags: [][]string{tags}}}, } { t.Run(test.name, func(t *testing.T) { cpy := Copy(test.typ) qt.Assert(t, testutils.IsDeepCopy(cpy, test.typ)) }) } } func TestAs(t *testing.T) { i := &Int{} ptr := &Pointer{i} td := &Typedef{Type: ptr} cst := &Const{td} vol := &Volatile{cst} // It's possible to retrieve qualifiers and Typedefs. haveVol, ok := As[*Volatile](vol) qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.Equals(haveVol, vol)) haveTd, ok := As[*Typedef](vol) qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.Equals(haveTd, td)) haveCst, ok := As[*Const](vol) qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.Equals(haveCst, cst)) // Make sure we don't skip Pointer. haveI, ok := As[*Int](vol) qt.Assert(t, qt.IsFalse(ok)) qt.Assert(t, qt.IsNil(haveI)) // Make sure we can always retrieve Pointer. for _, typ := range []Type{ td, cst, vol, ptr, } { have, ok := As[*Pointer](typ) qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.Equals(have, ptr)) } } func BenchmarkCopy(b *testing.B) { typ := newCyclicalType(10) b.ReportAllocs() for b.Loop() { Copy(typ) } } // The following are valid Types. // // There currently is no better way to document which // types implement an interface. 
func ExampleType_validTypes() {
	var _ Type = &Void{}
	var _ Type = &Int{}
	var _ Type = &Pointer{}
	var _ Type = &Array{}
	var _ Type = &Struct{}
	var _ Type = &Union{}
	var _ Type = &Enum{}
	var _ Type = &Fwd{}
	var _ Type = &Typedef{}
	var _ Type = &Volatile{}
	var _ Type = &Const{}
	var _ Type = &Restrict{}
	var _ Type = &Func{}
	var _ Type = &FuncProto{}
	var _ Type = &Var{}
	var _ Type = &Datasec{}
	var _ Type = &Float{}
}

// TestType checks properties every Type has to have: copy returns a new
// value, children visits at least as many children as the struct has fields of
// type Type, and two walks over the same value yield the same pointers.
func TestType(t *testing.T) {
	types := []func() Type{
		func() Type { return &Void{} },
		func() Type { return &Int{Size: 2} },
		func() Type { return &Pointer{Target: &Void{}} },
		func() Type { return &Array{Type: &Int{}} },
		func() Type {
			return &Struct{
				Members: []Member{{Type: &Void{}}},
			}
		},
		func() Type {
			return &Union{
				Members: []Member{{Type: &Void{}}},
			}
		},
		func() Type { return &Enum{} },
		func() Type { return &Fwd{Name: "thunk"} },
		func() Type { return &Typedef{Type: &Void{}} },
		func() Type { return &Volatile{Type: &Void{}} },
		func() Type { return &Const{Type: &Void{}} },
		func() Type { return &Restrict{Type: &Void{}} },
		func() Type { return &Func{Name: "foo", Type: &Void{}} },
		func() Type {
			return &FuncProto{
				Params: []FuncParam{{Name: "bar", Type: &Void{}}},
				Return: &Void{},
			}
		},
		func() Type { return &Var{Type: &Void{}} },
		func() Type {
			return &Datasec{
				Vars: []VarSecinfo{{Type: &Void{}}},
			}
		},
		func() Type { return &Float{} },
		func() Type { return &TypeTag{Type: &Void{}} },
		func() Type { return &cycle{&Void{}} },
	}

	// Children are compared by pointer identity, not by value.
	compareTypes := cmp.Comparer(func(a, b *Type) bool {
		return a == b
	})

	for _, fn := range types {
		typ := fn()
		t.Run(fmt.Sprintf("%T", typ), func(t *testing.T) {
			t.Logf("%v", typ)

			if typ == typ.copy() {
				t.Error("Copy doesn't copy")
			}

			var a []*Type
			// NB: t shadows the *testing.T for the duration of this loop.
			for t := range children(typ) {
				a = append(a, t)
			}

			// *cycle is exempt from the child count check.
			if _, ok := typ.(*cycle); !ok {
				if n := countChildren(t, reflect.TypeOf(typ)); len(a) < n {
					t.Errorf("walkType visited %d children, expected at least %d", len(a), n)
				}
			}

			var b []*Type
			// NB: t shadows the *testing.T for the duration of this loop.
			for t := range children(typ) {
				b = append(b, t)
			}

			if diff := cmp.Diff(a, b, compareTypes); diff != "" {
				t.Errorf("Walk mismatch (-want +got):\n%s", diff)
			}
		})
	}
}

// TestTagMarshaling builds a spec from a single tagged type and checks that
// the type read back under ID 1 is deeply equal to the original.
func TestTagMarshaling(t *testing.T) {
	for _, typ := range []Type{
		&TypeTag{&Int{}, "foo"},
		&Struct{Members: []Member{
			{Type: &Int{}, Tags: []string{"bar"}},
		}, Tags: []string{"foo"}},
		&Union{Members: []Member{
			{Type: &Int{}, Tags: []string{"bar"}},
			{Type: &Int{}, Tags: []string{"baz"}},
		}, Tags: []string{"foo"}},
		&Func{Type: &FuncProto{Return: &Int{}, Params: []FuncParam{
			{Name: "param1", Type: &Int{}},
		}}, Tags: []string{"foo"}, ParamTags: [][]string{{"bar"}}},
		&Var{Name: "var1", Type: &Int{}, Tags: []string{"foo"}},
		&Typedef{Name: "baz", Type: &Int{}, Tags: []string{"foo"}},
	} {
		t.Run(fmt.Sprint(typ), func(t *testing.T) {
			s := specFromTypes(t, []Type{typ})

			have, err := s.TypeByID(1)
			qt.Assert(t, qt.IsNil(err))

			qt.Assert(t, qt.DeepEquals(have, typ))
		})
	}
}

// countChildren returns the number of fields of type Type in the struct that
// typ points to. typ must be a pointer to a struct, otherwise the test fails.
// Only fields declared directly with type Type are counted.
func countChildren(t *testing.T, typ reflect.Type) int {
	if typ.Kind() != reflect.Pointer {
		t.Fatal("Expected pointer, got", typ.Kind())
	}

	typ = typ.Elem()
	if typ.Kind() != reflect.Struct {
		t.Fatal("Expected struct, got", typ.Kind())
	}

	var n int
	for i := 0; i < typ.NumField(); i++ {
		if typ.Field(i).Type == reflect.TypeOf((*Type)(nil)).Elem() {
			n++
		}
	}

	return n
}

// testFormattableType is a minimal formattableType which forwards its name and
// extra values to formatType.
type testFormattableType struct {
	name  string
	extra []interface{}
}

var _ formattableType = (*testFormattableType)(nil)

func (tft *testFormattableType) TypeName() string { return tft.name }
func (tft *testFormattableType) Format(fs fmt.State, verb rune) {
	formatType(fs, verb, tft, tft.extra...)
}

// TestFormatType checks which parts of the canonical form are emitted for
// different verbs, flags and widths.
func TestFormatType(t *testing.T) {
	t1 := &testFormattableType{"", []interface{}{"extra"}}
	t1Addr := fmt.Sprintf("%#p", t1)
	goType := reflect.TypeOf(t1).Elem().Name()

	t2 := &testFormattableType{"foo", []interface{}{t1}}

	t3 := &testFormattableType{extra: []interface{}{""}}

	tests := []struct {
		t        formattableType
		fmt      string
		contains []string
		omits    []string
	}{
		// %s doesn't contain address or extra.
{t1, "%s", []string{goType}, []string{t1Addr, "extra"}},
		// %+s doesn't contain extra.
		{t1, "%+s", []string{goType, t1Addr}, []string{"extra"}},
		// %v does contain extra.
		{t1, "%v", []string{goType, "extra"}, []string{t1Addr}},
		// %+v does contain address.
		{t1, "%+v", []string{goType, "extra", t1Addr}, nil},
		// %v doesn't print nested types' extra.
		{t2, "%v", []string{goType, t2.name}, []string{"extra"}},
		// %1v does print nested types' extra.
		{t2, "%1v", []string{goType, t2.name, "extra"}, nil},
		// empty strings in extra don't emit anything.
		{t3, "%v", []string{"[]"}, nil},
	}

	for _, test := range tests {
		t.Run(test.fmt, func(t *testing.T) {
			str := fmt.Sprintf(test.fmt, test.t)
			t.Log(str)

			for _, want := range test.contains {
				qt.Assert(t, qt.StringContains(str, want))
			}

			for _, notWant := range test.omits {
				qt.Assert(t, qt.Not(qt.StringContains(str, notWant)))
			}
		})
	}
}

// TestFormatCompoundTypes pins the exact output for Union and Struct,
// including how the width of the verb changes the number of field names
// printed.
func TestFormatCompoundTypes(t *testing.T) {
	u := &Union{
		Name:    "u",
		Members: []Member{{Name: "a"}, {Name: "b"}, {Name: "c"}, {Name: "d"}, {Name: "e"}, {Name: "f"}},
	}
	qt.Assert(t, qt.Equals(fmt.Sprintf("%v", u), `Union:"u"[fields=6 fieldNames=[a b c d e ...]]`))
	qt.Assert(t, qt.Equals(fmt.Sprintf("%1v", u), `Union:"u"[fields=6 fieldNames=[a ...]]`))
	qt.Assert(t, qt.Equals(fmt.Sprintf("%6v", u), `Union:"u"[fields=6 fieldNames=[a b c d e f]]`))

	s := &Struct{
		Name:    "s",
		Members: []Member{{Name: "a"}, {Name: "b"}, {Name: "c"}},
	}
	qt.Assert(t, qt.Equals(fmt.Sprintf("%v", s), `Struct:"s"[fields=3 fieldNames=[a b c]]`))
	qt.Assert(t, qt.Equals(fmt.Sprintf("%1v", s), `Struct:"s"[fields=3 fieldNames=[a ...]]`))
}

// newCyclicalType returns a Pointer whose target is a chain of n wrappers
// which ends at the Pointer itself. The kind of wrapper rotates between
// Struct, Const, Volatile, Typedef and Array.
func newCyclicalType(n int) Type {
	ptr := &Pointer{}
	prev := Type(ptr)
	for i := 0; i < n; i++ {
		switch i % 5 {
		case 0:
			prev = &Struct{
				Members: []Member{
					{Type: prev},
				},
			}

		case 1:
			prev = &Const{Type: prev}
		case 2:
			prev = &Volatile{Type: prev}
		case 3:
			prev = &Typedef{Type: prev}
		case 4:
			prev = &Array{Type: prev, Index: &Int{Size: 1}}
		}
	}
	// Close the cycle.
	ptr.Target = prev
	return ptr
}

// TestUnderlyingType checks that every kind of wrapper is skipped and that a
// cycle made of wrappers is reported as a *cycle rooted at the argument.
func TestUnderlyingType(t *testing.T) {
	wrappers := []struct {
		name string
		fn   func(Type) Type
	}{
		{"const", func(t Type) Type { return &Const{Type: t} }},
		{"volatile", func(t Type) Type { return &Volatile{Type: t} }},
		{"restrict", func(t Type) Type { return &Restrict{Type: t} }},
		{"typedef", func(t Type) Type { return &Typedef{Type: t} }},
		{"type tag", func(t Type) Type { return &TypeTag{Type: t} }},
	}

	for _, test := range wrappers {
		t.Run(test.name+" cycle", func(t *testing.T) {
			root := &Volatile{}
			root.Type = test.fn(root)

			got, ok := UnderlyingType(root).(*cycle)
			qt.Assert(t, qt.IsTrue(ok))
			qt.Assert(t, qt.Equals[Type](got.root, root))
		})
	}

	for _, test := range wrappers {
		t.Run(test.name, func(t *testing.T) {
			want := &Int{}
			got := UnderlyingType(test.fn(want))
			qt.Assert(t, qt.Equals[Type](got, want))
		})
	}
}

// TestInflateLegacyBitfield checks that the offset and size stored on an Int
// end up on the struct member which refers to it, regardless of whether the
// struct precedes or follows the Int in the type section.
func TestInflateLegacyBitfield(t *testing.T) {
	const offset = 3
	const size = 5

	// addHeaderAndStringTable encodes types in little endian, prepends a BTF
	// header and appends a one byte string table.
	addHeaderAndStringTable := func(types ...any) []byte {
		var buf []byte
		var err error
		for _, typ := range types {
			buf, err = binary.Append(buf, binary.LittleEndian, typ)
			qt.Assert(t, qt.IsNil(err))
		}

		header, err := binary.Append(nil, binary.LittleEndian, &btfHeader{
			Magic:     btfMagic,
			Version:   1,
			Flags:     0,
			HdrLen:    uint32(btfHeaderLen),
			TypeOff:   0,
			TypeLen:   uint32(len(buf)),
			StringOff: uint32(len(buf)),
			StringLen: 1,
		})
		qt.Assert(t, qt.IsNil(err))

		buf = append(header, buf...)
buf = append(buf, 0) // string table
		return buf
	}

	// An Int of size 4 which carries a bitfield offset and size.
	var placeholder struct {
		btfType
		btfInt
	}
	placeholder.SetKind(kindInt)
	placeholder.SetSize(4)
	placeholder.SetOffset(offset)
	placeholder.SetBits(size)

	// A struct with a single member of type ID 2, which is the Int when the
	// struct is encoded first.
	var structFirst struct {
		btfType
		Members [1]btfMember
	}
	structFirst.SetKind(kindStruct)
	structFirst.SetVlen(1)
	structFirst.Members = [...]btfMember{{Type: 2}}

	before := addHeaderAndStringTable(&structFirst, &placeholder)

	// The same struct, referring to type ID 1 since the Int now comes first.
	structSecond := structFirst
	structSecond.Members = [...]btfMember{{Type: 1}}

	after := addHeaderAndStringTable(&placeholder, &structSecond)

	for _, test := range []struct {
		name string
		buf  []byte
	}{
		{"struct before int", before},
		{"struct after int", after},
	} {
		t.Run(test.name, func(t *testing.T) {
			spec, err := loadRawSpec(test.buf, nil)
			qt.Assert(t, qt.IsNil(err))

			// Only the first Struct is inspected.
			for _, typ := range typesFromSpec(t, spec) {
				s, ok := typ.(*Struct)
				if !ok {
					continue
				}

				i := s.Members[0]
				if i.BitfieldSize != size {
					t.Errorf("Expected bitfield size %d, got %d", size, i.BitfieldSize)
				}

				if i.Offset != offset {
					t.Errorf("Expected offset %d, got %d", offset, i.Offset)
				}

				return
			}

			t.Fatal("No Struct returned from inflateRawTypes")
		})
	}
}

// TestMemberNames checks the placeholder used for unnamed members and the
// "..." marker appended when the limit is lower than the number of members.
func TestMemberNames(t *testing.T) {
	members := []Member{{Name: "foo"}, {}, {Name: "bar"}}
	qt.Assert(t, qt.ContentEquals(memberNames(members, 3), []string{"foo", "<1>", "bar"}))
	qt.Assert(t, qt.ContentEquals(memberNames(members, 2), []string{"foo", "<1>", "..."}))
}

// BenchmarkWalk measures pushing all children of a type onto a typeDeque, for
// one value of each kind of Type listed below.
func BenchmarkWalk(b *testing.B) {
	types := []Type{
		&Void{},
		&Int{},
		&Pointer{},
		&Array{},
		&Struct{Members: make([]Member, 2)},
		&Union{Members: make([]Member, 2)},
		&Enum{},
		&Fwd{},
		&Typedef{},
		&Volatile{},
		&Const{},
		&Restrict{},
		&Func{},
		&FuncProto{Params: make([]FuncParam, 2)},
		&Var{},
		&Datasec{Vars: make([]VarSecinfo, 2)},
	}

	for _, typ := range types {
		b.Run(fmt.Sprint(typ), func(b *testing.B) {
			b.ReportAllocs()

			for b.Loop() {
				var dq typeDeque
				for child := range children(typ) {
					dq.Push(child)
				}
			}
		})
	}
}

// TestTagUnmarshaling loads each testdata/tags-*.elf file and checks the tags
// found on structs, unions, typedefs, variables and functions.
func TestTagUnmarshaling(t *testing.T) {
	testutils.Files(t, testutils.Glob(t, "testdata/tags-*.elf"), func(t *testing.T, file string) {
		spec, err := LoadSpec(file)
		qt.Assert(t, qt.IsNil(err))

		var s *Struct
		err = spec.TypeByName("s", &s)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(s.Tags, []string{"c"}))
		qt.Assert(t, qt.ContentEquals(s.Members[0].Tags, []string{"a"}))
		qt.Assert(t, qt.ContentEquals(s.Members[1].Tags, []string{"b"}))

		var u *Union
		err = spec.TypeByName("u", &u)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(u.Tags, []string{"c"}))
		qt.Assert(t, qt.ContentEquals(u.Members[0].Tags, []string{"a"}))
		qt.Assert(t, qt.ContentEquals(u.Members[1].Tags, []string{"b"}))

		var td *Typedef
		err = spec.TypeByName("td", &td)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(td.Tags, []string{"b"}))

		var s1 *Var
		err = spec.TypeByName("s1", &s1)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(s1.Tags, []string{"d"}))

		// NB: s2 holds the variable named "u1".
		var s2 *Var
		err = spec.TypeByName("u1", &s2)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(s2.Tags, []string{"e"}))

		var t1 *Var
		err = spec.TypeByName("t1", &t1)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(t1.Tags, []string{"a"}))

		var extFunc *Func
		err = spec.TypeByName("fwdDecl", &extFunc)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(extFunc.Tags, []string{"a", "b"}))
		qt.Assert(t, qt.ContentEquals(extFunc.ParamTags, [][]string{{"c"}, {"d"}}))

		var normalFunc *Func
		err = spec.TypeByName("normalDecl1", &normalFunc)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(normalFunc.Tags, []string{"e"}))
		qt.Assert(t, qt.ContentEquals(normalFunc.ParamTags, [][]string{{"b"}, {"c"}}))

		err = spec.TypeByName("normalDecl2", &normalFunc)
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.ContentEquals(normalFunc.Tags, []string{"e"}))
		qt.Assert(t, qt.ContentEquals(normalFunc.ParamTags, [][]string{{"b"}, {"c"}}))
	})
}

// BenchmarkUnderlyingType measures UnderlyingType for an unwrapped type and
// for a type behind a single Typedef.
func BenchmarkUnderlyingType(b *testing.B) {
	b.Run("no unwrapping", func(b *testing.B) {
		v := &Int{}
		b.ReportAllocs()

		for b.Loop() {
			UnderlyingType(v)
		}
	})
b.Run("single unwrapping", func(b *testing.B) {
		v := &Typedef{Type: &Int{}}
		b.ReportAllocs()

		for b.Loop() {
			UnderlyingType(v)
		}
	})
}

// As can be used to strip qualifiers from a Type.
func ExampleAs() {
	a := &Volatile{Type: &Pointer{Target: &Typedef{Name: "foo", Type: &Int{Size: 2}}}}
	fmt.Println(As[*Pointer](a))
	// Output: Pointer[target=Typedef:"foo"] true
}

================================================
FILE: btf/unmarshal.go
================================================
package btf

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/maphash"
	"io"
	"iter"
	"maps"
	"math"
	"slices"
	"sync"
)

// sharedBuf is a buffer which may be shared between multiple decoders.
//
// It must not be modified. Some sharedBuf may be backed by an mmap-ed file, in
// which case the sharedBuf has a finalizer. sharedBuf must therefore always be
// passed as a pointer.
type sharedBuf struct {
	raw []byte
}

// decoder inflates Types on demand from marshaled BTF.
//
// The fields up to and including namedTypes are set once when the decoder is
// created. The remaining fields cache inflated types and are guarded by mu.
type decoder struct {
	// Immutable fields, may be shared.

	// The decoder which resolves type IDs below firstTypeID, if any.
	base      *decoder
	byteOrder binary.ByteOrder
	*sharedBuf
	strings *stringTable
	// The ID for offsets[0].
	firstTypeID TypeID
	// Map from TypeID to offset of the marshaled data in raw. Contains an entry
	// for each TypeID, including 0 aka Void. The offset for Void is invalid.
	offsets []int
	// Map from the ID of a type to the IDs of the decl tags which refer to it.
	declTags map[TypeID][]TypeID
	// An index from essentialName to TypeID.
	namedTypes *fuzzyStringIndex

	// Protection for mutable fields below.
mu sync.Mutex types map[TypeID]Type typeIDs map[Type]TypeID legacyBitfields map[TypeID][2]Bits // offset, size } func newDecoder(raw []byte, bo binary.ByteOrder, strings *stringTable, base *decoder) (*decoder, error) { firstTypeID := TypeID(0) if base != nil { if base.byteOrder != bo { return nil, fmt.Errorf("can't use %v base with %v split BTF", base.byteOrder, bo) } if base.firstTypeID != 0 { return nil, fmt.Errorf("can't use split BTF as base") } firstTypeID = TypeID(len(base.offsets)) } var header btfType var numTypes, numDeclTags, numNamedTypes int for _, err := range allBtfTypeOffsets(raw, bo, &header) { if err != nil { return nil, err } numTypes++ if header.Kind() == kindDeclTag { numDeclTags++ } if header.NameOff != 0 { numNamedTypes++ } } if firstTypeID == 0 { // Allocate an extra slot for Void so we don't have to deal with // constant off by one issues. numTypes++ } offsets := make([]int, 0, numTypes) declTags := make(map[TypeID][]TypeID, numDeclTags) namedTypes := newFuzzyStringIndex(numNamedTypes) if firstTypeID == 0 { // Add a sentinel for Void. offsets = append(offsets, math.MaxInt) } id := firstTypeID + TypeID(len(offsets)) for offset := range allBtfTypeOffsets(raw, bo, &header) { if id < firstTypeID { return nil, fmt.Errorf("no more type IDs") } offsets = append(offsets, offset) if header.Kind() == kindDeclTag { declTags[header.Type()] = append(declTags[header.Type()], id) } // Build named type index. name, err := strings.LookupBytes(header.NameOff) if err != nil { return nil, fmt.Errorf("lookup type name for id %v: %w", id, err) } if len(name) > 0 { if i := bytes.Index(name, []byte("___")); i != -1 { // Flavours are rare. It's cheaper to find the first index for some // reason. 
i = bytes.LastIndex(name, []byte("___")) name = name[:i] } namedTypes.Add(name, id) } id++ } namedTypes.Build() return &decoder{ base, bo, &sharedBuf{raw}, strings, firstTypeID, offsets, declTags, namedTypes, sync.Mutex{}, make(map[TypeID]Type), make(map[Type]TypeID), make(map[TypeID][2]Bits), }, nil } func allBtfTypeOffsets(buf []byte, bo binary.ByteOrder, header *btfType) iter.Seq2[int, error] { return func(yield func(int, error) bool) { for offset := 0; offset < len(buf); { start := offset n, err := unmarshalBtfType(header, buf[offset:], bo) if err != nil { yield(-1, fmt.Errorf("unmarshal type header: %w", err)) return } offset += n n, err = header.DataLen() if err != nil { yield(-1, err) return } offset += n if offset > len(buf) { yield(-1, fmt.Errorf("auxiliary type data: %w", io.ErrUnexpectedEOF)) return } if !yield(start, nil) { return } } } } func rebaseDecoder(d *decoder, base *decoder) (*decoder, error) { if d.base == nil { return nil, fmt.Errorf("rebase split spec: not a split spec") } if len(d.base.raw) != len(base.raw) || (len(d.base.raw) > 0 && &d.base.raw[0] != &base.raw[0]) { return nil, fmt.Errorf("rebase split spec: raw BTF differs") } return &decoder{ base, d.byteOrder, d.sharedBuf, d.strings, d.firstTypeID, d.offsets, d.declTags, d.namedTypes, sync.Mutex{}, make(map[TypeID]Type), make(map[Type]TypeID), make(map[TypeID][2]Bits), }, nil } // Copy performs a deep copy of a decoder and its base. 
func (d *decoder) Copy() *decoder { if d == nil { return nil } return d.copy(nil) } func (d *decoder) copy(copiedTypes map[Type]Type) *decoder { if d == nil { return nil } d.mu.Lock() defer d.mu.Unlock() if copiedTypes == nil { copiedTypes = make(map[Type]Type, len(d.types)) } types := make(map[TypeID]Type, len(d.types)) typeIDs := make(map[Type]TypeID, len(d.typeIDs)) for id, typ := range d.types { types[id] = copyType(typ, d.typeIDs, copiedTypes, typeIDs) } return &decoder{ d.base.copy(copiedTypes), d.byteOrder, d.sharedBuf, d.strings, d.firstTypeID, d.offsets, d.declTags, d.namedTypes, sync.Mutex{}, types, typeIDs, maps.Clone(d.legacyBitfields), } } // TypeID returns the ID for a Type previously obtained via [TypeByID]. func (d *decoder) TypeID(typ Type) (TypeID, error) { if _, ok := typ.(*Void); ok { // Equality is weird for void, since it is a zero sized type. return 0, nil } d.mu.Lock() defer d.mu.Unlock() id, ok := d.typeIDs[typ] if !ok { return 0, fmt.Errorf("no ID for type %s: %w", typ, ErrNotFound) } return id, nil } // TypesByName returns all types which have the given essential name. // // Returns ErrNotFound if no matching Type exists. func (d *decoder) TypesByName(name essentialName) ([]Type, error) { var types []Type for id := range d.namedTypes.Find(string(name)) { typ, err := d.TypeByID(id) if err != nil { return nil, err } if newEssentialName(typ.TypeName()) == name { // Deal with hash collisions by checking against the name. types = append(types, typ) } } if len(types) == 0 { // Return an unwrapped error because this is on the hot path // for CO-RE. return nil, ErrNotFound } return types, nil } // TypeByID decodes a type and any of its descendants. 
func (d *decoder) TypeByID(id TypeID) (Type, error) { d.mu.Lock() defer d.mu.Unlock() return d.inflateType(id) } func (d *decoder) inflateType(id TypeID) (typ Type, err error) { defer func() { if r := recover(); r != nil { err = r.(error) } // err is the return value of the enclosing function, even if an explicit // return is used. // See https://go.dev/ref/spec#Defer_statements if err != nil { // Remove partially inflated type so that d.types only contains // fully inflated ones. delete(d.types, id) } else { // Populate reverse index. d.typeIDs[typ] = id } }() if id < d.firstTypeID { return d.base.inflateType(id) } if id == 0 { // Void is defined to always be type ID 0, and is thus omitted from BTF. // Fast-path because it is looked up frequently. return (*Void)(nil), nil } if typ, ok := d.types[id]; ok { return typ, nil } fixup := func(id TypeID, typ *Type) { fixup, err := d.inflateType(id) if err != nil { panic(err) } *typ = fixup } convertMembers := func(header *btfType, buf []byte) ([]Member, error) { var bm btfMember members := make([]Member, 0, header.Vlen()) for i := range header.Vlen() { n, err := unmarshalBtfMember(&bm, buf, d.byteOrder) if err != nil { return nil, fmt.Errorf("unmarshal member: %w", err) } buf = buf[n:] name, err := d.strings.Lookup(bm.NameOff) if err != nil { return nil, fmt.Errorf("can't get name for member %d: %w", i, err) } members = append(members, Member{ Name: name, Offset: Bits(bm.Offset), }) m := &members[i] fixup(bm.Type, &m.Type) if header.Bitfield() { m.BitfieldSize = Bits(bm.Offset >> 24) m.Offset &= 0xffffff // We ignore legacy bitfield definitions if the current composite // is a new-style bitfield. This is kind of safe since offset and // size on the type of the member must be zero if kindFlat is set // according to spec. continue } // This may be a legacy bitfield, try to fix it up. data, ok := d.legacyBitfields[bm.Type] if ok { // Bingo! 
m.Offset += data[0] m.BitfieldSize = data[1] continue } } return members, nil } idx := int(id - d.firstTypeID) if idx >= len(d.offsets) { return nil, fmt.Errorf("type id %v: %w", id, ErrNotFound) } offset := d.offsets[idx] if offset >= len(d.raw) { return nil, fmt.Errorf("offset out of bounds") } var ( header btfType bInt btfInt bArr btfArray bVariable btfVariable bDeclTag btfDeclTag pos = d.raw[offset:] ) { if n, err := unmarshalBtfType(&header, pos, d.byteOrder); err != nil { return nil, fmt.Errorf("can't unmarshal type info for id %v: %v", id, err) } else { pos = pos[n:] } name, err := d.strings.Lookup(header.NameOff) if err != nil { return nil, fmt.Errorf("get name for type id %d: %w", id, err) } switch header.Kind() { case kindInt: size := header.Size() if _, err := unmarshalBtfInt(&bInt, pos, d.byteOrder); err != nil { return nil, fmt.Errorf("can't unmarshal btfInt, id: %d: %w", id, err) } if bInt.Offset() > 0 || bInt.Bits().Bytes() != size { d.legacyBitfields[id] = [2]Bits{bInt.Offset(), bInt.Bits()} } typ = &Int{name, header.Size(), bInt.Encoding()} d.types[id] = typ case kindPointer: ptr := &Pointer{nil} d.types[id] = ptr fixup(header.Type(), &ptr.Target) typ = ptr case kindArray: if _, err := unmarshalBtfArray(&bArr, pos, d.byteOrder); err != nil { return nil, fmt.Errorf("can't unmarshal btfArray, id: %d: %w", id, err) } arr := &Array{nil, nil, bArr.Nelems} d.types[id] = arr fixup(bArr.IndexType, &arr.Index) fixup(bArr.Type, &arr.Type) typ = arr case kindStruct: str := &Struct{name, header.Size(), nil, nil} d.types[id] = str typ = str str.Members, err = convertMembers(&header, pos) if err != nil { return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err) } case kindUnion: uni := &Union{name, header.Size(), nil, nil} d.types[id] = uni typ = uni uni.Members, err = convertMembers(&header, pos) if err != nil { return nil, fmt.Errorf("union %s (id %d): %w", name, id, err) } case kindEnum: enum := &Enum{name, header.Size(), header.Signed(), nil} 
d.types[id] = enum typ = enum var be btfEnum enum.Values = make([]EnumValue, 0, header.Vlen()) for i := range header.Vlen() { n, err := unmarshalBtfEnum(&be, pos, d.byteOrder) if err != nil { return nil, fmt.Errorf("unmarshal btfEnum %d, id: %d: %w", i, id, err) } pos = pos[n:] name, err := d.strings.Lookup(be.NameOff) if err != nil { return nil, fmt.Errorf("get name for enum value %d: %s", i, err) } value := uint64(be.Val) if enum.Signed { // Sign extend values to 64 bit. value = uint64(int32(be.Val)) } enum.Values = append(enum.Values, EnumValue{name, value}) } case kindForward: typ = &Fwd{name, header.FwdKind()} d.types[id] = typ case kindTypedef: typedef := &Typedef{name, nil, nil} d.types[id] = typedef fixup(header.Type(), &typedef.Type) typ = typedef case kindVolatile: volatile := &Volatile{nil} d.types[id] = volatile fixup(header.Type(), &volatile.Type) typ = volatile case kindConst: cnst := &Const{nil} d.types[id] = cnst fixup(header.Type(), &cnst.Type) typ = cnst case kindRestrict: restrict := &Restrict{nil} d.types[id] = restrict fixup(header.Type(), &restrict.Type) typ = restrict case kindFunc: fn := &Func{name, nil, header.Linkage(), nil, nil} d.types[id] = fn fixup(header.Type(), &fn.Type) typ = fn case kindFuncProto: fp := &FuncProto{} d.types[id] = fp params := make([]FuncParam, 0, header.Vlen()) var bParam btfParam for i := range header.Vlen() { n, err := unmarshalBtfParam(&bParam, pos, d.byteOrder) if err != nil { return nil, fmt.Errorf("can't unmarshal btfParam %d, id: %d: %w", i, id, err) } pos = pos[n:] name, err := d.strings.Lookup(bParam.NameOff) if err != nil { return nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err) } param := FuncParam{Name: name} fixup(bParam.Type, ¶m.Type) params = append(params, param) } fixup(header.Type(), &fp.Return) fp.Params = params typ = fp case kindVar: if _, err := unmarshalBtfVariable(&bVariable, pos, d.byteOrder); err != nil { return nil, fmt.Errorf("can't read btfVariable, id: %d: %w", id, 
err) } v := &Var{name, nil, VarLinkage(bVariable.Linkage), nil} d.types[id] = v fixup(header.Type(), &v.Type) typ = v case kindDatasec: ds := &Datasec{name, header.Size(), nil} d.types[id] = ds vlen := header.Vlen() vars := make([]VarSecinfo, 0, vlen) var bSecInfo btfVarSecinfo for i := 0; i < vlen; i++ { n, err := unmarshalBtfVarSecInfo(&bSecInfo, pos, d.byteOrder) if err != nil { return nil, fmt.Errorf("can't unmarshal btfVarSecinfo %d, id: %d: %w", i, id, err) } pos = pos[n:] vs := VarSecinfo{ Offset: bSecInfo.Offset, Size: bSecInfo.Size, } fixup(bSecInfo.Type, &vs.Type) vars = append(vars, vs) } ds.Vars = vars typ = ds case kindFloat: typ = &Float{name, header.Size()} d.types[id] = typ case kindDeclTag: if _, err := unmarshalBtfDeclTag(&bDeclTag, pos, d.byteOrder); err != nil { return nil, fmt.Errorf("can't read btfDeclTag, id: %d: %w", id, err) } btfIndex := bDeclTag.ComponentIdx if uint64(btfIndex) > math.MaxInt { return nil, fmt.Errorf("type id %d: index exceeds int", id) } dt := &declTag{nil, name, int(int32(btfIndex))} d.types[id] = dt fixup(header.Type(), &dt.Type) typ = dt case kindTypeTag: tt := &TypeTag{nil, name} d.types[id] = tt fixup(header.Type(), &tt.Type) typ = tt case kindEnum64: enum := &Enum{name, header.Size(), header.Signed(), nil} d.types[id] = enum typ = enum enum.Values = make([]EnumValue, 0, header.Vlen()) var bEnum64 btfEnum64 for i := range header.Vlen() { n, err := unmarshalBtfEnum64(&bEnum64, pos, d.byteOrder) if err != nil { return nil, fmt.Errorf("can't unmarshal btfEnum64 %d, id: %d: %w", i, id, err) } pos = pos[n:] name, err := d.strings.Lookup(bEnum64.NameOff) if err != nil { return nil, fmt.Errorf("get name for enum64 value %d: %s", i, err) } value := (uint64(bEnum64.ValHi32) << 32) | uint64(bEnum64.ValLo32) enum.Values = append(enum.Values, EnumValue{name, value}) } default: return nil, fmt.Errorf("type id %d: unknown kind: %v", id, header.Kind()) } } for _, tagID := range d.declTags[id] { dtType, err := d.inflateType(tagID) 
if err != nil { return nil, err } dt, ok := dtType.(*declTag) if !ok { return nil, fmt.Errorf("type id %v: not a declTag", tagID) } switch t := typ.(type) { case *Var: if dt.Index != -1 { return nil, fmt.Errorf("type %s: component idx %d is not -1", dt, dt.Index) } t.Tags = append(t.Tags, dt.Value) case *Typedef: if dt.Index != -1 { return nil, fmt.Errorf("type %s: component idx %d is not -1", dt, dt.Index) } t.Tags = append(t.Tags, dt.Value) case composite: if dt.Index >= 0 { members := t.members() if dt.Index >= len(members) { return nil, fmt.Errorf("type %s: component idx %d exceeds members of %s", dt, dt.Index, t) } members[dt.Index].Tags = append(members[dt.Index].Tags, dt.Value) } else if dt.Index == -1 { switch t2 := t.(type) { case *Struct: t2.Tags = append(t2.Tags, dt.Value) case *Union: t2.Tags = append(t2.Tags, dt.Value) } } else { return nil, fmt.Errorf("type %s: decl tag for type %s has invalid component idx", dt, t) } case *Func: fp, ok := t.Type.(*FuncProto) if !ok { return nil, fmt.Errorf("type %s: %s is not a FuncProto", dt, t.Type) } // Ensure the number of argument tag lists equals the number of arguments if len(t.ParamTags) == 0 { t.ParamTags = make([][]string, len(fp.Params)) } if dt.Index >= 0 { if dt.Index >= len(fp.Params) { return nil, fmt.Errorf("type %s: component idx %d exceeds params of %s", dt, dt.Index, t) } t.ParamTags[dt.Index] = append(t.ParamTags[dt.Index], dt.Value) } else if dt.Index == -1 { t.Tags = append(t.Tags, dt.Value) } else { return nil, fmt.Errorf("type %s: decl tag for type %s has invalid component idx", dt, t) } default: return nil, fmt.Errorf("type %s: decl tag for type %s is not supported", dt, t) } } return typ, nil } // An index from string to TypeID. // // Fuzzy because it may return false positive matches. 
type fuzzyStringIndex struct { seed maphash.Seed entries []fuzzyStringIndexEntry } func newFuzzyStringIndex(capacity int) *fuzzyStringIndex { return &fuzzyStringIndex{ maphash.MakeSeed(), make([]fuzzyStringIndexEntry, 0, capacity), } } // Add a string to the index. // // Calling the method with identical arguments will create duplicate entries. func (idx *fuzzyStringIndex) Add(name []byte, id TypeID) { hash := uint32(maphash.Bytes(idx.seed, name)) idx.entries = append(idx.entries, newFuzzyStringIndexEntry(hash, id)) } // Build the index. // // Must be called after [Add] and before [Match]. func (idx *fuzzyStringIndex) Build() { slices.Sort(idx.entries) } // Find TypeIDs which may match the name. // // May return false positives, but is guaranteed to not have false negatives. // // You must call [Build] at least once before calling this method. func (idx *fuzzyStringIndex) Find(name string) iter.Seq[TypeID] { return func(yield func(TypeID) bool) { hash := uint32(maphash.String(idx.seed, name)) // We match only on the first 32 bits here, so ignore found. i, _ := slices.BinarySearch(idx.entries, fuzzyStringIndexEntry(hash)<<32) for i := i; i < len(idx.entries); i++ { if idx.entries[i].hash() != hash { break } if !yield(idx.entries[i].id()) { return } } } } // Tuple mapping the hash of an essential name to a type. // // Encoded in an uint64 so that it implements cmp.Ordered. 
type fuzzyStringIndexEntry uint64 func newFuzzyStringIndexEntry(hash uint32, id TypeID) fuzzyStringIndexEntry { return fuzzyStringIndexEntry(hash)<<32 | fuzzyStringIndexEntry(id) } func (e fuzzyStringIndexEntry) hash() uint32 { return uint32(e >> 32) } func (e fuzzyStringIndexEntry) id() TypeID { return TypeID(e) } ================================================ FILE: btf/unmarshal_test.go ================================================ package btf import ( "iter" "math" "testing" "github.com/go-quicktest/qt" ) func TestFuzzyStringIndex(t *testing.T) { idx := newFuzzyStringIndex(10) count := testing.AllocsPerRun(1, func() { idx.Add([]byte("foo"), 1) }) qt.Assert(t, qt.Equals(count, 0)) idx.entries = idx.entries[:0] idx.Add([]byte("foo"), 1) idx.Add([]byte("bar"), 2) idx.Add([]byte("baz"), 3) idx.Build() all := func(it iter.Seq[TypeID]) (ids []TypeID) { for id := range it { ids = append(ids, id) } return } qt.Assert(t, qt.SliceContains(all(idx.Find("foo")), 1)) qt.Assert(t, qt.SliceContains(all(idx.Find("bar")), 2)) qt.Assert(t, qt.SliceContains(all(idx.Find("baz")), 3)) qt.Assert(t, qt.IsTrue(newFuzzyStringIndexEntry(0, math.MaxUint32) < newFuzzyStringIndexEntry(1, 0))) } ================================================ FILE: btf/workarounds.go ================================================ package btf // datasecResolveWorkaround ensures that certain vars in a Datasec are added // to a Spec before the Datasec. This avoids a bug in kernel BTF validation. // // See https://lore.kernel.org/bpf/20230302123440.1193507-1-lmb@isovalent.com/ func datasecResolveWorkaround(b *Builder, ds *Datasec) error { for _, vsi := range ds.Vars { v, ok := vsi.Type.(*Var) if !ok { continue } switch v.Type.(type) { case *Typedef, *Volatile, *Const, *Restrict, *TypeTag: // NB: We must never call Add on a Datasec, otherwise we risk // infinite recursion. 
_, err := b.Add(v.Type) if err != nil { return err } } } return nil } ================================================ FILE: btf/workarounds_test.go ================================================ package btf import ( "errors" "fmt" "testing" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" "github.com/go-quicktest/qt" ) func TestDatasecResolveWorkaround(t *testing.T) { testutils.SkipOnOldKernel(t, "5.2", "BTF_KIND_DATASEC") i := &Int{Size: 1} for _, typ := range []Type{ &Typedef{"foo", i, nil}, &Volatile{i}, &Const{i}, &Restrict{i}, &TypeTag{i, "foo"}, } { t.Run(fmt.Sprint(typ), func(t *testing.T) { if _, ok := typ.(*TypeTag); ok { testutils.SkipOnOldKernel(t, "5.17", "BTF_KIND_TYPE_TAG") } ds := &Datasec{ Name: "a", Size: 2, Vars: []VarSecinfo{ { Size: 1, Offset: 0, // struct, union, pointer, array will trigger the bug. Type: &Var{Name: "a", Type: &Pointer{i}}, }, { Size: 1, Offset: 1, Type: &Var{ Name: "b", Type: typ, }, }, }, } b, err := NewBuilder([]Type{ds}, nil) if err != nil { t.Fatal(err) } h, err := NewHandle(b) testutils.SkipIfNotSupportedOnOS(t, err) var ve *internal.VerifierError if errors.As(err, &ve) { t.Fatalf("%+v\n", ve) } if err != nil { t.Fatal(err) } h.Close() }) } } func TestEmptyBTFWithStringTableWorkaround(t *testing.T) { var b Builder _, err := b.addString("foo") qt.Assert(t, qt.IsNil(err)) h, err := NewHandle(&b) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(h.Close())) } ================================================ FILE: cmd/bpf2go/README.md ================================================ bpf2go === `bpf2go` compiles a C source file into eBPF bytecode and then emits a Go file containing the eBPF. The goal is to avoid loading the eBPF from disk at runtime and to minimise the amount of manual work required to interact with eBPF programs. It takes inspiration from `bpftool gen skeleton`. 
Add `bpf2go` as a tool dependency in your project's Go module: go get -tool github.com/cilium/ebpf/cmd/bpf2go Invoke the tool using go generate: //go:generate go tool bpf2go foo path/to/src.c -- -I/path/to/include This will emit `foo_bpfel.go` and `foo_bpfeb.go`, with types using `foo` as a stem. The two files contain compiled BPF for little and big endian systems, respectively. ## Environment Variables You can use environment variables to affect all bpf2go invocations across a project, e.g. to set specific C flags: BPF2GO_CFLAGS="-O2 -g -Wall -Werror $(CFLAGS)" go generate ./... Alternatively, by exporting `$BPF2GO_CFLAGS` from your build system, you can control all builds from a single location. Most bpf2go arguments can be controlled this way. See `bpf2go -h` for an up-to-date list. ## Generated types `bpf2go` generates Go types for all map keys and values by default. You can disable this behaviour using `-no-global-types`. You can add to the set of types by specifying `-type foo` for each type you'd like to generate. ## Examples See [examples/kprobe](../../examples/kprobe/main.go) for a fully worked out example. ================================================ FILE: cmd/bpf2go/doc.go ================================================ //go:build !windows // Program bpf2go embeds eBPF in Go. // // Please see the README for details how to use it. package main ================================================ FILE: cmd/bpf2go/flags.go ================================================ //go:build !windows package main import ( "flag" "go/build/constraint" ) // buildTags is a comma-separated list of build tags. // // This follows the pre-Go 1.17 syntax and is kept for compatibility reasons. 
// buildTags is a flag.Value which parses build tags written in the legacy
// "// +build" syntax into a constraint expression.
type buildTags struct {
	// Expr is the parsed constraint, or nil if Set has not succeeded yet.
	Expr constraint.Expr
}

var _ flag.Value = (*buildTags)(nil)

// String returns the constraint in its textual form, or the empty string if
// no constraint has been set.
func (bt *buildTags) String() string {
	if expr := bt.Expr; expr != nil {
		return expr.String()
	}
	return ""
}

// Set parses value as the argument of a "// +build" line and stores the
// resulting expression. Expr is left untouched if parsing fails.
func (bt *buildTags) Set(value string) error {
	expr, err := constraint.Parse("// +build " + value)
	if err == nil {
		bt.Expr = expr
	}
	return err
}
return result } // Compile C to a BPF ELF file. func Compile(args CompileArgs) error { cmd := exec.Command(args.CC, insertDefaultFlags(args.Flags)...) cmd.Stderr = os.Stderr inputDir := filepath.Dir(args.Source) relInputDir, err := filepath.Rel(args.Workdir, inputDir) if err != nil { return err } target := args.Target if target == (Target{}) { target.clang = "bpf" } // C flags that can't be overridden. if linux := target.linux; linux != "" { cmd.Args = append(cmd.Args, "-D__TARGET_ARCH_"+linux) } cmd.Args = append(cmd.Args, "-Wunused-command-line-argument", "-target", target.clang, "-c", args.Source, "-o", args.Dest, // Don't include clang version "-fno-ident", // Don't output inputDir into debug info "-fdebug-prefix-map="+inputDir+"="+relInputDir, "-fdebug-compilation-dir", ".", // We always want BTF to be generated, so enforce debug symbols "-g", fmt.Sprintf("-D__BPF_TARGET_MISSING=%q", "GCC error \"The eBPF is using target specific macros, please provide -target that is not bpf, bpfel or bpfeb\""), ) cmd.Dir = args.Workdir if err := cmd.Run(); err != nil { return err } if args.DisableStripping { return nil } cmd = exec.Command(args.Strip, "-g", args.Dest) cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { return fmt.Errorf("strip %s: %w", args.Dest, err) } return nil } ================================================ FILE: cmd/bpf2go/gen/compile_test.go ================================================ //go:build !windows package gen import ( "bytes" "os" "path/filepath" "testing" "github.com/cilium/ebpf/internal/testutils" ) const minimalSocketFilter = `__attribute__((section("socket"), used)) int main() { return 0; }` func TestCompile(t *testing.T) { if testing.Short() { t.SkipNow() } dir := t.TempDir() mustWriteFile(t, dir, "test.c", minimalSocketFilter) err := Compile(CompileArgs{ CC: testutils.ClangBin(t), DisableStripping: true, Workdir: dir, Source: filepath.Join(dir, "test.c"), Dest: filepath.Join(dir, "test.o"), }) if err != nil { t.Fatal("Can't 
// mustWriteFile creates the file name inside dir with the given contents and
// mode 0660. Any error is reported via tb.Fatal.
func mustWriteFile(tb testing.TB, dir, name, contents string) {
	tb.Helper()

	err := os.WriteFile(filepath.Join(dir, name), []byte(contents), 0660)
	if err != nil {
		tb.Fatal(err)
	}
}
package gen ================================================ FILE: cmd/bpf2go/gen/output.go ================================================ //go:build !windows package gen import ( "bytes" _ "embed" "fmt" "go/build/constraint" "go/token" "io" "sort" "strings" "text/template" "unicode" "unicode/utf8" "github.com/cilium/ebpf/btf" b2gInt "github.com/cilium/ebpf/cmd/bpf2go/internal" "github.com/cilium/ebpf/internal" ) //go:embed output.tpl var commonRaw string var commonTemplate = template.Must(template.New("common").Parse(commonRaw)) type templateName string func (n templateName) maybeExport(str string) string { if token.IsExported(string(n)) { return toUpperFirst(str) } return str } func (n templateName) Bytes() string { return "_" + toUpperFirst(string(n)) + "Bytes" } func (n templateName) Specs() string { return string(n) + "Specs" } func (n templateName) ProgramSpecs() string { return string(n) + "ProgramSpecs" } func (n templateName) MapSpecs() string { return string(n) + "MapSpecs" } func (n templateName) VariableSpecs() string { return string(n) + "VariableSpecs" } func (n templateName) Load() string { return n.maybeExport("load" + toUpperFirst(string(n))) } func (n templateName) LoadObjects() string { return n.maybeExport("load" + toUpperFirst(string(n)) + "Objects") } func (n templateName) Objects() string { return string(n) + "Objects" } func (n templateName) Maps() string { return string(n) + "Maps" } func (n templateName) Variables() string { return string(n) + "Variables" } func (n templateName) Programs() string { return string(n) + "Programs" } func (n templateName) CloseHelper() string { return "_" + toUpperFirst(string(n)) + "Close" } type GenerateArgs struct { // Package of the resulting file. Package string // The prefix of all names declared at the top-level. Stem string // Build Constraints included in the resulting file. Constraints constraint.Expr // Maps to be emitted. Maps []string // Variables to be emitted. 
Variables []string // Programs to be emitted. Programs []string // Types to be emitted. Types []btf.Type // Filename of the object to embed. ObjectFile string // Output to write template to. Output io.Writer // Function which transforms the input into a valid go identifier. Uses the default behaviour if nil Identifier func(string) string } // Generate bindings for a BPF ELF file. func Generate(args GenerateArgs) error { if args.Identifier == nil { args.Identifier = internal.Identifier } if !token.IsIdentifier(args.Stem) { return fmt.Errorf("%q is not a valid identifier", args.Stem) } if strings.ContainsAny(args.ObjectFile, "\n") { // Prevent injecting newlines into the template. return fmt.Errorf("file %q contains an invalid character", args.ObjectFile) } for _, typ := range args.Types { if _, ok := btf.As[*btf.Datasec](typ); ok { // Avoid emitting .rodata, .bss, etc. for now. We might want to // name these types differently, etc. return fmt.Errorf("can't output btf.Datasec: %s", typ) } } maps := make(map[string]string) for _, name := range args.Maps { maps[name] = args.Identifier(name) } variables := make(map[string]string) for _, name := range args.Variables { variables[name] = args.Identifier(name) } programs := make(map[string]string) for _, name := range args.Programs { programs[name] = args.Identifier(name) } typeNames := make(map[btf.Type]string) for _, typ := range args.Types { // NB: This also deduplicates types. typeNames[typ] = args.Stem + args.Identifier(typ.TypeName()) } // Ensure we don't have conflicting names and generate a sorted list of // named types so that the output is stable. 
// toUpperFirst returns str with its first rune converted to upper case.
//
// The empty string and strings which start with an invalid UTF-8 sequence are
// returned unchanged.
func toUpperFirst(str string) string {
	first, size := utf8.DecodeRuneInString(str)
	if first == utf8.RuneError && size <= 1 {
		// A size of 0 means str is empty, a size of 1 means the first byte
		// isn't valid UTF-8. Converting RuneError back into a string would
		// inject U+FFFD into the result in both cases. A correctly encoded
		// U+FFFD has a size of 3 and takes the regular path below.
		return str
	}

	return string(unicode.ToUpper(first)) + str[size:]
}
{{ with .Constraints }}//go:build {{ . }}{{ end }} package {{ .Package }} import ( "bytes" _ "embed" "fmt" "io" {{- if .NeedsStructsPkg }} "structs" {{- end }} "{{ .Module }}" ) {{- if .TypeDeclarations }} {{- range $type := .TypeDeclarations }} {{ $type }} {{ end }} {{- end }} // {{ .Name.Load }} returns the embedded CollectionSpec for {{ .Name }}. func {{ .Name.Load }}() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader({{ .Name.Bytes }}) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load {{ .Name }}: %w", err) } return spec, err } // {{ .Name.LoadObjects }} loads {{ .Name }} and converts it into a struct. // // The following types are suitable as obj argument: // // *{{ .Name.Objects }} // *{{ .Name.Programs }} // *{{ .Name.Maps }} // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func {{ .Name.LoadObjects }}(obj interface{}, opts *ebpf.CollectionOptions) (error) { spec, err := {{ .Name.Load }}() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // {{ .Name.Specs }} contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type {{ .Name.Specs }} struct { {{ .Name.ProgramSpecs }} {{ .Name.MapSpecs }} {{ .Name.VariableSpecs }} } // {{ .Name.ProgramSpecs }} contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type {{ .Name.ProgramSpecs }} struct { {{- range $name, $id := .Programs }} {{ $id }} *ebpf.ProgramSpec `ebpf:"{{ $name }}"` {{- end }} } // {{ .Name.MapSpecs }} contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type {{ .Name.MapSpecs }} struct { {{- range $name, $id := .Maps }} {{ $id }} *ebpf.MapSpec `ebpf:"{{ $name }}"` {{- end }} } // {{ .Name.VariableSpecs }} contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type {{ .Name.VariableSpecs }} struct { {{- range $name, $id := .Variables }} {{ $id }} *ebpf.VariableSpec `ebpf:"{{ $name }}"` {{- end }} } // {{ .Name.Objects }} contains all objects after they have been loaded into the kernel. // // It can be passed to {{ .Name.LoadObjects }} or ebpf.CollectionSpec.LoadAndAssign. type {{ .Name.Objects }} struct { {{ .Name.Programs }} {{ .Name.Maps }} {{ .Name.Variables }} } func (o *{{ .Name.Objects }}) Close() error { return {{ .Name.CloseHelper }}( &o.{{ .Name.Programs }}, &o.{{ .Name.Maps }}, ) } // {{ .Name.Maps }} contains all maps after they have been loaded into the kernel. // // It can be passed to {{ .Name.LoadObjects }} or ebpf.CollectionSpec.LoadAndAssign. type {{ .Name.Maps }} struct { {{- range $name, $id := .Maps }} {{ $id }} *ebpf.Map `ebpf:"{{ $name }}"` {{- end }} } func (m *{{ .Name.Maps }}) Close() error { return {{ .Name.CloseHelper }}( {{- range $id := .Maps }} m.{{ $id }}, {{- end }} ) } // {{ .Name.Variables }} contains all global variables after they have been loaded into the kernel. // // It can be passed to {{ .Name.LoadObjects }} or ebpf.CollectionSpec.LoadAndAssign. type {{ .Name.Variables }} struct { {{- range $name, $id := .Variables }} {{ $id }} *ebpf.Variable `ebpf:"{{ $name }}"` {{- end }} } // {{ .Name.Programs }} contains all programs after they have been loaded into the kernel. // // It can be passed to {{ .Name.LoadObjects }} or ebpf.CollectionSpec.LoadAndAssign. type {{ .Name.Programs }} struct { {{- range $name, $id := .Programs }} {{ $id }} *ebpf.Program `ebpf:"{{ $name }}"` {{- end }} } func (p *{{ .Name.Programs }}) Close() error { return {{ .Name.CloseHelper }}( {{- range $id := .Programs }} p.{{ $id }}, {{- end }} ) } func {{ .Name.CloseHelper }}(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
//go:embed {{ .File }} var {{ .Name.Bytes }} []byte ================================================ FILE: cmd/bpf2go/gen/output_test.go ================================================ //go:build !windows package gen import ( "bytes" "fmt" "strings" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/cmd/bpf2go/internal" ) func TestOrderTypes(t *testing.T) { a := &btf.Int{} b := &btf.Int{} c := &btf.Int{} for _, test := range []struct { name string in map[btf.Type]string out []btf.Type }{ { "order", map[btf.Type]string{ a: "foo", b: "bar", c: "baz", }, []btf.Type{b, c, a}, }, } { t.Run(test.name, func(t *testing.T) { result, err := sortTypes(test.in) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(len(result), len(test.out))) for i, o := range test.out { if result[i] != o { t.Fatalf("Index %d: expected %p got %p", i, o, result[i]) } } }) } for _, test := range []struct { name string in map[btf.Type]string }{ { "duplicate names", map[btf.Type]string{ a: "foo", b: "foo", }, }, } { t.Run(test.name, func(t *testing.T) { result, err := sortTypes(test.in) qt.Assert(t, qt.IsNotNil(err)) qt.Assert(t, qt.IsNil(result)) }) } } func TestPackageImport(t *testing.T) { var buf bytes.Buffer err := Generate(GenerateArgs{ Package: "foo", Stem: "bar", ObjectFile: "frob.o", Output: &buf, }) qt.Assert(t, qt.IsNil(err)) // NB: It'd be great to test that this is the case for callers outside of // this module, but that is kind of tricky. 
qt.Assert(t, qt.StringContains(buf.String(), fmt.Sprintf(`"%s"`, internal.CurrentModule))) } func TestCustomIdentifier(t *testing.T) { var buf bytes.Buffer args := GenerateArgs{ Package: "foo", Stem: "bar", ObjectFile: "frob.o", Output: &buf, Programs: []string{"do_thing"}, Identifier: strings.ToUpper, } err := Generate(args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.StringContains(buf.String(), "DO_THING")) } func TestObjects(t *testing.T) { var buf bytes.Buffer args := GenerateArgs{ Package: "foo", Stem: "bar", Maps: []string{"map1"}, Variables: []string{"var_1"}, Programs: []string{"prog_foo_1"}, Output: &buf, } err := Generate(args) qt.Assert(t, qt.IsNil(err)) str := buf.String() qt.Assert(t, qt.StringContains(str, "Map1 *ebpf.MapSpec `ebpf:\"map1\"`")) qt.Assert(t, qt.StringContains(str, "Var1 *ebpf.VariableSpec `ebpf:\"var_1\"`")) qt.Assert(t, qt.StringContains(str, "ProgFoo1 *ebpf.ProgramSpec `ebpf:\"prog_foo_1\"`")) qt.Assert(t, qt.StringContains(str, "Map1 *ebpf.Map `ebpf:\"map1\"`")) qt.Assert(t, qt.StringContains(str, "Var1 *ebpf.Variable `ebpf:\"var_1\"`")) qt.Assert(t, qt.StringContains(str, "ProgFoo1 *ebpf.Program `ebpf:\"prog_foo_1\"`")) } func TestGenerateStructTypes(t *testing.T) { ts := &btf.Struct{ Name: "test_struct", Size: 8, Members: []btf.Member{ { Name: "field1", Type: &btf.Int{Size: 8, Encoding: btf.Unsigned}, Offset: 0, }, }, } td := &btf.Typedef{ Name: "test_typedef", Type: ts, } tests := []struct { name string types []btf.Type expected string }{ { name: "simple struct", types: []btf.Type{ts}, expected: "type stemTestStruct struct {\n\t_ structs.HostLayout\n\tField1 uint64\n}", }, { name: "typedef struct", types: []btf.Type{td}, expected: "type stemTestTypedef struct {\n\t_ structs.HostLayout\n\tField1 uint64\n}", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var buf bytes.Buffer err := Generate(GenerateArgs{ Package: "test", Stem: "stem", Types: tt.types, Output: &buf, Constraints: nil, }) qt.Assert(t, 
// Target describes what a C source file is compiled for.
//
// The zero value carries no target information.
type Target struct {
	// Clang arch string, used to define the clang -target flag, as per
	// "clang -print-targets".
	clang string
	// Linux arch string, used to define __TARGET_ARCH_xyz macros used by
	// https://github.com/libbpf/libbpf/blob/master/src/bpf_tracing.h
	//
	// Empty for targets which compile to generic BPF.
	linux string
	// GOOS override for use during tests.
	goos string
}
stem := tgt.clang if tgt.linux != "" { stem = fmt.Sprintf("%s_%s", tgt.linux, tgt.clang) } return stem } // ObsoleteSuffix returns an obsolete suffix for a subset of targets. // // It's used to work around an old bug and should not be used in new code. func (tgt *Target) ObsoleteSuffix() string { if tgt.linux == "" { return "" } return fmt.Sprintf("%s_%s", tgt.clang, tgt.linux) } // GoArch is a Go arch string. // // See https://go.dev/doc/install/source#environment for valid GOARCHes when // GOOS=linux. type GoArch string type GoArches []GoArch // Constraints is satisfied when GOARCH is any of the arches. func (arches GoArches) Constraint() constraint.Expr { var archConstraint constraint.Expr for _, goarch := range arches { tag := &constraint.TagExpr{Tag: string(goarch)} archConstraint = orConstraints(archConstraint, tag) } return archConstraint } // FindTarget turns a list of identifiers into targets and their respective // GoArches. // // The following are valid identifiers: // // - bpf: compile generic BPF for host endianness // - bpfel: compile generic BPF for little endian // - bpfeb: compile generic BPF for big endian // - native: compile BPF for host target // - $GOARCH: compile BPF for $GOARCH target // // Generic BPF can run on any target goarch with the correct endianness, // but doesn't have access to some arch specific tracing functionality. func FindTarget(id string) (Target, GoArches, error) { switch id { case "bpf", "bpfel", "bpfeb": var goarches []GoArch for arch, archTarget := range targetsByGoArch { if archTarget.clang == id { // Include tags for all goarches that have the same endianness. 
// orConstraints combines x and y into an expression which is satisfied when
// either of them is.
//
// A nil argument is ignored: the other argument is returned as is, which may
// itself be nil.
func orConstraints(x, y constraint.Expr) constraint.Expr {
	switch {
	case x == nil:
		return y
	case y == nil:
		return x
	default:
		return &constraint.OrExpr{X: x, Y: y}
	}
}
linuxArchesLE["x86"], }, { "386", Target{"bpfel", "x86", ""}, linuxArchesLE["x86"], }, { "ppc64", Target{"bpfeb", "powerpc", ""}, linuxArchesBE["powerpc"], }, { "native", nativeTarget, nativeArches, }, } for _, test := range tests { t.Run(test.short, func(t *testing.T) { target, arches, err := FindTarget(test.short) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(target, test.target)) qt.Assert(t, qt.DeepEquals(arches, test.arches)) }) } } func TestCollectTargetsErrors(t *testing.T) { tests := []struct { name string target string }{ {"unknown", "frood"}, {"no linux target", "mipsle"}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { _, _, err := FindTarget(test.target) if err == nil { t.Fatal("Function did not return an error") } t.Log("Error message:", err) }) } } func TestGoarches(t *testing.T) { exe := goBin(t) for GoArch, tgt := range targetsByGoArch { t.Run(string(GoArch), func(t *testing.T) { goOS := "linux" if tgt.goos != "" { goOS = tgt.goos } goEnv := exec.Command(exe, "env") goEnv.Env = []string{"GOROOT=/", "GOOS=" + string(goOS), "GOARCH=" + string(GoArch)} output, err := goEnv.CombinedOutput() qt.Assert(t, qt.IsNil(err), qt.Commentf("go output is:\n%s", string(output))) }) } } func TestClangTargets(t *testing.T) { exe := goBin(t) clangTargets := map[string]struct{}{} for _, tgt := range targetsByGoArch { clangTargets[tgt.clang] = struct{}{} } for target := range clangTargets { for _, env := range []string{"GOOS", "GOARCH"} { env += "=" + target t.Run(env, func(t *testing.T) { goEnv := exec.Command(exe, "env") goEnv.Env = []string{"GOROOT=/", env} output, err := goEnv.CombinedOutput() t.Log("go output is:", string(output)) qt.Assert(t, qt.IsNotNil(err), qt.Commentf("No clang target should be a valid build constraint")) }) } } } func goBin(t *testing.T) string { t.Helper() exe, err := exec.LookPath("go") if errors.Is(err, exec.ErrNotFound) { t.Skip("go binary is not in PATH") } qt.Assert(t, qt.IsNil(err)) return exe } 
================================================ FILE: cmd/bpf2go/gen/types.go ================================================ //go:build !windows package gen import ( "cmp" "slices" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" ) // CollectGlobalTypes finds all types which are used in the global scope. // // This currently includes the types of variables, map keys and values. func CollectGlobalTypes(spec *ebpf.CollectionSpec) []btf.Type { var types []btf.Type types = collectMapTypes(types, spec.Maps) types = collectVariableTypes(types, spec.Variables) slices.SortStableFunc(types, func(a, b btf.Type) int { return cmp.Compare(a.TypeName(), b.TypeName()) }) return types } // collectMapTypes collects all types used by MapSpecs. func collectMapTypes(types []btf.Type, maps map[string]*ebpf.MapSpec) []btf.Type { for _, m := range maps { if m.Key != nil && m.Key.TypeName() != "" { types = addType(types, m.Key) } if m.Value != nil && m.Value.TypeName() != "" { types = addType(types, m.Value) } } return types } // collectVariableTypes collects all types used by VariableSpecs. func collectVariableTypes(types []btf.Type, vars map[string]*ebpf.VariableSpec) []btf.Type { for _, vs := range vars { types = addType(types, vs.Type.Type) } return types } // addType adds a type to types if not already present. Types that don't need to // be generated are not added to types. func addType(types []btf.Type, incoming btf.Type) []btf.Type { incoming = selectType(incoming) if incoming == nil { return types } // Strip only the qualifiers (not typedefs) from the incoming type. Retain // typedefs since they carry the name of the anonymous type they point to, // without which we can't generate a named Go type. 
incoming = btf.QualifiedType(incoming) if incoming.TypeName() == "" { return types } exists := func(existing btf.Type) bool { return existing.TypeName() == incoming.TypeName() } if !slices.ContainsFunc(types, exists) { types = append(types, incoming) } return types } func selectType(t btf.Type) btf.Type { // Obtain a concrete type with qualifiers and typedefs stripped. switch ut := btf.UnderlyingType(t).(type) { case *btf.Struct, *btf.Union, *btf.Enum: return t // Collect the array's element type. Note: qualifiers on array-type variables // typically appear after the array, e.g. a const volatile int[4] is actually // an array of const volatile ints. case *btf.Array: return selectType(ut.Type) } return nil } ================================================ FILE: cmd/bpf2go/gen/types_test.go ================================================ //go:build !windows package gen import ( "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/testutils" "github.com/go-quicktest/qt" "github.com/google/go-cmp/cmp" ) func mustAnyTypeByName(t *testing.T, spec *ebpf.CollectionSpec, name string) btf.Type { t.Helper() typ, err := spec.Types.AnyTypeByName(name) qt.Assert(t, qt.IsNil(err)) return typ } func TestCollectGlobalTypes(t *testing.T) { spec, err := ebpf.LoadCollectionSpec(testutils.NativeFile(t, "../testdata/minimal-%s.elf")) if err != nil { t.Fatal(err) } bar := mustAnyTypeByName(t, spec, "bar") barfoo := mustAnyTypeByName(t, spec, "barfoo") baz := mustAnyTypeByName(t, spec, "baz") e := mustAnyTypeByName(t, spec, "e") ubar := mustAnyTypeByName(t, spec, "ubar") got := CollectGlobalTypes(spec) qt.Assert(t, qt.IsNil(err)) want := []btf.Type{bar, barfoo, baz, e, ubar} qt.Assert(t, qt.CmpEquals(got, want, cmp.Comparer(func(a, b btf.Type) bool { return a.TypeName() == b.TypeName() }))) } ================================================ FILE: cmd/bpf2go/internal/module.go ================================================ package internal // 
We used to have some clever code here which relied on debug.ReadBuildInfo(). // This is broken due to https://github.com/golang/go/issues/33976, and some build // systems like bazel also do not generate the necessary data. Let's keep it // simple instead. // The module containing the code in this repository. const CurrentModule = "github.com/cilium/ebpf" ================================================ FILE: cmd/bpf2go/main.go ================================================ //go:build !windows package main import ( "errors" "flag" "fmt" "io" "os" "os/exec" "path/filepath" "regexp" "slices" "sort" "strconv" "strings" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/cmd/bpf2go/gen" ) const helpText = `Usage: %[1]s [options] [-- ] ident is used as the stem of all generated Go types and functions, and must be a valid Go identifier. source is a single C file that is compiled using the specified compiler (usually some version of clang). You can pass options to the compiler by appending them after a '--' argument or by supplying -cflags. Flags passed as arguments take precedence over flags passed via -cflags. Additionally, the program expands quotation marks in -cflags. This means that -cflags 'foo "bar baz"' is passed to the compiler as two arguments "foo" and "bar baz". The program expects GOPACKAGE to be set in the environment, and should be invoked via go generate. The generated files are written to the current directory. Some options take defaults from the environment. Variable name is mentioned next to the respective option. Options: ` func run(stdout io.Writer, args []string) (err error) { b2g, err := newB2G(stdout, args) switch { case err == nil: return b2g.convertAll() case errors.Is(err, flag.ErrHelp): return nil default: return err } } type bpf2go struct { stdout io.Writer verbose bool // Absolute path to a .c file. sourceFile string // Absolute path to a directory where .go are written outputDir string // Alternative output stem. 
// newB2G parses command line arguments into a bpf2go configuration.
//
// Usage text and flag diagnostics are written to stdout. Several flags take
// their default from the environment: -verbose from $V, -cc from $BPF2GO_CC,
// -strip from $BPF2GO_STRIP, -cflags from $BPF2GO_CFLAGS, -makebase from
// $BPF2GO_MAKEBASE, -output-suffix from $GOFILE and -go-package from
// $GOPACKAGE.
//
// Once flags and C flags are removed, at least two positional arguments must
// remain: the identifier stem followed by the C source file.
//
// Errors from the flag package are returned unmodified, so callers can compare
// them against flag.ErrHelp.
func newB2G(stdout io.Writer, args []string) (*bpf2go, error) {
	b2g := &bpf2go{
		stdout: stdout,
	}

	fs := flag.NewFlagSet("bpf2go", flag.ContinueOnError)
	fs.BoolVar(&b2g.verbose, "verbose", getBool("V", false), "Enable verbose logging ($V)")
	fs.StringVar(&b2g.cc, "cc", getEnv("BPF2GO_CC", "clang"), "`binary` used to compile C to BPF ($BPF2GO_CC)")
	fs.StringVar(&b2g.strip, "strip", getEnv("BPF2GO_STRIP", ""), "`binary` used to strip DWARF from compiled BPF ($BPF2GO_STRIP)")
	fs.BoolVar(&b2g.disableStripping, "no-strip", false, "disable stripping of DWARF")
	flagCFlags := fs.String("cflags", getEnv("BPF2GO_CFLAGS", ""), "flags passed to the compiler, may contain quoted arguments ($BPF2GO_CFLAGS)")
	fs.Var(&b2g.tags, "tags", "Comma-separated list of Go build tags to include in generated files")
	flagTarget := fs.String("target", "bpfel,bpfeb", "clang target(s) to compile for (comma separated)")
	fs.StringVar(&b2g.makeBase, "makebase", getEnv("BPF2GO_MAKEBASE", ""), "write make compatible depinfo files relative to `directory` ($BPF2GO_MAKEBASE)")
	fs.Var(&b2g.cTypes, "type", "`Name` of a type to generate a Go declaration for, may be repeated")
	fs.BoolVar(&b2g.skipGlobalTypes, "no-global-types", false, "Skip generating types for map keys and values, etc.")
	fs.StringVar(&b2g.outputStem, "output-stem", "", "alternative stem for names of generated files (defaults to ident)")

	// Default the suffix to _test when $GOFILE names a _test.go file.
	outputSuffix := ""
	if strings.HasSuffix(getEnv("GOFILE", ""), "_test.go") {
		outputSuffix = "_test"
	}
	fs.StringVar(&b2g.outputSuffix, "output-suffix", outputSuffix, "suffix in generated file names such as _test (default based on $GOFILE)")
	outDir := fs.String("output-dir", "", "target directory of generated files (defaults to current directory)")
	outPkg := fs.String("go-package", "", "package for output go file (default as ENV GOPACKAGE)")
	fs.SetOutput(b2g.stdout)
	fs.Usage = func() {
		fmt.Fprintf(fs.Output(), helpText, fs.Name())
		fs.PrintDefaults()
		fmt.Fprintln(fs.Output())
		printTargets(fs.Output())
	}
	if err := fs.Parse(args); err != nil {
		return nil, err
	}

	// Fall back to the current working directory. A directory given on the
	// command line is stored as is.
	if *outDir == "" {
		var err error
		if *outDir, err = os.Getwd(); err != nil {
			return nil, err
		}
	}
	b2g.outputDir = *outDir

	// The flag wins over the environment; one of the two must be set.
	if *outPkg == "" {
		*outPkg = os.Getenv(gopackageEnv)
	}
	b2g.pkg = *outPkg

	if b2g.pkg == "" {
		return nil, errors.New("missing package, you should either set the go-package flag or the GOPACKAGE env")
	}

	// Allow CC like "ccache clang" to work.
	ccParts := strings.Fields(b2g.cc)
	if len(ccParts) == 0 {
		return nil, errors.New("no compiler specified")
	}
	// The first field is the binary to execute. The remaining fields are
	// prepended to the C flags further down.
	b2g.cc = ccParts[0]

	// From here on args holds what is left of the non-flag arguments after
	// the C flags have been split off.
	// NOTE(review): presumably the C flags are everything after "--", as the
	// help text describes; confirm against splitCFlagsFromArgs.
	args, cFlags := splitCFlagsFromArgs(fs.Args())

	if *flagCFlags != "" {
		splitCFlags, err := splitArguments(*flagCFlags)
		if err != nil {
			return nil, err
		}

		// Command line arguments take precedence over C flags
		// from the flag.
		cFlags = append(splitCFlags, cFlags...)
	}

	// Any flag starting with -M is rejected in favour of -makebase; convert
	// adds its own -MD, -MP and -MF flags when a make base is configured.
	for _, cFlag := range cFlags {
		if strings.HasPrefix(cFlag, "-M") {
			return nil, fmt.Errorf("use -makebase instead of %q", cFlag)
		}
	}

	// Final order: extra fields of -cc, then -cflags, then C flags given as
	// arguments.
	b2g.cFlags = append(ccParts[1:], cFlags[:len(cFlags):len(cFlags)]...)

	if len(args) < 2 {
		return nil, errors.New("expected at least two arguments")
	}

	b2g.identStem = args[0]

	// The source file and the make base are both stored as absolute paths.
	sourceFile, err := filepath.Abs(args[1])
	if err != nil {
		return nil, err
	}
	b2g.sourceFile = sourceFile

	if b2g.makeBase != "" {
		b2g.makeBase, err = filepath.Abs(b2g.makeBase)
		if err != nil {
			return nil, err
		}
	}

	// Stem and suffix become part of a file name, so neither may contain a
	// path separator.
	if b2g.outputStem != "" && strings.ContainsRune(b2g.outputStem, filepath.Separator) {
		return nil, fmt.Errorf("-output-stem %q must not contain path separation characters", b2g.outputStem)
	}

	if strings.ContainsRune(b2g.outputSuffix, filepath.Separator) {
		return nil, fmt.Errorf("-output-suffix %q must not contain path separation characters", b2g.outputSuffix)
	}

	// Resolve each comma separated -target entry. Entries that resolve to the
	// same gen.Target share a single map entry.
	targetArches := make(map[gen.Target]gen.GoArches)
	for _, tgt := range strings.Split(*flagTarget, ",") {
		target, goarches, err := gen.FindTarget(tgt)
		if err != nil {
			// For an invalid target, list the supported ones before
			// returning the error.
			if errors.Is(err, gen.ErrInvalidTarget) {
				printTargets(b2g.stdout)
				fmt.Fprintln(b2g.stdout)
			}
			return nil, err
		}

		targetArches[target] = goarches
	}

	if len(targetArches) == 0 {
		return nil, fmt.Errorf("no targets specified")
	}
	b2g.targetArches = targetArches

	// Try to find a suitable llvm-strip, possibly with a version suffix derived
	// from the clang binary.
	if b2g.strip == "" {
		b2g.strip = "llvm-strip"
		// Only applies when the compiler name itself starts with "clang",
		// e.g. "clang-14" yields "llvm-strip-14".
		if after, ok := strings.CutPrefix(b2g.cc, "clang"); ok {
			b2g.strip += after
		}
	}

	return b2g, nil
}
// cTypes is a flag.Value that collects C type names.
//
// The names are kept sorted and free of duplicates. Only names made up of
// characters matching validCTypeChars are accepted.
type cTypes []string

var _ flag.Value = (*cTypes)(nil)

// String formats the collected names like a slice of strings. A nil receiver
// is rendered as "[]".
func (ct *cTypes) String() string {
	if ct == nil {
		return "[]"
	}
	return fmt.Sprint(*ct)
}

// validCTypeChars is the character class a type name may be built from. It is
// matched case-insensitively.
const validCTypeChars = `[a-z0-9_]`

var reValidCType = regexp.MustCompile(`(?i)^` + validCTypeChars + `+$`)

// Set adds value to the collection, keeping it sorted.
//
// An error is returned if value is empty, contains characters outside of
// validCTypeChars, or has already been added.
func (ct *cTypes) Set(value string) error {
	if !reValidCType.MatchString(value) {
		return fmt.Errorf("%q contains characters outside of %s", value, validCTypeChars)
	}

	// The slice is always sorted, so a binary search finds both duplicates
	// and the position at which value has to be inserted.
	i, found := slices.BinarySearch(*ct, value)
	if found {
		return fmt.Errorf("duplicate type %q", value)
	}

	*ct = slices.Insert(*ct, i, value)
	return nil
}
b2g.tags.Expr) if err := b2g.removeOldOutputFiles(outputStem, tgt); err != nil { return fmt.Errorf("remove obsolete output: %w", err) } var depInput *os.File cFlags := slices.Clone(b2g.cFlags) if b2g.makeBase != "" { depInput, err = os.CreateTemp("", "bpf2go") if err != nil { return err } defer depInput.Close() defer os.Remove(depInput.Name()) cFlags = append(cFlags, // Output dependency information. "-MD", // Create phony targets so that deleting a dependency doesn't // break the build. "-MP", // Write it to temporary file "-MF"+depInput.Name(), ) } err = gen.Compile(gen.CompileArgs{ CC: b2g.cc, Strip: b2g.strip, DisableStripping: b2g.disableStripping, Flags: cFlags, Target: tgt, Workdir: cwd, Source: b2g.sourceFile, Dest: objFileName, }) if err != nil { return fmt.Errorf("compile: %w", err) } if b2g.disableStripping { b2g.Debugln("Compiled object", "file", objFileName) } else { b2g.Debugln("Compiled and stripped object", "file", objFileName) } spec, err := ebpf.LoadCollectionSpec(objFileName) if err != nil { return fmt.Errorf("can't load BPF from ELF: %s", err) } var maps []string for name := range spec.Maps { // Skip .rodata, .data, .bss, etc. sections if !strings.HasPrefix(name, ".") { maps = append(maps, name) } } var variables []string for name := range spec.Variables { variables = append(variables, name) } var programs []string for name := range spec.Programs { programs = append(programs, name) } types, err := collectCTypes(spec.Types, b2g.cTypes) if err != nil { return fmt.Errorf("collect C types: %w", err) } if !b2g.skipGlobalTypes { types = append(types, gen.CollectGlobalTypes(spec)...) 
} // Write out generated go goFileName := filepath.Join(absOutPath, stem+".go") goFile, err := os.Create(goFileName) if err != nil { return err } defer removeOnError(goFile) err = gen.Generate(gen.GenerateArgs{ Package: b2g.pkg, Stem: b2g.identStem, Constraints: constraints, Maps: maps, Variables: variables, Programs: programs, Types: types, ObjectFile: filepath.Base(objFileName), Output: goFile, }) if err != nil { return fmt.Errorf("can't write %s: %s", goFileName, err) } b2g.Debugln("Generated bpf2go binding", "file", goFileName) if b2g.makeBase == "" { return } deps, err := parseDependencies(cwd, depInput) if err != nil { return fmt.Errorf("can't read dependency information: %s", err) } depFileName := goFileName + ".d" depOutput, err := os.Create(depFileName) if err != nil { return fmt.Errorf("write make dependencies: %w", err) } defer depOutput.Close() // There is always at least a dependency for the main file. deps[0].file = goFileName if err := adjustDependencies(depOutput, b2g.makeBase, deps); err != nil { return fmt.Errorf("can't adjust dependency information: %s", err) } b2g.Debugln("Wrote dependency", "file", depFileName) return nil } // removeOldOutputFiles removes output files generated by an old naming scheme. // // In the old scheme some linux targets were interpreted as build constraints // by the go toolchain. 
func (b2g *bpf2go) removeOldOutputFiles(outputStem string, tgt gen.Target) error { suffix := tgt.ObsoleteSuffix() if suffix == "" { return nil } stem := fmt.Sprintf("%s_%s", outputStem, suffix) for _, ext := range []string{".o", ".go"} { filename := filepath.Join(b2g.outputDir, stem+ext) if err := os.Remove(filename); errors.Is(err, os.ErrNotExist) { continue } else if err != nil { return err } b2g.Debugln("Removed obsolete output file", "file", filename) } return nil } func printTargets(w io.Writer) { var arches []string for goarch, archTarget := range gen.TargetsByGoArch() { if archTarget.IsGeneric() { continue } arches = append(arches, string(goarch)) } sort.Strings(arches) fmt.Fprint(w, "Supported targets:\n") fmt.Fprint(w, "\tbpf\n\tbpfel\n\tbpfeb\n") for _, arch := range arches { fmt.Fprintf(w, "\t%s\n", arch) } } func collectCTypes(types *btf.Spec, names []string) ([]btf.Type, error) { var result []btf.Type for _, cType := range names { typ, err := types.AnyTypeByName(cType) if err != nil { return nil, fmt.Errorf("looking up type %s: %w", cType, err) } result = append(result, typ) } return result, nil } const gopackageEnv = "GOPACKAGE" func main() { if err := run(os.Stdout, os.Args[1:]); err != nil { fmt.Fprintln(os.Stderr, "Error:", err) os.Exit(1) } } ================================================ FILE: cmd/bpf2go/main_test.go ================================================ //go:build !windows package main import ( "bytes" "fmt" "io" "os" "os/exec" "path/filepath" "strings" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/cmd/bpf2go/gen" "github.com/cilium/ebpf/cmd/bpf2go/internal" "github.com/cilium/ebpf/internal/testutils" ) const minimalSocketFilter = `__attribute__((section("socket"), used)) int main() { return 0; }` func TestRun(t *testing.T) { clangBin := testutils.ClangBin(t) dir := t.TempDir() mustWriteFile(t, dir, "test.c", minimalSocketFilter) modRoot, err := filepath.Abs("../..") qt.Assert(t, qt.IsNil(err)) if _, err := 
os.Stat(filepath.Join(modRoot, "go.mod")); os.IsNotExist(err) { t.Fatal("No go.mod file in", modRoot) } modDir := t.TempDir() execInModule := func(name string, args ...string) { t.Helper() cmd := exec.Command(name, args...) cmd.Dir = modDir if out, err := cmd.CombinedOutput(); err != nil { if out := string(out); out != "" { t.Log(out) } t.Fatalf("Can't execute %s: %v", name, args) } } module := internal.CurrentModule execInModule("go", "mod", "init", "bpf2go-test") execInModule("go", "mod", "edit", // Require the module. The version doesn't matter due to the replace // below. fmt.Sprintf("-require=%s@v0.0.0", module), // Replace the module with the current version. fmt.Sprintf("-replace=%s=%s", module, modRoot), ) goarches := []string{ "amd64", // little-endian "arm64", "s390x", // big-endian } err = run(io.Discard, []string{ "-go-package", "main", "-output-dir", modDir, "-cc", clangBin, "-target", strings.Join(goarches, ","), "bar", filepath.Join(dir, "test.c"), }) if err != nil { t.Fatal("Can't run:", err) } mustWriteFile(t, modDir, "main.go", ` package main func main() { var obj barObjects println(obj.Main) }`) for _, arch := range goarches { t.Run(arch, func(t *testing.T) { goBuild := exec.Command("go", "build", "-mod=mod", "-o", "/dev/null") goBuild.Dir = modDir goBuild.Env = append(os.Environ(), "GOOS=linux", "GOARCH="+arch, "GOPROXY=off", "GOSUMDB=off", ) out, err := goBuild.CombinedOutput() if err != nil { if out := string(out); out != "" { t.Log(out) } t.Error("Can't compile package:", err) } }) } } func TestHelp(t *testing.T) { var stdout bytes.Buffer err := run(&stdout, []string{"-help"}) if err != nil { t.Fatal("Can't execute -help") } if stdout.Len() == 0 { t.Error("-help doesn't write to stdout") } } func TestErrorMentionsEnvVar(t *testing.T) { err := run(io.Discard, nil) qt.Assert(t, qt.StringContains(err.Error(), gopackageEnv), qt.Commentf("Error should include name of environment variable")) } func TestDisableStripping(t *testing.T) { dir := 
t.TempDir() mustWriteFile(t, dir, "test.c", minimalSocketFilter) err := run(io.Discard, []string{ "-go-package", "foo", "-output-dir", dir, "-cc", testutils.ClangBin(t), "-strip", "binary-that-certainly-doesnt-exist", "-no-strip", "bar", filepath.Join(dir, "test.c"), }) if err != nil { t.Fatal("Can't run with stripping disabled:", err) } } func TestConvertGOARCH(t *testing.T) { tmp := t.TempDir() mustWriteFile(t, tmp, "test.c", ` #ifndef __TARGET_ARCH_x86 #error __TARGET_ARCH_x86 is not defined #endif`, ) b2g := bpf2go{ pkg: "test", stdout: io.Discard, identStem: "test", cc: testutils.ClangBin(t), disableStripping: true, sourceFile: tmp + "/test.c", outputDir: tmp, } if err := b2g.convert(gen.TargetsByGoArch()["amd64"], nil); err != nil { t.Fatal("Can't target GOARCH:", err) } } func TestCTypes(t *testing.T) { var ct cTypes valid := []string{ "abcdefghijklmnopqrstuvqxyABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_", "y", } for _, value := range valid { if err := ct.Set(value); err != nil { t.Fatalf("Set returned an error for %q: %s", value, err) } } qt.Assert(t, qt.ContentEquals(ct, valid)) for _, value := range []string{ "", " ", " frood", "foo\nbar", ".", ",", "+", "-", } { ct = nil if err := ct.Set(value); err == nil { t.Fatalf("Set did not return an error for %q", value) } } ct = nil qt.Assert(t, qt.IsNil(ct.Set("foo"))) qt.Assert(t, qt.IsNotNil(ct.Set("foo"))) } func TestParseArgs(t *testing.T) { const ( pkg = "eee" outputDir = "." 
csource = "testdata/minimal.c" stem = "a" ) t.Run("makebase", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) basePath, _ := filepath.Abs("barfoo") args := []string{"-makebase", basePath, stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.makeBase, basePath)) }) t.Run("makebase from env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) basePath, _ := filepath.Abs("barfoo") args := []string{stem, csource} t.Setenv("BPF2GO_MAKEBASE", basePath) b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.makeBase, basePath)) }) t.Run("makebase flag overrides env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) basePathFlag, _ := filepath.Abs("barfoo") basePathEnv, _ := filepath.Abs("foobar") args := []string{"-makebase", basePathFlag, stem, csource} t.Setenv("BPF2GO_MAKEBASE", basePathEnv) b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.makeBase, basePathFlag)) }) t.Run("cc defaults to clang", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.cc, "clang")) }) t.Run("cc", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-cc", "barfoo", stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.cc, "barfoo")) }) t.Run("cc from env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{stem, csource} t.Setenv("BPF2GO_CC", "barfoo") b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.cc, "barfoo")) }) t.Run("cc flag overrides env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-cc", "barfoo", stem, csource} t.Setenv("BPF2GO_CC", "foobar") b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.cc, "barfoo")) }) 
t.Run("strip defaults to llvm-strip", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.strip, "llvm-strip")) }) t.Run("strip", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-strip", "barfoo", stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.strip, "barfoo")) }) t.Run("strip from env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{stem, csource} t.Setenv("BPF2GO_STRIP", "barfoo") b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.strip, "barfoo")) }) t.Run("strip flag overrides env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-strip", "barfoo", stem, csource} t.Setenv("BPF2GO_STRIP", "foobar") b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.strip, "barfoo")) }) t.Run("no strip defaults to false", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsFalse(b2g.disableStripping)) }) t.Run("no strip", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-no-strip", stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsTrue(b2g.disableStripping)) }) t.Run("cflags flag", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-cflags", "x y z", stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(b2g.cFlags, []string{"x", "y", "z"})) }) t.Run("cflags multi flag", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-cflags", "x y z", "-cflags", "u v", stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, 
qt.DeepEquals(b2g.cFlags, []string{"u", "v"})) }) t.Run("cflags flag and args", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-cflags", "x y z", "stem", csource, "--", "u", "v"} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(b2g.cFlags, []string{"x", "y", "z", "u", "v"})) }) t.Run("cflags from env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{stem, csource} t.Setenv("BPF2GO_CFLAGS", "x y z") b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(b2g.cFlags, []string{"x", "y", "z"})) }) t.Run("cflags flag overrides env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-cflags", "u v", stem, csource} t.Setenv("BPF2GO_CFLAGS", "x y z") b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(b2g.cFlags, []string{"u", "v"})) }) t.Run("go package overrides env", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-go-package", "aaa", stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.pkg, "aaa")) }) t.Run("output dir", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) args := []string{"-output-dir", outputDir, stem, csource} b2g, err := newB2G(&bytes.Buffer{}, args) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.outputDir, outputDir)) }) t.Run("output suffix default", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) b2g, err := newB2G(&bytes.Buffer{}, []string{stem, csource}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.outputSuffix, "")) }) t.Run("output suffix GOFILE=_test", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) t.Setenv("GOFILE", "foo_test.go") b2g, err := newB2G(&bytes.Buffer{}, []string{stem, csource}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(b2g.outputSuffix, "_test")) }) t.Run("output suffix custom", func(t *testing.T) { t.Setenv(gopackageEnv, pkg) 
// adjustDependencies writes deps to w in make syntax, with the target and all
// prerequisites rewritten as paths relative to baseDir.
//
// A dependency without prerequisites is written as a bare "target:" rule.
// Otherwise each prerequisite is placed on its own continuation line. Every
// rule is followed by an empty line.
func adjustDependencies(w io.Writer, baseDir string, deps []dependency) error {
	for _, dep := range deps {
		relativeFile, err := filepath.Rel(baseDir, dep.file)
		if err != nil {
			return err
		}

		if len(dep.prerequisites) == 0 {
			if _, err := fmt.Fprintf(w, "%s:\n\n", relativeFile); err != nil {
				return err
			}
			continue
		}

		prereqs := make([]string, 0, len(dep.prerequisites))
		for _, prereq := range dep.prerequisites {
			relativePrereq, err := filepath.Rel(baseDir, prereq)
			if err != nil {
				return err
			}
			prereqs = append(prereqs, relativePrereq)
		}

		_, err = fmt.Fprintf(w, "%s: \\\n %s\n\n", relativeFile, strings.Join(prereqs, " \\\n "))
		if err != nil {
			return err
		}
	}
	return nil
}

// dependency is a single make rule: a target file and the files it depends on.
type dependency struct {
	file          string
	prerequisites []string
}

// parseDependencies reads make rules of the form "target: prereq..." from in.
//
// Lines ending in a backslash are joined with the line that follows. Empty
// lines are skipped. Relative paths are resolved against baseDir, absolute
// paths are kept as they are.
//
// An error is returned if a logical line exceeds 1 MiB, if a line doesn't
// contain a ':', or if no rule was found at all.
func parseDependencies(baseDir string, in io.Reader) ([]dependency, error) {
	const maxLineLen = 1024 * 1024

	abs := func(path string) string {
		if filepath.IsAbs(path) {
			return path
		}
		return filepath.Join(baseDir, path)
	}

	var (
		line strings.Builder
		deps []dependency
	)

	// flush turns the logical line collected so far into a dependency and
	// clears the buffer. An empty buffer is a no-op.
	flush := func() error {
		if line.Len() == 0 {
			return nil
		}
		text := line.String()
		line.Reset()

		target, rest, found := strings.Cut(text, ":")
		if !found {
			return errors.New("invalid line without ':'")
		}

		// NB: This doesn't handle filenames with spaces in them.
		// It seems like make doesn't do that either, so oh well.
		var prereqs []string
		for _, prereq := range strings.Fields(rest) {
			prereqs = append(prereqs, abs(prereq))
		}

		deps = append(deps, dependency{file: abs(target), prerequisites: prereqs})
		return nil
	}

	scanner := bufio.NewScanner(in)
	for scanner.Scan() {
		buf := scanner.Bytes()
		if line.Len()+len(buf) > maxLineLen {
			return nil, errors.New("line too long")
		}

		// A trailing backslash continues the rule on the next line.
		if head, ok := bytes.CutSuffix(buf, []byte{'\\'}); ok {
			line.Write(head)
			continue
		}

		line.Write(buf)
		if err := flush(); err != nil {
			return nil, err
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// The input may end in a continuation that is never followed by another
	// line. Parse what has been collected instead of dropping it silently.
	if err := flush(); err != nil {
		return nil, err
	}

	// There is always at least a dependency for the main file.
	if len(deps) == 0 {
		return nil, errors.New("empty dependency file")
	}
	return deps, nil
}
qt.IsNil(err)) qt.Assert(t, qt.Not(qt.IsNil(spec))) qt.Assert(t, qt.Not(qt.IsNil(spec.Programs))) qt.Assert(t, qt.Not(qt.IsNil(spec.Maps))) qt.Assert(t, qt.Not(qt.IsNil(spec.Variables))) } func TestLoadingObjects(t *testing.T) { var objs testObjects err := loadTestObjects(&objs, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't load objects:", err) } defer objs.Close() qt.Assert(t, qt.Not(qt.IsNil(objs.Filter))) qt.Assert(t, qt.Not(qt.IsNil(objs.Map1))) qt.Assert(t, qt.Not(qt.IsNil(objs.MyConstant))) qt.Assert(t, qt.Not(qt.IsNil(objs.StructConst))) } func TestTypes(t *testing.T) { if testEHOOPY != 0 { t.Error("Expected testEHOOPY to be 0, got", testEHOOPY) } if testEFROOD != 1 { t.Error("Expected testEFROOD to be 0, got", testEFROOD) } e := testE(0) if size := unsafe.Sizeof(e); size != 4 { t.Error("Expected size of exampleE to be 4, got", size) } bf := testBarfoo{} if size := unsafe.Sizeof(bf); size != 16 { t.Error("Expected size of exampleE to be 16, got", size) } if reflect.TypeOf(bf.Bar).Kind() != reflect.Int64 { t.Error("Expected testBarfoo.Bar to be int64") } if reflect.TypeOf(bf.Baz).Kind() != reflect.Bool { t.Error("Expected testBarfoo.Baz to be bool") } if reflect.TypeOf(bf.Boo) != reflect.TypeOf(e) { t.Error("Expected testBarfoo.Boo to be exampleE") } } ================================================ FILE: cmd/bpf2go/test/doc.go ================================================ // Package test checks that the code generated by bpf2go conforms to a // specific API. package test //go:generate go tool bpf2go -tags linux test ../testdata/minimal.c ================================================ FILE: cmd/bpf2go/test/test_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. 
// loadTest returns the embedded CollectionSpec for test.
//
// The spec is parsed from the bytes held in _TestBytes on every call. A parse
// failure is returned wrapped, prefixed with "can't load test".
func loadTest() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_TestBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load test: %w", err)
	}

	// err is always nil here, the failure case has returned above.
	return spec, err
}

// loadTestObjects loads test and converts it into a struct.
//
// The following types are suitable as obj argument:
//
//	*testObjects
//	*testPrograms
//	*testMaps
//
// opts is handed to LoadAndAssign unchanged. An error from loadTest is
// returned as is.
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadTestObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, err := loadTest()
	if err != nil {
		return err
	}

	return spec.LoadAndAssign(obj, opts)
}
type testVariableSpecs struct { AnInt *ebpf.VariableSpec `ebpf:"an_int"` IntArray *ebpf.VariableSpec `ebpf:"int_array"` MyConstant *ebpf.VariableSpec `ebpf:"my_constant"` StructArray *ebpf.VariableSpec `ebpf:"struct_array"` StructConst *ebpf.VariableSpec `ebpf:"struct_const"` StructVar *ebpf.VariableSpec `ebpf:"struct_var"` UnionVar *ebpf.VariableSpec `ebpf:"union_var"` } // testObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testObjects struct { testPrograms testMaps testVariables } func (o *testObjects) Close() error { return _TestClose( &o.testPrograms, &o.testMaps, ) } // testMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testMaps struct { Map1 *ebpf.Map `ebpf:"map1"` } func (m *testMaps) Close() error { return _TestClose( m.Map1, ) } // testVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testVariables struct { AnInt *ebpf.Variable `ebpf:"an_int"` IntArray *ebpf.Variable `ebpf:"int_array"` MyConstant *ebpf.Variable `ebpf:"my_constant"` StructArray *ebpf.Variable `ebpf:"struct_array"` StructConst *ebpf.Variable `ebpf:"struct_const"` StructVar *ebpf.Variable `ebpf:"struct_var"` UnionVar *ebpf.Variable `ebpf:"union_var"` } // testPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testPrograms struct { Filter *ebpf.Program `ebpf:"filter"` } func (p *testPrograms) Close() error { return _TestClose( p.Filter, ) } func _TestClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed test_bpfeb.o var _TestBytes []byte ================================================ FILE: cmd/bpf2go/test/test_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package test import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type testBar struct { _ structs.HostLayout A uint64 B uint32 _ [4]byte } type testBarfoo struct { _ structs.HostLayout Bar int64 Baz bool _ [3]byte Boo testE } type testBaz struct { _ structs.HostLayout A uint64 } type testE uint32 const ( testEHOOPY testE = 0 testEFROOD testE = 1 ) type testUbar struct { _ structs.HostLayout A uint32 _ [4]byte } // loadTest returns the embedded CollectionSpec for test. func loadTest() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_TestBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load test: %w", err) } return spec, err } // loadTestObjects loads test and converts it into a struct. // // The following types are suitable as obj argument: // // *testObjects // *testPrograms // *testMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadTestObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadTest() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // testSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type testSpecs struct { testProgramSpecs testMapSpecs testVariableSpecs } // testProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type testProgramSpecs struct { Filter *ebpf.ProgramSpec `ebpf:"filter"` } // testMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type testMapSpecs struct { Map1 *ebpf.MapSpec `ebpf:"map1"` } // testVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type testVariableSpecs struct { AnInt *ebpf.VariableSpec `ebpf:"an_int"` IntArray *ebpf.VariableSpec `ebpf:"int_array"` MyConstant *ebpf.VariableSpec `ebpf:"my_constant"` StructArray *ebpf.VariableSpec `ebpf:"struct_array"` StructConst *ebpf.VariableSpec `ebpf:"struct_const"` StructVar *ebpf.VariableSpec `ebpf:"struct_var"` UnionVar *ebpf.VariableSpec `ebpf:"union_var"` } // testObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testObjects struct { testPrograms testMaps testVariables } func (o *testObjects) Close() error { return _TestClose( &o.testPrograms, &o.testMaps, ) } // testMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testMaps struct { Map1 *ebpf.Map `ebpf:"map1"` } func (m *testMaps) Close() error { return _TestClose( m.Map1, ) } // testVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. type testVariables struct { AnInt *ebpf.Variable `ebpf:"an_int"` IntArray *ebpf.Variable `ebpf:"int_array"` MyConstant *ebpf.Variable `ebpf:"my_constant"` StructArray *ebpf.Variable `ebpf:"struct_array"` StructConst *ebpf.Variable `ebpf:"struct_const"` StructVar *ebpf.Variable `ebpf:"struct_var"` UnionVar *ebpf.Variable `ebpf:"union_var"` } // testPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadTestObjects or ebpf.CollectionSpec.LoadAndAssign. 
type testPrograms struct {
	Filter *ebpf.Program `ebpf:"filter"`
}

func (p *testPrograms) Close() error {
	return _TestClose(
		p.Filter,
	)
}

func _TestClose(closers ...io.Closer) error {
	for _, closer := range closers {
		if err := closer.Close(); err != nil {
			return err
		}
	}
	return nil
}

// Do not access this directly.
//
//go:embed test_bpfel.o
var _TestBytes []byte

================================================
FILE: cmd/bpf2go/testdata/minimal.c
================================================
#include "../../../testdata/common.h"

char __license[] __section("license") = "MIT";

enum e { HOOPY, FROOD };

typedef long long int longint;

typedef struct {
	longint bar;
	_Bool baz;
	enum e boo;
} barfoo;

typedef struct {
	uint64_t a;
} baz;

struct bar {
	uint64_t a;
	uint32_t b;
};

union ubar {
	uint32_t a;
	uint64_t b;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, enum e);
	__type(value, barfoo);
	__uint(max_entries, 1);
} map1 __section(".maps");

volatile const int an_int;
volatile const enum e my_constant = FROOD;
volatile const int int_array[2];
volatile const barfoo struct_const;
volatile const baz struct_array[2];
volatile struct bar struct_var;
volatile union ubar union_var;

__section("socket") int filter() {
	return my_constant + struct_const.bar;
}

================================================
FILE: cmd/bpf2go/tools.go
================================================
//go:build !windows

package main

import (
	"errors"
	"fmt"
	"strings"
)

// splitCFlagsFromArgs splits in at the first "--" element.
//
// Everything before the separator is returned as args and everything after
// it as cflags; the separator itself is dropped. Without a separator, in is
// returned unchanged and cflags is nil. Both results are sub-slices of in,
// not copies.
func splitCFlagsFromArgs(in []string) (args, cflags []string) {
	for i, arg := range in {
		if arg == "--" {
			return in[:i], in[i+1:]
		}
	}

	return in, nil
}

// splitArguments splits in into words.
//
// Leading and trailing whitespace is trimmed first. Words are separated by
// spaces. A single or double quote that appears outside of a quoted section
// opens one, which runs until the same quote character is seen again; spaces
// and the other quote character are taken literally inside it. A backslash
// causes the following character to be taken literally, both inside and
// outside of quotes.
//
// Empty words are dropped unless they were written as an empty quoted
// section. Returns an error if a quoted section is not terminated or if the
// input ends with an unfinished escape.
func splitArguments(in string) ([]string, error) {
	var (
		result  []string
		builder strings.Builder
		escaped bool
		// delim is the character that ends the current word: a space outside
		// of quotes, otherwise the quote character that opened the section.
		delim = ' '
	)

	for _, r := range strings.TrimSpace(in) {
		if escaped {
			// The previous rune was a backslash: take this one verbatim.
			builder.WriteRune(r)
			escaped = false
			continue
		}

		switch r {
		case '\\':
			escaped = true

		case delim:
			current := builder.String()
			builder.Reset()

			if current != "" || delim != ' ' {
				// Only append empty words if they are not
				// delimited by spaces
				result = append(result, current)
			}
			delim = ' '

		case '"', '\'', ' ':
			// A space only reaches this case while inside quotes, since
			// outside of quotes it matches delim above.
			if delim == ' ' {
				// Opening quote: remember which character closes the section.
				delim = r
				continue
			}

			// Inside a quoted section, treat the rune as regular content.
			fallthrough

		default:
			builder.WriteRune(r)
		}
	}

	if delim != ' ' {
		return nil, fmt.Errorf("missing `%c`", delim)
	}

	if escaped {
		return nil, errors.New("unfinished escape")
	}

	// Add the last word
	if builder.Len() > 0 {
		result = append(result, builder.String())
	}

	return result, nil
}

================================================
FILE: cmd/bpf2go/tools_test.go
================================================
//go:build !windows

package main

import (
	"reflect"
	"testing"
)

// TestSplitArguments runs splitArguments over a table of inputs. A nil out
// marks an input that is expected to be rejected with an error.
func TestSplitArguments(t *testing.T) {
	testcases := []struct {
		in  string
		out []string
	}{
		{`foo`, []string{"foo"}},
		{`foo bar`, []string{"foo", "bar"}},
		// NOTE(review): textually identical to the previous case as seen here;
		// presumably it is meant to cover repeated whitespace - confirm.
		{`foo bar`, []string{"foo", "bar"}},
		{`\\`, []string{`\`}},
		{`\\\`, nil},
		{`foo\ bar`, []string{"foo bar"}},
		{`foo "" bar`, []string{"foo", "", "bar"}},
		{`"bar baz"`, []string{"bar baz"}},
		{`'bar baz'`, []string{"bar baz"}},
		{`'bar " " baz'`, []string{`bar " " baz`}},
		{`"bar \" baz"`, []string{`bar " baz`}},
		{`"`, nil},
		{`'`, nil},
	}

	for _, testcase := range testcases {
		have, err := splitArguments(testcase.in)
		if testcase.out == nil {
			if err == nil {
				t.Errorf("Test should fail for: %s", testcase.in)
			}
		} else if !reflect.DeepEqual(testcase.out, have) {
			t.Logf("Have: %q\n", have)
			t.Logf("Want: %q\n", testcase.out)
			t.Errorf("Test fails for: %s", testcase.in)
		}
	}
}

================================================
FILE: collection.go
================================================
package ebpf

import (
	"encoding/binary"
	"errors"
	"fmt"
	"path/filepath"
	"reflect"
	"runtime"
	"slices"
	"strings"

	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/kallsyms"
	"github.com/cilium/ebpf/internal/kconfig"
	"github.com/cilium/ebpf/internal/linux"
	"github.com/cilium/ebpf/internal/platform"
	"github.com/cilium/ebpf/internal/sys"
)

// CollectionOptions control loading a collection into the
kernel. // // Maps and Programs are passed to NewMapWithOptions and NewProgramsWithOptions. type CollectionOptions struct { Maps MapOptions Programs ProgramOptions // MapReplacements takes a set of Maps that will be used instead of // creating new ones when loading the CollectionSpec. // // For each given Map, there must be a corresponding MapSpec in // CollectionSpec.Maps, and its type, key/value size, max entries and flags // must match the values of the MapSpec. // // The given Maps are Clone()d before being used in the Collection, so the // caller can Close() them freely when they are no longer needed. MapReplacements map[string]*Map } // CollectionSpec describes a collection. type CollectionSpec struct { Maps map[string]*MapSpec Programs map[string]*ProgramSpec // Variables refer to global variables declared in the ELF. They can be read // and modified freely before loading the Collection. Modifying them after // loading has no effect on a running eBPF program. Variables map[string]*VariableSpec // Types holds type information about Maps and Programs. // Modifications to Types are currently undefined behaviour. Types *btf.Spec // ByteOrder specifies whether the ELF was compiled for // big-endian or little-endian architectures. ByteOrder binary.ByteOrder } // Copy returns a recursive copy of the spec. func (cs *CollectionSpec) Copy() *CollectionSpec { if cs == nil { return nil } cpy := CollectionSpec{ Maps: copyMapOfSpecs(cs.Maps), Programs: copyMapOfSpecs(cs.Programs), Variables: make(map[string]*VariableSpec, len(cs.Variables)), ByteOrder: cs.ByteOrder, Types: cs.Types.Copy(), } for name, spec := range cs.Variables { cpy.Variables[name] = spec.Copy() } if cs.Variables == nil { cpy.Variables = nil } return &cpy } func copyMapOfSpecs[T interface{ Copy() T }](m map[string]T) map[string]T { if m == nil { return nil } cpy := make(map[string]T, len(m)) for k, v := range m { cpy[k] = v.Copy() } return cpy } // Assign the contents of a CollectionSpec to a struct. 
// // This function is a shortcut to manually checking the presence // of maps and programs in a CollectionSpec. Consider using bpf2go // if this sounds useful. // // 'to' must be a pointer to a struct. A field of the // struct is updated with values from Programs, Maps or Variables if it // has an `ebpf` tag and its type is *ProgramSpec, *MapSpec or *VariableSpec. // The tag's value specifies the name of the program or map as // found in the CollectionSpec. // // struct { // Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"` // Bar *ebpf.MapSpec `ebpf:"bar_map"` // Var *ebpf.VariableSpec `ebpf:"some_var"` // Ignored int // } // // Returns an error if any of the eBPF objects can't be found, or // if the same Spec is assigned multiple times. func (cs *CollectionSpec) Assign(to interface{}) error { getValue := func(typ reflect.Type, name string) (interface{}, error) { switch typ { case reflect.TypeOf((*ProgramSpec)(nil)): if p := cs.Programs[name]; p != nil { return p, nil } return nil, fmt.Errorf("missing program %q", name) case reflect.TypeOf((*MapSpec)(nil)): if m := cs.Maps[name]; m != nil { return m, nil } return nil, fmt.Errorf("missing map %q", name) case reflect.TypeOf((*VariableSpec)(nil)): if v := cs.Variables[name]; v != nil { return v, nil } return nil, fmt.Errorf("missing variable %q", name) default: return nil, fmt.Errorf("unsupported type %s", typ) } } return assignValues(to, getValue) } // LoadAndAssign loads Maps and Programs into the kernel and assigns them // to a struct. // // Omitting Map/Program.Close() during application shutdown is an error. // See the package documentation for details around Map and Program lifecycle. // // This function is a shortcut to manually checking the presence // of maps and programs in a CollectionSpec. Consider using bpf2go // if this sounds useful. // // 'to' must be a pointer to a struct. A field of the struct is updated with // a Program or Map if it has an `ebpf` tag and its type is *Program or *Map. 
// The tag's value specifies the name of the program or map as found in the
// CollectionSpec. Before updating the struct, the requested objects and their
// dependent resources are loaded into the kernel and populated with values if
// specified.
//
//	struct {
//	    Foo     *ebpf.Program `ebpf:"xdp_foo"`
//	    Bar     *ebpf.Map     `ebpf:"bar_map"`
//	    Ignored int
//	}
//
// Fields of type *Variable are handled the same way, with the tag naming the
// global variable.
//
// opts may be nil.
//
// Returns an error if any of the fields can't be found, or
// if the same Map or Program is assigned multiple times.
func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions) error {
	loader, err := newCollectionLoader(cs, opts)
	if err != nil {
		return err
	}
	// Closes whatever is still tracked by the loader on return. Objects handed
	// over to 'to' are removed from the loader at the end of this function.
	defer loader.close()

	// Support assigning Programs and Maps, lazy-loading the required objects.
	// The sets below record which names were requested through 'to'.
	assignedMaps := make(map[string]bool)
	assignedProgs := make(map[string]bool)
	assignedVars := make(map[string]bool)

	getValue := func(typ reflect.Type, name string) (interface{}, error) {
		switch typ {

		case reflect.TypeOf((*Program)(nil)):
			assignedProgs[name] = true
			return loader.loadProgram(name)

		case reflect.TypeOf((*Map)(nil)):
			assignedMaps[name] = true
			return loader.loadMap(name)

		case reflect.TypeOf((*Variable)(nil)):
			assignedVars[name] = true
			return loader.loadVariable(name)

		default:
			return nil, fmt.Errorf("unsupported type %s", typ)
		}
	}

	// Load the Maps, Programs and Variables requested by the annotated struct.
	if err := assignValues(to, getValue); err != nil {
		return err
	}

	// Populate the requested maps. Has a chance of lazy-loading other dependent maps.
	if err := loader.populateDeferredMaps(); err != nil {
		return err
	}

	// Evaluate the loader's objects after all (lazy)loading has taken place.
	for n, m := range loader.maps {
		if m.typ.canStoreProgram() {
			// Require all lazy-loaded ProgramArrays to be assigned to the given object.
			// The kernel empties a ProgramArray once the last user space reference
			// to it closes, which leads to failed tail calls. Combined with the library
			// closing map fds via GC finalizers this can lead to surprising behaviour.
			// Only allow unassigned ProgramArrays when the library hasn't pre-populated
			// any entries from static value declarations. At this point, we know the map
			// is empty and there's no way for the caller to interact with the map going
			// forward.
			if !assignedMaps[n] && len(cs.Maps[n].Contents) > 0 {
				return fmt.Errorf("ProgramArray %s must be assigned to prevent missed tail calls", n)
			}
		}
	}

	// Prevent the deferred loader.close() from closing assigned Maps and
	// Programs. Assigned Variables are dropped from the loader as well.
	for m := range assignedMaps {
		delete(loader.maps, m)
	}
	for p := range assignedProgs {
		delete(loader.programs, p)
	}
	for p := range assignedVars {
		delete(loader.vars, p)
	}

	return nil
}

// Collection is a collection of live BPF resources present in the kernel.
type Collection struct {
	Programs map[string]*Program
	Maps     map[string]*Map

	// Variables contains global variables used by the Collection's program(s). On
	// kernels older than 5.5, most interactions with Variables return
	// [ErrNotSupported].
	Variables map[string]*Variable
}

// NewCollection creates a Collection from the given spec, creating and
// loading its declared resources into the kernel.
//
// It is equivalent to calling NewCollectionWithOptions with zero-valued
// CollectionOptions.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func NewCollection(spec *CollectionSpec) (*Collection, error) {
	return NewCollectionWithOptions(spec, CollectionOptions{})
}

// NewCollectionWithOptions creates a Collection from the given spec using
// options, creating and loading its declared resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) { loader, err := newCollectionLoader(spec, &opts) if err != nil { return nil, err } defer loader.close() // Create maps first, as their fds need to be linked into programs. for mapName := range spec.Maps { if _, err := loader.loadMap(mapName); err != nil { return nil, err } } for progName, prog := range spec.Programs { if prog.Type == UnspecifiedProgram { continue } if _, err := loader.loadProgram(progName); err != nil { return nil, err } } for varName := range spec.Variables { if _, err := loader.loadVariable(varName); err != nil { return nil, err } } // Maps can contain Program and Map stubs, so populate them after // all Maps and Programs have been successfully loaded. if err := loader.populateDeferredMaps(); err != nil { return nil, err } // Prevent loader.cleanup from closing maps, programs and vars. maps, progs, vars := loader.maps, loader.programs, loader.vars loader.maps, loader.programs, loader.vars = nil, nil, nil return &Collection{ progs, maps, vars, }, nil } type collectionLoader struct { coll *CollectionSpec opts *CollectionOptions maps map[string]*Map programs map[string]*Program vars map[string]*Variable types *btf.Cache } func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collectionLoader, error) { if opts == nil { opts = &CollectionOptions{} } // Check for existing MapSpecs in the CollectionSpec for all provided replacement maps. 
for name := range opts.MapReplacements { if _, ok := coll.Maps[name]; !ok { return nil, fmt.Errorf("replacement map %s not found in CollectionSpec", name) } } if err := populateKallsyms(coll.Programs); err != nil { return nil, fmt.Errorf("populating kallsyms caches: %w", err) } return &collectionLoader{ coll, opts, make(map[string]*Map), make(map[string]*Program), make(map[string]*Variable), btf.NewCache(), }, nil } // populateKallsyms populates kallsyms caches, making lookups cheaper later on // during individual program loading. Since we have less context available // at those stages, we batch the lookups here instead to avoid redundant work. func populateKallsyms(progs map[string]*ProgramSpec) error { // Look up addresses of all kernel symbols referenced by all programs. addrs := make(map[string]uint64) for _, p := range progs { iter := p.Instructions.Iterate() for iter.Next() { ins := iter.Ins meta, _ := ins.Metadata.Get(ksymMetaKey{}).(*ksymMeta) if meta != nil { addrs[meta.Name] = 0 } } } if len(addrs) != 0 { if err := kallsyms.AssignAddresses(addrs); err != nil { return fmt.Errorf("getting addresses from kallsyms: %w", err) } } return nil } // close all resources left over in the collectionLoader. func (cl *collectionLoader) close() { for _, m := range cl.maps { m.Close() } for _, p := range cl.programs { p.Close() } } func (cl *collectionLoader) loadMap(mapName string) (*Map, error) { if m := cl.maps[mapName]; m != nil { return m, nil } mapSpec := cl.coll.Maps[mapName] if mapSpec == nil { return nil, fmt.Errorf("missing map %s", mapName) } mapSpec = mapSpec.Copy() // Defer setting the mmapable flag on maps until load time. This avoids the // MapSpec having different flags on some kernel versions. Also avoid running // syscalls during ELF loading, so platforms like wasm can also parse an ELF. 
if isDataSection(mapSpec.Name) && haveMmapableMaps() == nil { mapSpec.Flags |= sys.BPF_F_MMAPABLE } if replaceMap, ok := cl.opts.MapReplacements[mapName]; ok { // Check compatibility with the replacement map after setting // feature-dependent map flags. if err := mapSpec.Compatible(replaceMap); err != nil { return nil, fmt.Errorf("using replacement map %s: %w", mapSpec.Name, err) } // Clone the map to avoid closing user's map later on. m, err := replaceMap.Clone() if err != nil { return nil, err } cl.maps[mapName] = m return m, nil } if err := mapSpec.updateDataSection(cl.coll.Variables, mapName); err != nil { return nil, fmt.Errorf("assembling contents of map %s: %w", mapName, err) } m, err := newMapWithOptions(mapSpec, cl.opts.Maps, cl.types) if err != nil { return nil, fmt.Errorf("map %s: %w", mapName, err) } // Finalize 'scalar' maps that don't refer to any other eBPF resources // potentially pending creation. This is needed for frozen maps like .rodata // that need to be finalized before invoking the verifier. if !mapSpec.Type.canStoreMapOrProgram() { if err := m.finalize(mapSpec); err != nil { _ = m.Close() return nil, fmt.Errorf("finalizing map %s: %w", mapName, err) } } cl.maps[mapName] = m return m, nil } func (cl *collectionLoader) loadProgram(progName string) (*Program, error) { if prog := cl.programs[progName]; prog != nil { return prog, nil } progSpec := cl.coll.Programs[progName] if progSpec == nil { return nil, fmt.Errorf("unknown program %s", progName) } // Bail out early if we know the kernel is going to reject the program. // This skips loading map dependencies, saving some cleanup work later. if progSpec.Type == UnspecifiedProgram { return nil, fmt.Errorf("cannot load program %s: program type is unspecified", progName) } progSpec = progSpec.Copy() // Rewrite any reference to a valid map in the program's instructions, // which includes all of its dependencies. 
for i := range progSpec.Instructions { ins := &progSpec.Instructions[i] if !ins.IsLoadFromMap() || ins.Reference() == "" { continue } // Don't overwrite map loads containing non-zero map fd's, // they can be manually included by the caller. // Map FDs/IDs are placed in the lower 32 bits of Constant. if int32(ins.Constant) > 0 { continue } m, err := cl.loadMap(ins.Reference()) if err != nil { return nil, fmt.Errorf("program %s: %w", progName, err) } if err := ins.AssociateMap(m); err != nil { return nil, fmt.Errorf("program %s: map %s: %w", progName, ins.Reference(), err) } } prog, err := newProgramWithOptions(progSpec, cl.opts.Programs, cl.types) if err != nil { return nil, fmt.Errorf("program %s: %w", progName, err) } cl.programs[progName] = prog return prog, nil } func (cl *collectionLoader) loadVariable(varName string) (*Variable, error) { if v := cl.vars[varName]; v != nil { return v, nil } varSpec := cl.coll.Variables[varName] if varSpec == nil { return nil, fmt.Errorf("unknown variable %s", varName) } m, err := cl.loadMap(varSpec.SectionName) if err != nil { return nil, fmt.Errorf("variable %s: %w", varName, err) } // If the kernel is too old or the underlying map was created without // BPF_F_MMAPABLE, [Map.Memory] will return ErrNotSupported. In this case, // emit a Variable with a nil Memory. This keeps Collection{Spec}.Variables // consistent across systems with different feature sets without breaking // LoadAndAssign. 
var mm *Memory if unsafeMemory { mm, err = m.unsafeMemory() } else { mm, err = m.Memory() } if err != nil && !errors.Is(err, ErrNotSupported) { return nil, fmt.Errorf("variable %s: getting memory for map %s: %w", varName, varSpec.SectionName, err) } v, err := newVariable( varSpec.Name, varSpec.Offset, varSpec.Size(), varSpec.Type, mm, ) if err != nil { return nil, fmt.Errorf("variable %s: %w", varName, err) } cl.vars[varName] = v return v, nil } // populateDeferredMaps iterates maps holding programs or other maps and loads // any dependencies. Populates all maps in cl and freezes them if specified. func (cl *collectionLoader) populateDeferredMaps() error { for mapName, m := range cl.maps { mapSpec, ok := cl.coll.Maps[mapName] if !ok { return fmt.Errorf("missing map spec %s", mapName) } // Scalar maps without Map or Program references are finalized during // creation. Don't finalize them again. if !mapSpec.Type.canStoreMapOrProgram() { continue } mapSpec = mapSpec.Copy() // MapSpecs that refer to inner maps or programs within the same // CollectionSpec do so using strings. These strings are used as the key // to look up the respective object in the Maps or Programs fields. // Resolve those references to actual Map or Program resources that // have been loaded into the kernel. for i, kv := range mapSpec.Contents { objName, ok := kv.Value.(string) if !ok { continue } switch t := mapSpec.Type; { case t.canStoreProgram(): // loadProgram is idempotent and could return an existing Program. prog, err := cl.loadProgram(objName) if err != nil { return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err) } mapSpec.Contents[i] = MapKV{kv.Key, prog} case t.canStoreMap(): // loadMap is idempotent and could return an existing Map. 
innerMap, err := cl.loadMap(objName) if err != nil { return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err) } mapSpec.Contents[i] = MapKV{kv.Key, innerMap} } } if mapSpec.Type == StructOpsMap { // populate StructOps data into `kernVData` if err := cl.populateStructOps(m, mapSpec); err != nil { return err } } // Populate and freeze the map if specified. if err := m.finalize(mapSpec); err != nil { return fmt.Errorf("populating map %s: %w", mapName, err) } } return nil } // populateStructOps translates the user struct bytes into the kernel value struct // layout for a struct_ops map and writes the result back to mapSpec.Contents[0]. func (cl *collectionLoader) populateStructOps(m *Map, mapSpec *MapSpec) error { userType, ok := btf.As[*btf.Struct](mapSpec.Value) if !ok { return fmt.Errorf("value should be a *Struct") } userData, err := mapSpec.dataSection() if err != nil { return fmt.Errorf("getting data section: %w", err) } if len(userData) < int(userType.Size) { return fmt.Errorf("user data too short: have %d, need at least %d", len(userData), userType.Size) } vType, _, module, err := structOpsFindTarget(userType, cl.types) if err != nil { return fmt.Errorf("struct_ops value type %q: %w", userType.Name, err) } defer module.Close() // Find the inner ops struct embedded in the value struct. kType, kTypeOff, err := structOpsFindInnerType(vType) if err != nil { return err } kernVData := make([]byte, int(vType.Size)) for _, m := range userType.Members { i := slices.IndexFunc(kType.Members, func(km btf.Member) bool { return km.Name == m.Name }) // Allow field to not exist in target as long as the source is zero. 
if i == -1 { mSize, err := btf.Sizeof(m.Type) if err != nil { return fmt.Errorf("sizeof(user.%s): %w", m.Name, err) } srcOff := int(m.Offset.Bytes()) if srcOff < 0 || srcOff+mSize > len(userData) { return fmt.Errorf("member %q: userdata is too small", m.Name) } // let fail if the field in type user type is missing in type kern type if !structOpsIsMemZeroed(userData[srcOff : srcOff+mSize]) { return fmt.Errorf("%s doesn't exist in %s, but it has non-zero value", m.Name, kType.Name) } continue } km := kType.Members[i] switch btf.UnderlyingType(m.Type).(type) { case *btf.Pointer: // If this is a pointer → resolve struct_ops program. psKey := kType.Name + ":" + m.Name for k, ps := range cl.coll.Programs { if ps.AttachTo == psKey { p, ok := cl.programs[k] if !ok || p == nil { return nil } if err := structOpsPopulateValue(km, kernVData[kTypeOff:], p); err != nil { return err } } } default: // Otherwise → memcpy the field contents. if err := structOpsCopyMember(m, km, userData, kernVData[kTypeOff:]); err != nil { return fmt.Errorf("field %s: %w", kType.Name, err) } } } // Populate the map explicitly and keep a reference on cl.programs. // This is necessary because we may inline fds into kernVData which // may become invalid if the GC frees them. if err := m.Put(uint32(0), kernVData); err != nil { return err } mapSpec.Contents = nil runtime.KeepAlive(cl.programs) return nil } // resolveKconfig resolves all variables declared in .kconfig and populates // m.Contents. Does nothing if the given m.Contents is non-empty. 
func resolveKconfig(m *MapSpec) error {
	ds, ok := m.Value.(*btf.Datasec)
	if !ok {
		return errors.New("map value is not a Datasec")
	}

	if platform.IsWindows {
		return fmt.Errorf(".kconfig: %w", internal.ErrNotSupportedOnOS)
	}

	// configInfo records where in the section a CONFIG_* variable lives and
	// which type its value must be encoded as.
	type configInfo struct {
		offset uint32
		size   uint32
		typ    btf.Type
	}

	configs := make(map[string]configInfo)

	// data is the section image that is filled in variable by variable.
	data := make([]byte, ds.Size)
	for _, vsi := range ds.Vars {
		v := vsi.Type.(*btf.Var)
		n := v.TypeName()

		switch n {
		case "LINUX_KERNEL_VERSION":
			if integer, ok := v.Type.(*btf.Int); !ok || integer.Size != 4 {
				return fmt.Errorf("variable %s must be a 32 bits integer, got %s", n, v.Type)
			}

			kv, err := linux.KernelVersion()
			if err != nil {
				return fmt.Errorf("getting kernel version: %w", err)
			}
			internal.NativeEndian.PutUint32(data[vsi.Offset:], kv.Kernel())

		case "LINUX_HAS_SYSCALL_WRAPPER":
			integer, ok := v.Type.(*btf.Int)
			if !ok {
				return fmt.Errorf("variable %s must be an integer, got %s", n, v.Type)
			}
			// Default to 1 and only store 0 if the feature probe reports
			// ErrNotSupported. Any other probe error is fatal.
			var value uint64 = 1
			if err := haveSyscallWrapper(); errors.Is(err, ErrNotSupported) {
				value = 0
			} else if err != nil {
				return fmt.Errorf("unable to derive a value for LINUX_HAS_SYSCALL_WRAPPER: %w", err)
			}

			if err := kconfig.PutInteger(data[vsi.Offset:], integer, value); err != nil {
				return fmt.Errorf("set LINUX_HAS_SYSCALL_WRAPPER: %w", err)
			}

		default: // Catch CONFIG_*.
			// Remember the variable; its value is filled in from the kconfig
			// file below.
			configs[n] = configInfo{
				offset: vsi.Offset,
				size:   vsi.Size,
				typ:    v.Type,
			}
		}
	}

	// We only parse kconfig file if a CONFIG_* variable was found.
	if len(configs) > 0 {
		f, err := linux.FindKConfig()
		if err != nil {
			return fmt.Errorf("cannot find a kconfig file: %w", err)
		}
		defer f.Close()

		// Pass the collected variable names to the parser as a filter.
		filter := make(map[string]struct{}, len(configs))
		for config := range configs {
			filter[config] = struct{}{}
		}

		kernelConfig, err := kconfig.Parse(f, filter)
		if err != nil {
			return fmt.Errorf("cannot parse kconfig file: %w", err)
		}

		for n, info := range configs {
			// Every requested option must be present in the parsed config.
			value, ok := kernelConfig[n]
			if !ok {
				return fmt.Errorf("config option %q does not exist on this kernel", n)
			}

			err := kconfig.PutValue(data[info.offset:info.offset+info.size], info.typ, value)
			if err != nil {
				return fmt.Errorf("problem adding value for %s: %w", n, err)
			}
		}
	}

	// Store the assembled section as the single entry at key 0.
	m.Contents = []MapKV{{uint32(0), data}}

	return nil
}

// LoadCollection reads an object file and creates and loads its declared
// resources into the kernel.
//
// On Windows, a file with the extension ".sys" is loaded as a native image
// instead of being parsed into a CollectionSpec.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func LoadCollection(file string) (*Collection, error) {
	if platform.IsWindows {
		// This mirrors a check in efW.
		if ext := filepath.Ext(file); ext == ".sys" {
			return loadCollectionFromNativeImage(file)
		}
	}

	spec, err := LoadCollectionSpec(file)
	if err != nil {
		return nil, err
	}
	return NewCollection(spec)
}

// Assign the contents of a Collection to a struct.
//
// This function bridges functionality between bpf2go generated
// code and any functionality better implemented in Collection.
//
// 'to' must be a pointer to a struct. A field of the
// struct is updated with values from Programs, Maps or Variables if it
// has an `ebpf` tag and its type is *Program, *Map or *Variable.
// The tag's value specifies the name of the program, map or variable as
// found in the CollectionSpec.
//
//	struct {
//	    Foo     *ebpf.Program `ebpf:"xdp_foo"`
//	    Bar     *ebpf.Map     `ebpf:"bar_map"`
//	    Ignored int
//	}
//
// Returns an error if any of the eBPF objects can't be found, or
// if the same Map or Program is assigned multiple times.
// // Ownership and Close()ing responsibility is transferred to `to` // for any successful assigns. On error `to` is left in an undefined state. func (coll *Collection) Assign(to interface{}) error { assignedMaps := make(map[string]bool) assignedProgs := make(map[string]bool) assignedVars := make(map[string]bool) // Assign() only transfers already-loaded Maps and Programs. No extra // loading is done. getValue := func(typ reflect.Type, name string) (interface{}, error) { switch typ { case reflect.TypeOf((*Program)(nil)): if p := coll.Programs[name]; p != nil { assignedProgs[name] = true return p, nil } return nil, fmt.Errorf("missing program %q", name) case reflect.TypeOf((*Map)(nil)): if m := coll.Maps[name]; m != nil { assignedMaps[name] = true return m, nil } return nil, fmt.Errorf("missing map %q", name) case reflect.TypeOf((*Variable)(nil)): if v := coll.Variables[name]; v != nil { assignedVars[name] = true return v, nil } return nil, fmt.Errorf("missing variable %q", name) default: return nil, fmt.Errorf("unsupported type %s", typ) } } if err := assignValues(to, getValue); err != nil { return err } // Finalize ownership transfer for p := range assignedProgs { delete(coll.Programs, p) } for m := range assignedMaps { delete(coll.Maps, m) } for s := range assignedVars { delete(coll.Variables, s) } return nil } // Close frees all maps and programs associated with the collection. // // The collection mustn't be used afterwards. func (coll *Collection) Close() { for _, prog := range coll.Programs { prog.Close() } for _, m := range coll.Maps { m.Close() } } // DetachMap removes the named map from the Collection. // // This means that a later call to Close() will not affect this map. // // Returns nil if no map of that name exists. func (coll *Collection) DetachMap(name string) *Map { m := coll.Maps[name] delete(coll.Maps, name) return m } // DetachProgram removes the named program from the Collection. 
// structField represents a struct field containing the ebpf struct tag.
type structField struct {
	reflect.StructField
	value reflect.Value
}

// ebpfFields extracts field names tagged with 'ebpf' from a struct type.
//
// Untagged fields which are structs, or non-nil pointers to structs, are
// descended into and their tagged fields are returned as well. Fields of any
// other kind are ignored.
//
// visited tracks the struct types on the current descent path to avoid
// infinite recursion through pointer cycles. Pass nil at the top level. A type
// is only marked for the duration of its own traversal, so the same type may
// appear several times as long as it doesn't contain itself.
//
// Returns an error if structVal is not a struct, if an untagged pointer to a
// struct is nil, or if a type is reached again while it is being traversed.
func ebpfFields(structVal reflect.Value, visited map[reflect.Type]bool) ([]structField, error) {
	if visited == nil {
		visited = make(map[reflect.Type]bool)
	}

	structType := structVal.Type()
	if structType.Kind() != reflect.Struct {
		return nil, fmt.Errorf("%s is not a struct", structType)
	}

	if visited[structType] {
		return nil, fmt.Errorf("recursion on type %s", structType)
	}

	// Mark the type while it is being traversed and release it afterwards.
	// Without the mark the check above can never trigger.
	visited[structType] = true
	defer delete(visited, structType)

	fields := make([]structField, 0, structType.NumField())
	for i := 0; i < structType.NumField(); i++ {
		field := structField{structType.Field(i), structVal.Field(i)}

		// If the field is tagged, gather it and move on.
		name := field.Tag.Get("ebpf")
		if name != "" {
			fields = append(fields, field)
			continue
		}

		// If the field does not have an ebpf tag, but is a struct or a pointer
		// to a struct, attempt to gather its fields as well.
		var v reflect.Value
		switch field.Type.Kind() {
		case reflect.Ptr:
			if field.Type.Elem().Kind() != reflect.Struct {
				continue
			}

			if field.value.IsNil() {
				return nil, fmt.Errorf("nil pointer to %s", structType)
			}

			// Obtain the destination type of the pointer.
			v = field.value.Elem()

		case reflect.Struct:
			// Reference the value's type directly.
			v = field.value

		default:
			continue
		}

		inner, err := ebpfFields(v, visited)
		if err != nil {
			return nil, fmt.Errorf("field %s: %w", field.Name, err)
		}

		fields = append(fields, inner...)
	}

	return fields, nil
}
// // getValue is called for every tagged field of 'to' and must return the value // to be assigned to the field with the given typ and name. func assignValues(to interface{}, getValue func(typ reflect.Type, name string) (interface{}, error)) error { toValue := reflect.ValueOf(to) if toValue.Type().Kind() != reflect.Ptr { return fmt.Errorf("%T is not a pointer to struct", to) } if toValue.IsNil() { return fmt.Errorf("nil pointer to %T", to) } fields, err := ebpfFields(toValue.Elem(), nil) if err != nil { return err } type elem struct { // Either *Map or *Program typ reflect.Type name string } assigned := make(map[elem]string) for _, field := range fields { // Get string value the field is tagged with. tag := field.Tag.Get("ebpf") if strings.Contains(tag, ",") { return fmt.Errorf("field %s: ebpf tag contains a comma", field.Name) } // Check if the eBPF object with the requested // type and tag was already assigned elsewhere. e := elem{field.Type, tag} if af := assigned[e]; af != "" { return fmt.Errorf("field %s: object %q was already assigned to %s", field.Name, tag, af) } // Get the eBPF object referred to by the tag. 
value, err := getValue(field.Type, tag) if err != nil { return fmt.Errorf("field %s: %w", field.Name, err) } if !field.value.CanSet() { return fmt.Errorf("field %s: can't set value", field.Name) } field.value.Set(reflect.ValueOf(value)) assigned[e] = field.Name } return nil } ================================================ FILE: collection_other.go ================================================ //go:build !windows package ebpf import "github.com/cilium/ebpf/internal" func loadCollectionFromNativeImage(_ string) (*Collection, error) { return nil, internal.ErrNotSupportedOnOS } ================================================ FILE: collection_test.go ================================================ package ebpf import ( "encoding/binary" "errors" "fmt" "io" "os" "reflect" "slices" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/testutils/testmain" ) func TestMain(m *testing.M) { testmain.Run(m) } func TestCollectionSpecNotModified(t *testing.T) { spec := &CollectionSpec{ Maps: map[string]*MapSpec{ "my-map": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, ".rodata": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, Flags: 0, // Loader sets BPF_F_MMAPABLE. 
// TestCollectionSpecCopy checks CollectionSpec.Copy: copying a nil spec must
// yield nil, and copying a populated spec must satisfy testutils.IsDeepCopy.
func TestCollectionSpecCopy(t *testing.T) {
	ms := &MapSpec{
		Type:       Array,
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 1,
	}

	// NOTE(review): the literal below is positional, presumably so that adding
	// a field to CollectionSpec breaks compilation here and forces this test
	// to populate it — confirm before converting it to keyed fields.
	cs := &CollectionSpec{
		map[string]*MapSpec{"my-map": ms},
		map[string]*ProgramSpec{
			"test": {
				Type: SocketFilter,
				Instructions: asm.Instructions{
					asm.LoadMapPtr(asm.R1, 0),
					asm.LoadImm(asm.R0, 0, asm.DWord),
					asm.Return(),
				},
				License: "MIT",
			},
		},
		map[string]*VariableSpec{
			"my-var": {
				Name:        "my-var",
				SectionName: "my-map",
				Offset:      0,
			},
		},
		&btf.Spec{},
		binary.LittleEndian,
	}

	// Copy is called on a typed nil pointer, this must not panic.
	qt.Check(t, qt.IsNil((*CollectionSpec)(nil).Copy()))
	qt.Assert(t, testutils.IsDeepCopy(cs.Copy(), cs))
}
asm.Mov.Imm(asm.R0, 0).WithSymbol("error"), asm.Return().WithSymbol("ret"), }, } func TestCollectionSpecMapReplacements(t *testing.T) { cs := &CollectionSpec{ Maps: map[string]*MapSpec{ "test-map": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, Programs: map[string]*ProgramSpec{ "test-prog": loadKeyFromMapProgramSpec.Copy(), }, } // Replace the map with another one newMap := mustNewMap(t, cs.Maps["test-map"], nil) err := newMap.Put(uint32(0), uint32(2)) if err != nil { t.Fatal(err) } coll := mustNewCollection(t, cs, &CollectionOptions{ MapReplacements: map[string]*Map{ "test-map": newMap, }, }) ret := mustRun(t, coll.Programs["test-prog"], nil) if ret != 2 { t.Fatal("new / override map not used") } // Check that newMap isn't closed when the collection is closed coll.Close() err = newMap.Put(uint32(0), uint32(3)) if err != nil { t.Fatalf("failed to update replaced map: %s", err) } } func TestCollectionSpecMapReplacements_NonExistingMap(t *testing.T) { cs := &CollectionSpec{ Maps: map[string]*MapSpec{ "test-map": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, } // Override non-existing map newMap := mustNewMap(t, cs.Maps["test-map"], nil) coll, err := newCollection(t, cs, &CollectionOptions{ MapReplacements: map[string]*Map{ "non-existing-map": newMap, }, }) if err == nil { coll.Close() t.Fatal("Overriding a non existing map did not fail") } } func TestCollectionSpecMapReplacements_SpecMismatch(t *testing.T) { cs := &CollectionSpec{ Maps: map[string]*MapSpec{ "test-map": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, } // Override map with mismatching spec newMap := mustNewMap(t, &MapSpec{ Type: Array, KeySize: 4, ValueSize: 8, // this is different MaxEntries: 1, }, nil) coll, err := newCollection(t, cs, &CollectionOptions{ MapReplacements: map[string]*Map{ "test-map": newMap, }, }) if err == nil { coll.Close() t.Fatal("Overriding a map with a mismatching spec did not fail") } if !errors.Is(err, ErrMapIncompatible) { 
t.Fatalf("Overriding a map with a mismatching spec failed with the wrong error") } } func TestMapReplacementsDataSections(t *testing.T) { // In some circumstances, it can be useful to share data sections between // Collections, for example to hold a ready/pause flag or some metrics. // Test read-only maps for good measure. file := testutils.NativeFile(t, "testdata/loader-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var objs struct { Data *Map `ebpf:".data"` ROData *Map `ebpf:".rodata"` } mustLoadAndAssign(t, spec, &objs, nil) defer objs.Data.Close() defer objs.ROData.Close() mustLoadAndAssign(t, spec, &objs, &CollectionOptions{ MapReplacements: map[string]*Map{ ".data": objs.Data, ".rodata": objs.ROData, }, }) qt.Assert(t, qt.IsNil(objs.Data.Close())) qt.Assert(t, qt.IsNil(objs.ROData.Close())) } func TestCollectionSpec_LoadAndAssign_LazyLoading(t *testing.T) { spec := &CollectionSpec{ Maps: map[string]*MapSpec{ "valid": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, "bogus": { Type: Array, MaxEntries: 0, }, }, Programs: map[string]*ProgramSpec{ "valid": { Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }, "bogus": { Type: SocketFilter, Instructions: asm.Instructions{ // Undefined return value is rejected asm.Return(), }, License: "MIT", }, }, } var objs struct { Prog *Program `ebpf:"valid"` Map *Map `ebpf:"valid"` } mustLoadAndAssign(t, spec, &objs, nil) defer objs.Prog.Close() defer objs.Map.Close() if objs.Prog == nil { t.Error("Program is nil") } if objs.Map == nil { t.Error("Map is nil") } } func TestCollectionSpecAssign(t *testing.T) { var specs struct { Program *ProgramSpec `ebpf:"prog1"` Map *MapSpec `ebpf:"map1"` Variable *VariableSpec `ebpf:"var1"` } mapSpec := &MapSpec{ Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, } progSpec := &ProgramSpec{ Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), 
asm.Return(), }, License: "MIT", } cs := &CollectionSpec{ Maps: map[string]*MapSpec{ "map1": mapSpec, }, Programs: map[string]*ProgramSpec{ "prog1": progSpec, }, Variables: map[string]*VariableSpec{ "var1": {}, }, } if err := cs.Assign(&specs); err != nil { t.Fatal("Can't assign spec:", err) } if specs.Program != progSpec { t.Fatalf("Expected Program to be %p, got %p", progSpec, specs.Program) } if specs.Map != mapSpec { t.Fatalf("Expected Map to be %p, got %p", mapSpec, specs.Map) } if err := cs.Assign(new(int)); err == nil { t.Fatal("Assign allows to besides *struct") } if err := cs.Assign(new(struct{ Foo int })); err != nil { t.Fatal("Assign doesn't ignore untagged fields") } unexported := new(struct { foo *MapSpec `ebpf:"map1"` }) if err := cs.Assign(unexported); err == nil { t.Error("Assign should return an error on unexported fields") } } func TestNewCollectionFdLeak(t *testing.T) { spec := &CollectionSpec{ Maps: map[string]*MapSpec{ "map1": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, // 8 byte value will cause m.finalize to fail. 
Contents: []MapKV{{uint32(0), uint64(0)}}, }, }, } _, err := newCollection(t, spec, nil) qt.Assert(t, qt.IsNotNil(err)) } func TestAssignValues(t *testing.T) { zero := func(t reflect.Type, name string) (interface{}, error) { return reflect.Zero(t).Interface(), nil } type t1 struct { Bar int `ebpf:"bar"` } type t2 struct { t1 Foo int `ebpf:"foo"` } type t2ptr struct { *t1 Foo int `ebpf:"foo"` } invalid := []struct { name string to interface{} }{ {"non-struct", 1}, {"non-pointer struct", t1{}}, {"pointer to non-struct", new(int)}, {"embedded nil pointer", &t2ptr{}}, {"unexported field", new(struct { foo int `ebpf:"foo"` })}, {"identical tag", new(struct { Foo1 int `ebpf:"foo"` Foo2 int `ebpf:"foo"` })}, } for _, testcase := range invalid { t.Run(testcase.name, func(t *testing.T) { if err := assignValues(testcase.to, zero); err == nil { t.Fatal("assignValues didn't return an error") } else { t.Log(err) } }) } valid := []struct { name string to interface{} }{ {"pointer to struct", new(t1)}, {"embedded struct", new(t2)}, {"embedded struct pointer", &t2ptr{t1: new(t1)}}, {"untagged field", new(struct{ Foo int })}, } for _, testcase := range valid { t.Run(testcase.name, func(t *testing.T) { if err := assignValues(testcase.to, zero); err != nil { t.Fatal("assignValues returned", err) } }) } } func TestCollectionAssign(t *testing.T) { var objs struct { Program *Program `ebpf:"prog1"` Map *Map `ebpf:"map1"` } cs := &CollectionSpec{ Maps: map[string]*MapSpec{ "map1": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, Programs: map[string]*ProgramSpec{ "prog1": { Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }, }, } coll := mustNewCollection(t, cs, nil) qt.Assert(t, qt.IsNil(coll.Assign(&objs))) defer objs.Program.Close() defer objs.Map.Close() // Check that objs has received ownership of map and prog qt.Assert(t, qt.IsTrue(objs.Program.FD() >= 0)) qt.Assert(t, qt.IsTrue(objs.Map.FD() >= 0)) 
// Check that the collection has lost ownership qt.Assert(t, qt.IsNil(coll.Programs["prog1"])) qt.Assert(t, qt.IsNil(coll.Maps["map1"])) } func TestCollectionAssignFail(t *testing.T) { // `map2` does not exist var objs struct { Program *Program `ebpf:"prog1"` Map *Map `ebpf:"map2"` } cs := &CollectionSpec{ Maps: map[string]*MapSpec{ "map1": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, Programs: map[string]*ProgramSpec{ "prog1": { Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }, }, } coll := mustNewCollection(t, cs, nil) qt.Assert(t, qt.IsNotNil(coll.Assign(&objs))) // Check that the collection has retained ownership qt.Assert(t, qt.IsNotNil(coll.Programs["prog1"])) qt.Assert(t, qt.IsNotNil(coll.Maps["map1"])) } func TestIncompleteLoadAndAssign(t *testing.T) { spec := &CollectionSpec{ Programs: map[string]*ProgramSpec{ "valid": { Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }, "invalid": { Type: SocketFilter, Instructions: asm.Instructions{ asm.Return(), }, License: "MIT", }, }, } s := struct { // Assignment to Valid should execute and succeed. Valid *Program `ebpf:"valid"` // Assignment to Invalid should fail and cause Valid's fd to be closed. 
Invalid *Program `ebpf:"invalid"` }{} if err := loadAndAssign(t, spec, &s, nil); err == nil { t.Fatal("expected error loading invalid ProgramSpec") } if s.Valid == nil { t.Fatal("expected valid prog to be non-nil") } if fd := s.Valid.FD(); fd != -1 { t.Fatal("expected valid prog to have closed fd -1, got:", fd) } if s.Invalid != nil { t.Fatal("expected invalid prog to be nil due to never being assigned") } } func BenchmarkNewCollection(b *testing.B) { file := testutils.NativeFile(b, "testdata/loader-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { b.Fatal(err) } for _, m := range spec.Maps { m.Pinning = PinNone } spec = fixupCollectionSpec(spec) b.ReportAllocs() for b.Loop() { coll, err := NewCollection(spec) if err != nil { b.Fatal(err) } coll.Close() } } func BenchmarkNewCollectionManyProgs(b *testing.B) { file := testutils.NativeFile(b, "testdata/manyprogs-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { b.Fatal(err) } spec = fixupCollectionSpec(spec) b.ReportAllocs() for b.Loop() { coll, err := NewCollection(spec) if err != nil { b.Fatal(err) } coll.Close() } } func BenchmarkLoadCollectionManyProgs(b *testing.B) { file, err := os.Open(testutils.NativeFile(b, "testdata/manyprogs-%s.elf")) qt.Assert(b, qt.IsNil(err)) defer file.Close() b.ReportAllocs() for b.Loop() { _, err := file.Seek(0, io.SeekStart) if err != nil { b.Fatal(err) } _, err = LoadCollectionSpecFromReader(file) if err != nil { b.Fatal(err) } } } func ExampleCollectionSpec_Assign() { spec := &CollectionSpec{ Maps: map[string]*MapSpec{ "map1": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, Programs: map[string]*ProgramSpec{ "prog1": { Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }, }, } type maps struct { Map *MapSpec `ebpf:"map1"` } var specs struct { maps Program *ProgramSpec `ebpf:"prog1"` } if err := spec.Assign(&specs); err != nil { panic(err) } fmt.Println(specs.Program.Type) 
fmt.Println(specs.Map.Type) // Output: SocketFilter // Array } func ExampleCollectionSpec_LoadAndAssign() { spec := &CollectionSpec{ Maps: map[string]*MapSpec{ "map1": { Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, Programs: map[string]*ProgramSpec{ "prog1": { Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }, }, } var objs struct { Program *Program `ebpf:"prog1"` Map *Map `ebpf:"map1"` } if err := spec.LoadAndAssign(&objs, nil); err != nil { panic(err) } defer objs.Program.Close() defer objs.Map.Close() } func TestStructOpsMapSpecSimpleLoadAndAssign(t *testing.T) { requireTestmodOps(t) makeProg := func(attachTo string) map[string]*ProgramSpec { return map[string]*ProgramSpec{ "test_1": { Name: "test_1", Type: StructOps, AttachTo: attachTo, License: "GPL", Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, }, } } funcPtr := &btf.Pointer{ Target: &btf.FuncProto{ Return: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}, }, } type testCase struct { name string withProg bool attachTo string valueType *btf.Struct valueBytes []byte } cases := []testCase{ { name: "ops_with_data", withProg: true, attachTo: "bpf_testmod_ops:test_1", valueType: &btf.Struct{ Name: "bpf_testmod_ops", Size: 16, Members: []btf.Member{ { Name: "test_1", Type: funcPtr, Offset: 0, }, { Name: "data", Type: &btf.Int{Name: "int", Size: 4}, Offset: 64, // bits }, }, }, valueBytes: []byte{ // test_1 func ptr (8B) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // data (4B) + padding (4B) 0xde, 0xed, 0xbe, 0xef, 0x00, 0x00, 0x00, 0x00, }, }, { name: "ops_only_func", withProg: true, attachTo: "bpf_testmod_ops2:test_1", valueType: &btf.Struct{ Name: "bpf_testmod_ops2", Size: 8, Members: []btf.Member{ { Name: "test_1", Type: funcPtr, Offset: 0, }, }, }, valueBytes: []byte{ // test_1 func ptr (8B) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, }, { name: "ops_empty_value", withProg: false, 
valueType: &btf.Struct{ Name: "bpf_testmod_ops2", Size: 0, Members: []btf.Member{}, }, valueBytes: []byte{}, }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { spec := &CollectionSpec{ Programs: map[string]*ProgramSpec{}, Maps: map[string]*MapSpec{ "testmod_ops": { Name: "testmod_ops", Type: StructOpsMap, Flags: sys.BPF_F_LINK, Key: &btf.Int{Size: 4}, KeySize: 4, Value: c.valueType, MaxEntries: 1, Contents: []MapKV{ { Key: uint32(0), Value: slices.Clone(c.valueBytes), }, }, }, }, } if c.withProg { spec.Programs = makeProg(c.attachTo) } coll := mustNewCollection(t, spec, nil) for name := range spec.Maps { qt.Assert(t, qt.IsNotNil(coll.Maps[name])) } for name := range spec.Programs { qt.Assert(t, qt.IsNotNil(coll.Programs[name])) } }) } } func TestLinkedELF(t *testing.T) { spec, err := LoadCollectionSpec("testdata/linked-el.elf") qt.Assert(t, qt.IsNil(err)) // Require all maps that won during linking to have a MaxEntries of 1. for name, m := range spec.Maps { qt.Assert(t, qt.Equals(m.MaxEntries, 1), qt.Commentf(name)) } // Require all programs that won during linking to return 0 when executed. // Programs that should be overridden during linking should return their line // numbers. coll := mustNewCollection(t, spec, nil) for name, prog := range coll.Programs { res := mustRun(t, prog, nil) qt.Assert(t, qt.Equals(res, 0), qt.Commentf(name)) } } ================================================ FILE: collection_windows.go ================================================ package ebpf import ( "errors" "fmt" "unsafe" "github.com/cilium/ebpf/internal/efw" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) func loadCollectionFromNativeImage(file string) (_ *Collection, err error) { mapFds := make([]efw.FD, 32) programFds := make([]efw.FD, 32) var maps map[string]*Map var programs map[string]*Program defer func() { if err == nil { return } for _, fd := range append(mapFds, programFds...) { // efW never uses fd 0. 
if fd != 0 { _ = efw.EbpfCloseFd(int(fd)) } } for _, m := range maps { _ = m.Close() } for _, p := range programs { _ = p.Close() } }() nMaps, nPrograms, err := efw.EbpfObjectLoadNativeFds(file, mapFds, programFds) if errors.Is(err, efw.EBPF_NO_MEMORY) && (nMaps > len(mapFds) || nPrograms > len(programFds)) { mapFds = make([]efw.FD, nMaps) programFds = make([]efw.FD, nPrograms) nMaps, nPrograms, err = efw.EbpfObjectLoadNativeFds(file, mapFds, programFds) } if err != nil { return nil, err } mapFds = mapFds[:nMaps] programFds = programFds[:nPrograms] // The maximum length of a name is only 16 bytes on Linux, longer names // are truncated. This is not a problem when loading from an ELF, since // we get the full object name from the symbol table. // When loading a native image we do not have this luxury. Use an efW native // API to retrieve up to 64 bytes of the object name. maps = make(map[string]*Map, len(mapFds)) for _, raw := range mapFds { fd, err := sys.NewFD(int(raw)) if err != nil { return nil, err } m, mapErr := newMapFromFD(fd) if mapErr != nil { _ = fd.Close() return nil, mapErr } var efwMapInfo efw.BpfMapInfo size := uint32(unsafe.Sizeof(efwMapInfo)) _, err = efw.EbpfObjectGetInfoByFd(m.FD(), unsafe.Pointer(&efwMapInfo), &size) if err != nil { _ = m.Close() return nil, err } if size >= uint32(unsafe.Offsetof(efwMapInfo.Name)+unsafe.Sizeof(efwMapInfo.Name)) { m.name = unix.ByteSliceToString(efwMapInfo.Name[:]) } if m.name == "" { _ = m.Close() return nil, fmt.Errorf("unnamed map") } if _, ok := maps[m.name]; ok { return nil, fmt.Errorf("duplicate map with the same name: %s", m.name) } maps[m.name] = m } programs = make(map[string]*Program, len(programFds)) for _, raw := range programFds { fd, err := sys.NewFD(int(raw)) if err != nil { return nil, err } program, err := newProgramFromFD(fd) if err != nil { _ = fd.Close() return nil, err } var efwProgInfo efw.BpfProgInfo size := uint32(unsafe.Sizeof(efwProgInfo)) _, err = efw.EbpfObjectGetInfoByFd(program.FD(), 
unsafe.Pointer(&efwProgInfo), &size) if err != nil { _ = program.Close() return nil, err } if size >= uint32(unsafe.Offsetof(efwProgInfo.Name)+unsafe.Sizeof(efwProgInfo.Name)) { program.name = unix.ByteSliceToString(efwProgInfo.Name[:]) } if program.name == "" { _ = program.Close() return nil, fmt.Errorf("unnamed program") } if _, ok := programs[program.name]; ok { _ = program.Close() return nil, fmt.Errorf("duplicate program with the same name: %s", program.name) } programs[program.name] = program } return &Collection{programs, maps, nil}, nil } ================================================ FILE: collection_windows_test.go ================================================ package ebpf import ( "path/filepath" "sort" "testing" "github.com/go-quicktest/qt" ) func TestLoadNativeImage(t *testing.T) { for _, tc := range []struct { file string maps []string programs []string }{ { "testdata/windows/cgroup_sock_addr.sys", []string{ "egress_connection_policy_map", "ingress_connection_policy_map", "socket_cookie_map", }, []string{ "authorize_connect4", "authorize_connect6", "authorize_recv_accept4", "authorize_recv_accept6", }, }, } { t.Run(filepath.Base(tc.file), func(t *testing.T) { coll, err := LoadCollection(tc.file) qt.Assert(t, qt.IsNil(err)) defer coll.Close() var mapNames []string for name, obj := range coll.Maps { qt.Assert(t, qt.Equals(obj.name, name)) mapNames = append(mapNames, name) } sort.Strings(mapNames) qt.Assert(t, qt.DeepEquals(mapNames, tc.maps)) var programNames []string for name, obj := range coll.Programs { qt.Assert(t, qt.Equals(obj.name, name)) programNames = append(programNames, name) } sort.Strings(programNames) qt.Assert(t, qt.DeepEquals(programNames, tc.programs)) }) } } ================================================ FILE: cpu.go ================================================ package ebpf // PossibleCPU returns the max number of CPUs a system may possibly have // Logical CPU numbers must be of the form 0-n func PossibleCPU() (int, error) { 
return possibleCPU() } // MustPossibleCPU is a helper that wraps a call to PossibleCPU and panics if // the error is non-nil. func MustPossibleCPU() int { cpus, err := PossibleCPU() if err != nil { panic(err) } return cpus } ================================================ FILE: cpu_other.go ================================================ //go:build !windows package ebpf import ( "sync" "github.com/cilium/ebpf/internal/linux" ) var possibleCPU = sync.OnceValues(func() (int, error) { return linux.ParseCPUsFromFile("/sys/devices/system/cpu/possible") }) ================================================ FILE: cpu_test.go ================================================ package ebpf import ( "testing" "github.com/go-quicktest/qt" ) func TestPossibleCPU(t *testing.T) { num, err := PossibleCPU() qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsTrue(num > 0)) } ================================================ FILE: cpu_windows.go ================================================ package ebpf import ( "sync" "golang.org/x/sys/windows" ) var possibleCPU = sync.OnceValues(func() (int, error) { return int(windows.GetMaximumProcessorCount(windows.ALL_PROCESSOR_GROUPS)), nil }) ================================================ FILE: doc.go ================================================ // Package ebpf is a toolkit for working with eBPF programs. // // eBPF programs are small snippets of code which are executed directly // in a VM in the Linux kernel, which makes them very fast and flexible. // Many Linux subsystems now accept eBPF programs. This makes it possible // to implement highly application specific logic inside the kernel, // without having to modify the actual kernel itself. // // This package is designed for long-running processes which // want to use eBPF to implement part of their application logic. It has no // run-time dependencies outside of the library and the Linux kernel itself. 
// eBPF code should be compiled ahead of time using clang, and shipped with // your application as any other resource. // // Use the link subpackage to attach a loaded program to a hook in the kernel. // // Note that losing all references to Map and Program resources will cause // their underlying file descriptors to be closed, potentially removing those // objects from the kernel. Always retain a reference by e.g. deferring a // Close() of a Collection or LoadAndAssign object until application exit. // // Special care needs to be taken when handling maps of type ProgramArray, // as the kernel erases its contents when the last userspace or bpffs // reference disappears, regardless of the map being in active use. package ebpf ================================================ FILE: docs/.gitignore ================================================ # Python __pycache__ # Build output build/ site/ ================================================ FILE: docs/Makefile ================================================ build: pipenv @# Run a production build of the documentation. Strict mode makes warnings fatal. pipenv run mkdocs build --strict @# Build main packages, discarding build output. go build -v ./... @# Build _test.go files containing Doc* functions, don't execute tests. go test -c -o /dev/null ./... >/dev/null preview: pipenv pipenv run mkdocs serve shell: pipenv @echo "pipenv shell" @exec pipenv shell pipenv: ifeq (, $(shell command -v pipenv 2> /dev/null)) $(error "pipenv is not installed, exiting..") endif @# Ensure a venv and install dependencies from Pipfile.lock. Buffer stdio @# and display it on error as pipenv uses stdin and stderr arbitrarily. 
@echo "pipenv sync" @out=`pipenv sync 2>&1` || echo "$${out}" .PHONY: pipenv ================================================ FILE: docs/Pipfile ================================================ [[source]] url = "https://pypi.org/simple" verify_ssl = true name = "pypi" [packages] mkdocs = "*" pymdown-extensions = "*" mkdocs-material = "*" mkdocs-macros-plugin = "*" mkdocs-git-revision-date-localized-plugin = "*" mkdocs-git-authors-plugin = "*" [dev-packages] [requires] # Whatever Netlify's Ubuntu version uses. python_version = "3.13" ================================================ FILE: docs/README.md ================================================ # ebpf-go documentation The documentation project uses Pipenv to manage its dependencies, which will automatically create a Python virtualenv when invoked from this subdirectory. Follow your distribution's documentation for installing `pipenv`. You may also need `pyenv` to install a different Python version if your distribution doesn't provide the version specified in the `Pipfile`. Host a live preview of the documentation at http://127.0.0.1:8000: `make preview` Build the documentation, output to the site/ directory. This is a self-contained production copy that can be uploaded to hosting. `make build` To enter the virtualenv with all the documentation's Python dependencies installed: `make shell` ================================================ FILE: docs/ebpf/about.md ================================================ The project was initially created in 2017 as [`newtools/ebpf`](https://github.com/newtools/ebpf) by a group of passionate developers wanting to bring the power of eBPF to Go applications. It quickly gained traction within the Go community, especially for projects that couldn't or wouldn't build upon the CGo-based BCC bindings at the time (`gobpf`). Since its inception, {{ proj }} has seen remarkable growth and widespread adoption. It has become a fundamental building block for numerous open-source projects.
Major industry players and forward-thinking startups have integrated the library into their technology stacks to combine the power and flexibility of eBPF with the iteration speed, runtime safety and ease of deployment provided by the Go language. {{ proj }} maintains a strong commitment to collaborating with the upstream Linux project, which ensures that it stays aligned with the latest advancements in the eBPF ecosystem and remains compatible with the evolving Linux kernel and its co-located BPF library, `libbpf`. Thank you for being a part of our :ebee-color: eBPF journey! ================================================ FILE: docs/ebpf/concepts/features.md ================================================ # Feature Detection Feature detection allows applications to check which eBPF-related features are supported by the Linux kernel. This is useful for software that wants to be compatible with multiple kernel versions and lets developers tailor their code to use different eBPF features depending on what is supported by the running kernel. ## Usage In the `features` package, API calls follow a consistent pattern. The returned errors mean the following: - `nil` means the feature is supported. - {{ godoc('ErrNotSupported') }} means the feature is not supported. - Any other error suggests inconclusive detection, which could include false negatives. For example, here's using {{ godoc('features/HaveProgramType') }}: {{ go_example('DocDetectXDP', title="Detect kernel support for XDP programs") }} !!! note "" Feature detection results are cached to minimize overhead, except for inconclusive results. Subsequent calls to a conclusive probe will consistently return the same result without rerunning the probe logic. ## Limitations ### {{ godoc ('features/HaveProgramHelper') }} 1. Not all combinations of program types and helpers can be probed. Conclusively probing a BPF helper means successfully loading a generated BPF program. 
Certain program types like `LSM`, `StructOps` and `Tracing` are difficult to generate on-the-fly, as they depend on other components or symbols being present in the kernel, making the probes fragile. Instead, for these types, we don't rely on successfully loading a program, but we look for specific kernel error responses instead, such as `ENOTSUPP`. This indicates the program type is known, but our generated program was invalid (which is fine!). 2. This function only confirms the presence of the given BPF helper in the kernel. In cases where helpers themselves gain extra features in subsequent kernel releases, you'll have to write your own feature probe to test the particular combination of helper inputs you're looking for. Feel free to look at the implementation of package `features` for inspiration. ## Compared to `bpftool` Linux's command-line utility `bpftool` offers the `bpftool feature probe` subcommand for feature detection, inspiring the `features` package in {{ proj }}. That subcommand provides an extensive overview of eBPF-related features, issuing thousands of feature probes to identify kernel configuration options, and detect map types, program types, and helper functions. {{ proj }} aims to provide an equivalent set of feature probes, implemented in pure Go, to avoid a `bpftool` runtime dependency, and to allow users to probe only the exact features they need. ================================================ FILE: docs/ebpf/concepts/global-variables.md ================================================ {{ linux_version("5.2", "For all global variable-related BPF operations, the kernel needs to understand the BPF_PSEUDO_MAP_VALUE value in ldimm64 instructions. This is needed for direct, lookup-free map access." )}} Like typical C programs, BPF programs allow the use of global variables. These variables can be initialized from the BPF C code itself, or they can be modified by the loading user space application before handing it off to the kernel. 
The abstraction {{ proj }} provides to interact with global variables is the {{ godoc('VariableSpec') }}, found in the {{ godoc('CollectionSpec.Variables') }} field. This page describes how to declare variables in BPF C and how to interact with them in Go. ## Runtime Constants {{ linux_version("5.2", "Read-only maps and the BPF_MAP_FREEZE command are needed for implementing constant variables.") }} Global runtime constants are typically used for configuration values that influence the functionality of a BPF program. Think all sorts of network or hardware addresses for network filtering, or timeouts for rate limiting. The C compiler will reject any runtime modifications to these variables in the BPF program, like a typical const. Crucially, the BPF verifier will also perform dead code analysis if constants are used in if statements. If a condition is always true or false, it will remove unused code paths from the BPF program, reducing verification time and increasing runtime performance. This enables many features like portable kfuncs, allowing C code to refer to kfuncs that may not exist in some kernels, as long as those code paths are guaranteed not to execute at runtime. Similarly, this can be used to your advantage to disable code paths that are not needed in certain configurations, or would result in a verifier error on some kernels or in some contexts. :ebee-color: Consider the following C BPF program that reads a global constant and returns it: {{ c_example('variables_const', title='BPF C program declaring global constant const_u32') }} ??? warning "Why is `const_u32` declared `volatile`?" In short: without the `volatile` qualifier, the variable would be optimized away and not appear in the BPF object file, leaving us unable to modify it from our user space application. In this program, the compiler (in)correctly deduces two things about `const_u32`: it is never assigned a value, and it doesn't change over the course of the program. 
Implementation details aside, it will now assume that the return value of `const_example()` is always 0 and omit the variable from the ELF altogether. For BPF programs, it's common practice to declare all global variables that need to be accessed from user space as `volatile`, especially non-`const` globals. Doing so ensures the compiler reliably allocates them in a data section in the ELF. :simple-go: First, let's take a look at a full Go example that will comprise the majority of interactions with constants. In the example below, we'll load a BPF object from disk, pull out a variable, set its value and call the BPF program once with an empty context. Variations on this pattern will follow later. {{ go_example('DocVariablesSetConst', title='Go program modifying a const, loading and running the BPF program') }} 1. Any values passed into {{ godoc('VariableSpec.Set') }} must marshal to a fixed width. This behaviour is identical to {{ godoc('Map.Put') }} and friends. Using untyped integers is not supported since their size is platform dependent. We recommend the same approach in BPF C to keep data size predictable. 2. A 15-byte context is the minimum the kernel will accept for dry-running a BPF program. If your BPF program reads from its context, populating this slice is a great way of doing unit testing without setting up a live testing environment. ## Global Variables Non-const global variables are mutable and can be modified by both the BPF program and the user space application. They are typically used for keeping state like metrics, counters, rate limiting, etc. These variables can also be initialized from user space, much like their `const` counterparts, and can be both read and written to from the BPF program as well as the user space application. More on that in a future section. :ebee-color: The following C BPF program reads a global variable and returns it: {{ c_example('variables_global', title='BPF C program declaring global variable global_u16') }} ??? 
warning "Why is `global_u16` declared `volatile`?" Similar to `volatile const` in a prior example, `volatile` is used here to make compiler output more deterministic. Without it, the compiler may choose to optimize away a variable if it's never assigned to, not knowing its value is actually provided by user space. The `volatile` qualifier doesn't change the variable's semantics. ### Before Loading: Using VariableSpec For interacting with global variables before loading the BPF program into the kernel, use the methods on its {{ godoc('VariableSpec') }} found in {{ godoc('CollectionSpec.Variables') }} or injected using {{ godoc('LoadAndAssign') }}. This ensures the variable is populated before the BPF program has a chance to execute. :simple-go: In user space, initialize `global_u16` to 9000: {{ go_example('DocVariablesSetGlobalU16') }} Dry-running `global_example()` a few times results in the value increasing on every invocation: {{ go_example('DocVariablesSetGlobalRun') }} Once a CollectionSpec has been loaded into the kernel, further modifications to a VariableSpec are ineffectual. ### After Loading: Using Variable After loading the BPF program into the kernel, accessing global variables from user space can be done through the {{ godoc('Variable') }} abstraction. These can be injected into an object using {{ godoc('LoadAndAssign') }}, or found in the {{ godoc('Collection.Variables') }} field. :simple-go: Building on the previous example, read the incremented variable using {{ godoc('Variable.Get') }}: {{ go_example('DocVariablesGetGlobalU16') }} Modify the Variable at runtime using {{ godoc('Variable.Set') }}. ## Internal/Hidden Global Variables By default, all global variables described in an ELF's data sections are exposed through {{ godoc('CollectionSpec.Variables') }}. However, there may be cases where you don't want user space to interfere with a variable (either on purpose or by accident) and you want to keep the variable internal to the BPF program. 
{{ c_example('variables_hidden', title='BPF C program declaring internal global variable internal_var') }} The `__hidden` macro is found in Linux' `<bpf/bpf_helpers.h>` as of version 5.13 and is defined as follows: ```c #define __hidden __attribute__((visibility("hidden"))) ``` This will cause the VariableSpec for `hidden_var` to not be included in the CollectionSpec. ## Static Global Variables With the introduction of `bpftool gen object`, BPF received a full-blown static linker, giving the `static` keyword for declaring objects local to a single .c file an actual semantic meaning. {{ proj }} follows the convention set by libbpf to not expose static variables to user space. In our case, this means that static variables are not included in the {{ godoc('CollectionSpec.Variables') }} field or emitted in bpf2go-generated code. The ELF loader has no way to differentiate function-scoped local variables (also not exposed) and static variables, since they're both marked with `LOCAL` linkage in the ELF. If you need to expose a variable to user space, drop the `static` keyword and declare it in the global scope of your BPF C program. ================================================ FILE: docs/ebpf/concepts/loader.md ================================================ # Loading Objects {{ proj }} ships an eBPF object (ELF) loader that aims to be compatible with the upstream libbpf and iproute2 (`tc`/`ip`) projects. An ELF is typically obtained by compiling an eBPF C program using the LLVM toolchain (`clang`). This page describes the journey from compiled eBPF ELF to resources in the kernel. This involves parsing the ELF into intermediate Go (Spec) types that can be modified and copied before loading them into the kernel.
```mermaid graph LR ELF --> ProgramSpec --> Program ELF --> Types ELF --> MapSpec --> Map Map & Program --> Links subgraph Collection Program & Map end subgraph CollectionSpec ProgramSpec & MapSpec & Types end ``` ## {{ godoc('CollectionSpec') }} A CollectionSpec represents eBPF objects extracted from an ELF, and can be obtained by calling {{ godoc('LoadCollectionSpec') }}. In the examples below, we declare a Map and Program in eBPF C, then load and inspect them using Go. Use the tabs to explore the Go and C counterparts below. === ":simple-go: Go" {{ go_example('DocLoadCollectionSpec', title='Parse ELF and inspect its CollectionSpec') | indent(4) }} !!! warning "" All of a Spec's attributes can be modified, and those modifications influence the resources created in the kernel. Be aware that doing so may invalidate any assumptions made by the compiler, resulting in maps or programs being rejected by the kernel. Proceed with caution. === ":ebee-color: eBPF C" {{ c_example('DocMyMapProgram', title='Declare a minimal map and a program') | indent(4) }} !!! tip "" See [Section Naming](section-naming.md) to learn about the use of the `SEC()` macro in the example above. ## {{ godoc('NewCollection') }} After parsing the ELF into a CollectionSpec, it can be loaded into the kernel using {{ godoc('NewCollection') }}, resulting in a {{ godoc('Collection') }}. {{ go_example('DocNewCollection') }} !!! note "" {{ godoc('Collection.Close') }} closes all Maps and Programs in the Collection. Interacting with any resources after `Close()` will return an error, since their underlying file descriptors will be closed. See [Object Lifecycle](object-lifecycle.md) to gain a better understanding of how {{ proj }} manages its resources and for best practices handling Maps and Programs. ## {{ godoc('CollectionSpec.LoadAndAssign', short=True) }} LoadAndAssign is a convenience API that can be used instead of `NewCollection`. 
It has two major benefits: - It automates pulling Maps and Programs out of a Collection. No more `#!go if m := coll.Maps["my_map"]; m == nil { return ... }`. - **Selective loading of Maps and Programs!** Only resources of interest and their dependencies are loaded into the kernel. Great for working with large CollectionSpecs that only need to be partially loaded. First, declare a struct that will receive pointers to a Map and a Program after loading them into the kernel. Give it a `#!go Close()` method to make cleanup easier. {{ go_example('DocLoadAndAssignObjs', title='Declare a custom struct myObjs') }} !!! note "" Use bpf2go if the preceding code snippet looks tedious. bpf2go can generate this kind of boilerplate code automatically and will make sure it stays in sync with your C code. Next, instantiate a variable of our newly-declared type and pass its pointer to `LoadAndAssign`. {{ go_example('DocLoadAndAssign', title='Pass a custom struct to LoadAndAssign') }} !!! warning "" If your use case requires dynamically renaming keys in CollectionSpec.Maps, you may need to use NewCollection instead. Map and Program names in struct tags are baked into the Go binary at compile time. ## Type Information (BTF) If an eBPF ELF was built with `clang -g`, it will automatically contain BTF type information. This information can be accessed programmatically through {{ godoc('CollectionSpec.Types') }}. Note that this field will be `nil` if the ELF was built without BTF. {{ go_example('DocBTFTypeByName') }} !!! note "" Many eBPF features rely on ELFs to be built with BTF, and there is little to be gained by opting out of it. `clang -g` also includes DWARF information in the ELF which can be safely removed with `llvm-strip`. eBPF does not rely on DWARF information. ================================================ FILE: docs/ebpf/concepts/object-lifecycle.md ================================================ !!! 
info "" This is an advanced topic and does not need to be fully understood in order to get started writing useful tools. If you find yourself debugging unexpectedly-detached programs, resource leaks, or you want to gain a deeper understanding of how eBPF objects are managed by {{ proj }}, this page should prove helpful. ## File Descriptors and Go Interacting with eBPF objects from user space is done using file descriptors. Counter-intuitively, 'file' descriptors are used as references to many types of kernel resources in modern Linux, not just files. In {{ proj }}, {{ godoc('Map') }}, {{ godoc('Program') }} and {{ godoc('link/Link') }} are all modeled around these underlying file descriptors. Go, being a garbage-collected language, automatically manages the lifecycle of Go objects. Keeping in line with the standard library's `os.File` and friends, eBPF resources in {{ proj }} were designed in a way so their underlying file descriptors are closed when their Go objects are garbage collected. This generally prevents runaway resource leaks, but is not without its drawbacks. This has subtle but important repercussions for BPF, since this means the Go runtime will call `Close()` on an object's underlying file descriptor if the object is no longer reachable by the garbage collector. For example, this can happen if an object is created in a function, but is not returned to the caller. One type of map, {{ godoc('ProgramArray') }}, is particularly sensitive to this. More about that in [Program Arrays](#program-arrays). ## Extending Object Lifetime ### Pinning Aside from file descriptors, BPF provides another method of creating references to eBPF objects: pinning. This is the concept of associating a file on a virtual file system (the BPF File System, bpffs for short) with a BPF resource like a Map, Program or Link. Pins can be organized into arbitrary directory structures, just like on any other file system. 
When the Go process exits, the pin will maintain a reference to the object, preventing it from being automatically destroyed. In this scenario, removing the pin using plain `rm` will remove the last reference, causing the kernel to destroy the object. If you're holding an active object in Go, you can also call {{ godoc('Map.Unpin') }}, {{ godoc('Program.Unpin') }} or {{ godoc('link/Link.Unpin') }} if the object was previously pinned. !!! warning Pins do **not** persist through a reboot! A common use case for pinning is sharing eBPF objects between processes. For example, one could create a Map from Go, pin it, and inspect it using `bpftool map dump pinned /sys/fs/bpf/my_map`. ### Attaching Attaching a Program to a hook acts as a reference to a Program, since the kernel needs to be able to execute the program's instructions at any point. For legacy reasons, some {{ godoc('link/Link') }} types don't support pinning. It is generally safe to assume these links will persist beyond the lifetime of the Go application. ## :warning: Program Arrays A {{ godoc('ProgramArray') }} is a Map type that holds references to other Programs. This allows programs to 'tail call' into other programs, useful for splitting up long and complex programs. Program Arrays have a unique property: they allow cyclic dependencies to be created between the Program Array and a Program (e.g. allowing programs to call into themselves). To avoid ending up with a set of programs loaded into the kernel that cannot be freed, the kernel maintains a hard rule: **Program Arrays require at least one open file descriptor or bpffs pin**. !!! warning If all user space/bpffs references are gone, **any tail calls into the array will fail**, but the Map itself will remain loaded as long as there are programs that use it. This property, combined with interactions with Go's garbage collector previously described in [File Descriptors and Go](#file-descriptors-and-go), is a great source of bugs.
A few tips to handle this problem correctly: - Use {{ godoc('CollectionSpec.LoadAndAssign') }}. It will refuse to load the CollectionSpec if doing so would result in a Program Array without a userspace reference. - Pin Program Arrays if execution of your eBPF code needs to continue past the lifetime of your Go application, e.g. for upgrades or short-lived CLI tools. - Retain references to the Map at all times in long-running applications. Note that `#!go defer m.Close()` makes Go retain a reference until the end of the current scope. ================================================ FILE: docs/ebpf/concepts/rlimit.md ================================================ # Resource Limits Creating eBPF objects (Maps, Programs, even BTF blobs) requires kernel memory allocation. Before kernel version 5.11, the memory available to a process for creating eBPF objects was restricted by its `RLIMIT_MEMLOCK` rlimit value, visible through the `ulimit -l` command. Starting with [version 5.11](https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com), the Linux kernel switched from rlimits to memory cgroup (memcg) accounting for managing memory limits on processes handling eBPF objects in the kernel. eBPF object allocations are tracked alongside regular allocations within the cgroup. Memory consumption and limits can be queried and set through cgroupfs, the same mechanism used for setting memory limits on containers. ## Purpose of package `rlimit` On kernels supporting memcg accounting, there's no need to manage `RLIMIT_MEMLOCK` for effectively using eBPF, as eBPF object allocations now count towards the cgroup memory limit instead. However, since many Linux distributions still ship pre-5.11 kernels, it's necessary to conditionally manage rlimit for kernels lacking memcg accounting for eBPF. To support writing portable Go tools that work across various kernel versions, the `rlimit` package was introduced. It encapsulates two behaviours: 1. 
As an **import side effect** of importing the package, it lowers the rlimit of the current process to induce a Map creation failure, then restores the original rlimit. 2. {{ godoc('rlimit/RemoveMemlock') }} conditionally increases `RLIMIT_MEMLOCK` to infinity based on the probe's result. If the kernel supports memcg accounting, this is a no-op. ## Usage Include this in your application: {{ go_example('DocRlimit', title="Remove RLIMIT_MEMLOCK if kernel lacks memcg accounting") }} !!! note "" You can call `RemoveMemlock()` multiple times if your program has multiple entry points or CLI subcommands. The rlimit operation will only execute once. ## Caveats ### Race Conditions The package was carefully designed with Go's runtime initialization semantics in mind, meaning only one `init()` will execute at a time across all packages, minimizing the risk of racing against other callers to `prlimit(2)` (which should hopefully be rare). The rlimit package first gets the process' current `RLIMIT_MEMLOCK` value, drops it to 0, attempts to create a BPF map, then finally resets the rlimit to the old value. It's important to note that this happens **before invoking** `RemoveMemlock()` and has two potential side effects: - On kernels before 5.11, other concurrent BPF object creations may fail due to insufficient memory being available while the rlimit is at 0. - Other Go packages interacting with `prlimit(2)` may interfere with this process, leading to a wrong `RLIMIT_MEMLOCK` value being read or restored. Please audit your code and dependencies for potential conflicts. ### Why does my application always create a Map on startup? !!! note "" The `rlimit` package is entirely optional and serves as a convenience feature. Since the package creates a Map from `init()`, there is currently no way to prevent your application from interacting with `bpf(2)`, even if `RemoveMemlock()` is never invoked or if all of your application's eBPF features are disabled.
We consider this a reasonable trade-off to provide maximum value for the majority of use cases. If this is not desirable, you can avoid using package `rlimit` altogether and increase the rlimit through other means like Docker's `--ulimit memlock=-1` flag or systemd's `LimitMEMLOCK=infinity` unit limit property. ================================================ FILE: docs/ebpf/concepts/section-naming.md ================================================ You may have seen the `SEC()` macro used around eBPF C code. This macro sends a hint to the compiler to place a symbol (a variable or function) in a specific section of the resulting eBPF object binary. Typically, program binaries for Unix-like systems are divided into so-called 'sections'. All sections have names, many of which are assigned special meaning. For example, `.text` is where [program text](https://en.wikipedia.org/wiki/Code_segment) (executable instructions) goes by default. Like common application binaries, eBPF also relies heavily on section naming to distinguish various parts of an application. As an example, the section name of an individual eBPF program determines its program type, affecting the way the program is verified by the kernel and defining what the program is allowed to do. ## Executable Linkable Format (ELF) Executable Linkable Format (ELF) is the standard application binary format for Linux. It is also used as the output format of LLVM's BPF backend. ELF binaries are typically [executed directly by the kernel](https://lwn.net/Articles/631631/), but for eBPF, a different approach is needed. eBPF programs are not executable in the traditional sense. They depend on a user space component that loads them, manages their resources, and can interact with their components. This is where projects such as libbpf and {{ proj }} come in. 
For compatibility reasons, {{ proj }} follows the section naming conventions established by libbpf, since we consider upstream decisions to be authoritative on this subject. There's also little reason to do things differently; section names are essentially considered an API. ??? tip "How do I explore an ELF's contents?" You can display an ELF's section table using `readelf -S <binary>`. For visualizing a program's instructions or the contents of a map's data section, you'll need a tool from the LLVM toolchain: `llvm-objdump`. For example: `llvm-objdump -SD my_ebpf.o -j xdp`. This will limit output to the `xdp` section (see [Program Sections](#program-sections)), display corresponding source code lines if available using `-S`, and display disassembled instructions using `-D`. The same can be done for data sections like `.data` and `.rodata.` (see [Map Sections](#map-sections)). Also worth mentioning: display an eBPF object's BTF type information using `bpftool btf dump file my_object.o`. ## Section Prefixes To support encoding extra information into section names, a prefix convention using forward slashes `/` is used. For example, a Kprobe-type program meant to be attached to the `slub_flush` kernel symbol would be put into an ELF section called `kprobe/slub_flush`. ### Miscellaneous Sections `license` : In order to use certain BPF helpers in your program, it must be licensed under a GPL-compatible license. BPF programs licensing follows the same rules as kernel module licensing. This is explained in more detail in the Linux kernel's [BPF licensing documentation](https://docs.kernel.org/bpf/bpf_licensing.html#using-bpf-programs-in-the-linux-kernel). See the [`license_is_gpl_compatible`](https://elixir.bootlin.com/linux/v6.5.4/source/include/linux/license.h) function in the Linux source code or the [Module Licensing table](https://docs.kernel.org/process/license-rules.html#id1). This section must only contain the license string of the programs in the ELF.
For example: `#!c char __license[] SEC("license") = "Dual MIT/GPL";`. `version` : **Deprecated.** Kernels <5.0 require this section to contain a value matching the kernel's `LINUX_VERSION_CODE` for Kprobe-type programs. Always omit this; {{ proj }} will populate this field automatically if needed. ### Map Sections `.maps` : This section is dedicated to BTF-style Map definitions. `maps` : **Deprecated.** This section is expected to only contain fixed-width `struct bpf_map_def` variables. Larger structs like iproute2's `struct bpf_elf_map` can also be used for backwards compatibility. Any extra bytes past the end of the size of a `struct bpf_map_def` are exposed by {{ godoc('MapSpec.Extra') }} and must be drained before attempting to create the Map. #### :material-head-cog: Advanced: Special Map Sections `.data*` : The LLVM BPF backend implements accesses to mutable global variables as direct Array Map accesses. Since a single BPF program can be executed concurrently as a result of the kernel processing packets and other events asynchronously, a data section and the global variables it represents are considered shared memory. Variables can be emitted to specific sections, like `#!c SEC(".data.foo") my_var = 123;`, as long as they match the `.data*` prefix. This can prove useful for isolating certain variables to well-known sections for Go code generation or custom variable rewriting logic. Global, non-hidden variables are emitted to {{ godoc('CollectionSpec.Variables') }}, where they can be modified before loading the CollectionSpec into the kernel. See [Global Variables](../concepts/global-variables.md) for instructions. `.rodata*` : Like `.data*`, but for constants. These become read-only after loading the CollectionSpec into the kernel, and are also exposed through {{ godoc('CollectionSpec.Variables') }}. `.bss` : Section emitted by the compiler when zero-initialized globals are present in the ELF.
Is typically zero-length in the ELF, and initialized by {{ proj }} after loading. Also exposed through {{ godoc('CollectionSpec.Variables') }}. `.rel*` : Not exposed by {{ proj }}, only used behind the scenes. Relocation sections contain relocation records against their non-`.rel` prefixed counterparts. This is mainly used for fixing up BPF instructions referring to Maps and global variables. ### Program Sections Names of Program sections mainly define the program's {{ godoc('ProgramType') }}, but also its {{ godoc('AttachType') }} and {{ godoc('AttachFlags') }} are automatically set for convenience based on its section name. As described previously, section prefixes containing a forward slash `/` expect a second component to follow the slash. For example, a program in the `kprobe/slub_flush` section will automatically have its {{ godoc('ProgramSpec.AttachTo') }} field set to `slub_flush` to facilitate attaching the program later on. Additionally, the program's original full section name can be found in {{ godoc('ProgramSpec.SectionName') }}. !!! tip "" There's also [upstream libbpf documentation](https://docs.kernel.org/bpf/libbpf/program_types.html) for this. Not all of libbpf's program types may be supported by {{ proj }} yet. If a program type you require is missing, please file an issue or send a pull request! 
| Section (Prefix) | {{ godoc('ProgramType') }} | {{ godoc('AttachType') }} | {{ godoc('AttachFlags') }} | |:----------------------|:---------------------------|:---------------------------------|:---------------------------| | socket | SocketFilter | | | | sk_reuseport/migrate | SkReuseport | AttachSkReuseportSelectOrMigrate | | | sk_reuseport | SkReuseport | AttachSkReuseportSelect | | | kprobe/ | Kprobe | | | | uprobe/ | Kprobe | | | | kretprobe/ | Kprobe | | | | uretprobe/ | Kprobe | | | | tc | SchedCLS | | | | classifier | SchedCLS | | | | action | SchedACT | | | | tracepoint/ | TracePoint | | | | tp/ | TracePoint | | | | raw_tracepoint/ | RawTracepoint | | | | raw_tp/ | RawTracepoint | | | | raw_tracepoint.w/ | RawTracepointWritable | | | | raw_tp.w/ | RawTracepointWritable | | | | tp_btf/ | Tracing | AttachTraceRawTp | | | fentry/ | Tracing | AttachTraceFEntry | | | fmod_ret/ | Tracing | AttachModifyReturn | | | fexit/ | Tracing | AttachTraceFExit | | | fentry.s/ | Tracing | AttachTraceFEntry | BPF_F_SLEEPABLE | | fmod_ret.s/ | Tracing | AttachModifyReturn | BPF_F_SLEEPABLE | | fexit.s/ | Tracing | AttachTraceFExit | BPF_F_SLEEPABLE | | freplace/ | Extension | | | | lsm/ | LSM | AttachLSMMac | | | lsm.s/ | LSM | AttachLSMMac | BPF_F_SLEEPABLE | | iter/ | Tracing | AttachTraceIter | | | iter.s/ | Tracing | AttachTraceIter | BPF_F_SLEEPABLE | | syscall | Syscall | | | | xdp.frags/devmap | XDP | AttachXDPDevMap | BPF_F_XDP_HAS_FRAGS | | xdp/devmap | XDP | AttachXDPDevMap | | | xdp.frags/cpumap | XDP | AttachXDPCPUMap | BPF_F_XDP_HAS_FRAGS | | xdp/cpumap | XDP | AttachXDPCPUMap | | | xdp.frags | XDP | | BPF_F_XDP_HAS_FRAGS | | xdp | XDP | | | | perf_event | PerfEvent | | | | lwt_in | LWTIn | | | | lwt_out | LWTOut | | | | lwt_xmit | LWTXmit | | | | lwt_seg6local | LWTSeg6Local | | | | cgroup_skb/ingress | CGroupSKB | AttachCGroupInetIngress | | | cgroup_skb/egress | CGroupSKB | AttachCGroupInetEgress | | | cgroup/skb | CGroupSKB | | | | cgroup/sock_create | 
CGroupSock | AttachCGroupInetSockCreate | | | cgroup/sock_release | CGroupSock | AttachCgroupInetSockRelease | | | cgroup/sock | CGroupSock | AttachCGroupInetSockCreate | | | cgroup/post_bind4 | CGroupSock | AttachCGroupInet4PostBind | | | cgroup/post_bind6 | CGroupSock | AttachCGroupInet6PostBind | | | cgroup/dev | CGroupDevice | AttachCGroupDevice | | | sockops | SockOps | AttachCGroupSockOps | | | sk_skb/stream_parser | SkSKB | AttachSkSKBStreamParser | | | sk_skb/stream_verdict | SkSKB | AttachSkSKBStreamVerdict | | | sk_skb | SkSKB | | | | sk_msg | SkMsg | AttachSkMsgVerdict | | | lirc_mode2 | LircMode2 | AttachLircMode2 | | | flow_dissector | FlowDissector | AttachFlowDissector | | | cgroup/bind4 | CGroupSockAddr | AttachCGroupInet4Bind | | | cgroup/bind6 | CGroupSockAddr | AttachCGroupInet6Bind | | | cgroup/connect4 | CGroupSockAddr | AttachCGroupInet4Connect | | | cgroup/connect6 | CGroupSockAddr | AttachCGroupInet6Connect | | | cgroup/sendmsg4 | CGroupSockAddr | AttachCGroupUDP4Sendmsg | | | cgroup/sendmsg6 | CGroupSockAddr | AttachCGroupUDP6Sendmsg | | | cgroup/recvmsg4 | CGroupSockAddr | AttachCGroupUDP4Recvmsg | | | cgroup/recvmsg6 | CGroupSockAddr | AttachCGroupUDP6Recvmsg | | | cgroup/getpeername4 | CGroupSockAddr | AttachCgroupInet4GetPeername | | | cgroup/getpeername6 | CGroupSockAddr | AttachCgroupInet6GetPeername | | | cgroup/getsockname4 | CGroupSockAddr | AttachCgroupInet4GetSockname | | | cgroup/getsockname6 | CGroupSockAddr | AttachCgroupInet6GetSockname | | | cgroup/sysctl | CGroupSysctl | AttachCGroupSysctl | | | cgroup/getsockopt | CGroupSockopt | AttachCGroupGetsockopt | | | cgroup/setsockopt | CGroupSockopt | AttachCGroupSetsockopt | | | struct_ops+ | StructOps | | | | struct_ops.s+ | StructOps | | BPF_F_SLEEPABLE | | sk_lookup/ | SkLookup | AttachSkLookup | | | kprobe.multi | Kprobe | AttachTraceKprobeMulti | | | kretprobe.multi | Kprobe | AttachTraceKprobeMulti | | ================================================ FILE: 
docs/ebpf/contributing/architecture.md ================================================ Architecture of the library === The bulk of the functionality of the library is split across the `ebpf`, `btf` and `link` packages. Below is a diagram of how the most important types relate to each other. The graph is in dependency order, so an arrow from `Links` to `Map` can be read as "Link depends on Map". ```mermaid graph RL Program --> ProgramSpec --> ELF btf.Spec --> ELF Map --> MapSpec --> ELF Links --> Map & Program ProgramSpec -.-> btf.Spec MapSpec -.-> btf.Spec subgraph Collection Program & Map end subgraph CollectionSpec ProgramSpec & MapSpec & btf.Spec end ``` ELF --- BPF is usually produced by using Clang to compile a subset of C. Clang outputs an ELF file which contains program byte code (aka BPF), but also metadata for maps used by the program. The metadata follows the conventions set by libbpf shipped with the kernel. Certain ELF sections have special meaning and contain structures defined by libbpf. Newer versions of clang emit additional metadata in BPF Type Format. The library aims to be compatible with libbpf so that moving from a C toolchain to a Go one creates little friction. To that end, the ELF reader is tested against the Linux selftests and avoids introducing custom behaviour if possible. The output of the ELF reader is a `CollectionSpec` which encodes all of the information contained in the ELF in a form that is easy to work with in Go. The returned `CollectionSpec` should be deterministic: reading the same ELF file on different systems must produce the same output. As a corollary, any changes that depend on the runtime environment like the current kernel version must happen when creating [Objects](#objects). Specifications --- `CollectionSpec` is a very simple container for `ProgramSpec`, `MapSpec` and `btf.Spec`. Avoid adding functionality to it if possible. 
`ProgramSpec` and `MapSpec` are blueprints for in-kernel objects and contain everything necessary to execute the relevant `bpf(2)` syscalls. They refer to `btf.Spec` for type information such as `Map` key and value types. The {{ godoc("asm") }} package provides an assembler that can be used to generate `ProgramSpec` on the fly. Objects --- `Program` and `Map` are the result of loading specifications into the kernel. Features that depend on knowledge of the current system (e.g. kernel version) are implemented at this point. Sometimes loading a spec will fail because the kernel is too old, or a feature is not enabled. There are multiple ways the library deals with that: * Fallback: older kernels don't allow naming programs and maps. The library automatically detects support for names, and omits them during load if necessary. This works since name is primarily a debug aid. * Sentinel error: sometimes it's possible to detect that a feature isn't available. In that case the library will return an error wrapping `ErrNotSupported`. This is also useful to skip tests that can't run on the current kernel. Once program and map objects are loaded they expose the kernel's low-level API, e.g. `NextKey`. Often this API is awkward to use in Go, so there are safer wrappers on top of the low-level API, like `MapIterator`. The low-level API is useful when our higher-level API doesn't support a particular use case. Links --- Programs can be attached to many different points in the kernel and newer BPF hooks tend to use bpf_link to do so. Older hooks unfortunately use a combination of syscalls, netlink messages, etc. Adding support for a new link type should not pull in large dependencies like netlink, so XDP programs or tracepoints are out of scope. Each bpf_link_type has one corresponding Go type, e.g. `link.tracing` corresponds to BPF_LINK_TRACING. In general, these types should be unexported as long as they don't export methods outside of the Link interface. 
Each Go type may have multiple exported constructors. For example `AttachTracing` and `AttachLSM` create a tracing link, but are distinct functions since they may require different arguments. ================================================ FILE: docs/ebpf/contributing/index.md ================================================ # How to contribute Development happens on [GitHub](https://github.com/cilium/ebpf) and contributions in all forms are welcome. Please take a look at [the architecture](architecture.md) to get a better understanding of the high-level goals. ## Developer Certificate of Origin The Cilium project requires that all contributions to project repositories carry the [Developer Certificate of Origin][DCO]. This is as simple as appending a footer to your commits: ``` Signed-off-by: Your Name ``` Signing off your contributions this way means that you've read and understood the contents of the DCO. ## Running the tests Many of the tests require privileges to set resource limits and load eBPF code. The easiest way to obtain these is to run the tests with `sudo`. Run all tests with the following command: ```shell-session go test -exec sudo ./... ``` To test the current package with a different kernel version you can use [vimto]. Once you have installed `vimto` and its dependencies you can run all tests on a different kernel: ```shell-session vimto -- go test ./... ``` Use one of the [precompiled kernels](https://github.com/cilium/ci-kernels/pkgs/container/ci-kernels/versions) like so: ```shell-session vimto -kernel :mainline -- go test ./... ``` ## Regenerating testdata and source code The library includes some binary artifacts which are used for tests and some generated source code. Run `make` in the root of the repository to start this process. ```shell-session make ``` This requires Docker, as it relies on a standardized build environment to keep the build output stable. 
It is possible to regenerate data using Podman by overriding the `CONTAINER_*` variables: ```shell-session make CONTAINER_ENGINE=podman CONTAINER_RUN_ARGS= ``` ## Project Roles If you'd like to contribute to the library more regularly, one of the [maintainers][ebpf-lib-maintainers] can add you to the appropriate team or mark you as a code owner. Please create an issue in the repository. * [ebpf-go-contributors] * Have ["Triage"][permissions] role * May be asked to review certain parts of code * May be asked to help with certain issues * [ebpf-go-reviewers] and [ebpf-go-windows-reviewers] * Have ["Write"][permissions] role * CODEOWNER of a part of the code base * In-depth review of code, escalates to maintainers if necessary * For bugfixes: review within 1-2 days * Otherwise: review within a work week * When lacking time: escalate to maintainers, but don’t ignore * [ebpf-lib-maintainers] * Have ["Admin"][permissions] role * Manage releases * Triage incoming issues and discussions and pull in CODEOWNERS if needed * Maintain CI & project permissions * Maintain roadmap and encourage contributions towards it * Merge approved PRs [vimto]: https://github.com/lmb/vimto [permissions]: https://docs.github.com/en/organizations/managing-user-access-to-your-organizations-repositories/repository-roles-for-an-organization#permissions-for-each-role [ebpf-go-contributors]: https://github.com/cilium/community/blob/main/ladder/teams/ebpf-go-contributors.yaml [ebpf-go-reviewers]: https://github.com/cilium/community/blob/main/ladder/teams/ebpf-go-reviewers.yaml [ebpf-go-windows-reviewers]: https://github.com/cilium/community/blob/main/ladder/teams/ebpf-go-windows-reviewers.yaml [ebpf-lib-maintainers]: https://github.com/cilium/community/blob/main/roles/Maintainers.md#ebpf-lib-maintainers-maintainers-of-ciliumebpf [DCO]: https://developercertificate.org/ ================================================ FILE: docs/ebpf/contributing/new-example.md 
================================================ # Adding a new example The library includes some examples to make getting started easier. The aim of the examples is to __show how the library works, not how to implement a specific thing in eBPF__. This is because the scope of eBPF is simply too large for us to cover. Please consider the following before proposing a new example: 1. What feature __of the library__ does it showcase? 2. Is there already an existing example for that feature? If yes, could it be extended without making it harder to understand? 3. How complicated is the eBPF code required to make it work? How could the amount of eBPF be minimised? Please contact the maintainers on Slack if you are in doubt about any of these points. ## What makes a good example? * It should be concise. The less code the better. * It should show a single thing. The less configurable the better. * It should be well documented. Even a novice user must be able to follow along. * It should produce meaningful output or have an easily testable effect. * It should have as few requirements on software / hardware as possible. ================================================ FILE: docs/ebpf/contributing/new-feature.md ================================================ # Adding a new feature We're very much looking for contributions which flesh out the functionality of the library. 1. Have a look at the [architecture](architecture.md) of the library if you haven't already. 2. [Join](https://ebpf.io/slack) the [#ebpf-go-dev](https://cilium.slack.com/messages/ebpf-go-dev) channel to discuss your requirements and how the feature can be implemented. Alternatively open a new Discussion if you prefer to not use Slack. The most important part is figuring out how much new exported API is necessary. **The less new API is required the easier it will be to land the feature.** Also see [API stability](#api-stability). 3. 
(*optional*) Create a draft PR if you want to discuss the implementation or have hit a problem. It's fine if this doesn't compile or contains debug statements. 4. Create a PR that is ready to merge. This must pass CI and have tests. ## API stability There is an emphasis on compatibility even though the library doesn't guarantee the stability of its API at the moment. 1. If possible, avoid breakage by introducing new API and deprecating the old one at the same time. If an API was deprecated in v0.x it can be removed in v0.x+1. This is especially important if there is no straightforward way to convert from the old to the new API. 2. Breaking API in a way that causes compilation failures is acceptable but must have good reasons. 3. Changing the semantics of the API without causing compilation failures is heavily discouraged. ================================================ FILE: docs/ebpf/contributing/windows.md ================================================ # Working on the Windows port The library has basic support for interacting with eBPF for Windows (efW). Things are subject to change because eBPF for Windows has not had a stable (signed) release yet. ## Differences between Linux and eBPF for Windows * eBPF for Windows has three distinct modes of operation: an interpreter, a JIT and a way to compile eBPF to a native Windows driver. The native driver can be signed using the usual mechanisms. It is likely that a stable release of eBPF for Windows will only support native drivers. The library supports both mechanisms, and relies on the JIT for its testsuite. This is because the native Windows driver mechanism still comes with significant downsides. * eBPF for Windows has a large user-space component which ebpf-go calls into via dynamic runtime linking. This uses the same infrastructure as CGo but does not require a C toolchain and is therefore trivial to distribute. 
## Exported API The library only supports a subset of the full API on Windows, because the eBPF for Windows runtime doesn't yet or never will support certain features. APIs which are not supported will return `ErrNotSupported`. Some interfaces such as Linux-specific link types are removed outright, but this is kept to a minimum since it is very cumbersome for users to deal with APIs that change based on platform. ## Development setup The port is developed using a Windows VM running on a Linux host. There is a [script](https://github.com/cilium/ebpf/tree/main/scripts/windows) which automates the Windows installation. After the installation finishes you should be able to SSH to the VM and [follow the instructions to clone and build eBPF for Windows][efw-clone]. __Execute `Import-VsEnv` (installed by the setup script) to add `msbuild` to PATH.__ ``` PS C:\Users\lmbauer> Import-VsEnv ********************************************************************** ** Visual Studio 2022 Developer PowerShell v17.10.4 ** Copyright (c) 2022 Microsoft Corporation ********************************************************************** PS C:\Users\lmbauer> msbuild MSBuild version 17.10.4+10fbfbf2e for .NET Framework MSBUILD : error MSB1003: Specify a project or solution file. The current working directory does not contain a project or solution file. ``` ### Compiling the runtime !!! note "Pre-built eBPF for Windows binaries" You may be able to download precompiled binaries from the [efW CI/CD] pipeline. Look for an artifact called "Build-x64-Debug", which should contain `setup-ebpf.ps1` mentioned below. The upstream instructions currently explain how to compile the full project, which takes quite a long time. 
It is possible to build only some parts from the command line: * Installer: `msbuild /m /p:Configuration=Debug /p:Platform=x64 ebpf-for-windows.sln -t:"installer\ebpf-for-windows"` * Unit tests: `msbuild /m /p:Configuration=Debug /p:Platform=x64 ebpf-for-windows.sln -t:"tests\unit_tests"` * Clean: `msbuild /m /p:Configuration=Debug /p:Platform=x64 ebpf-for-windows.sln -t:"Clean"` After compilation of the installer finishes you can install the runtime: ``` .\x64\Debug\setup-ebpf.ps1 ``` _(You can pass `-Uninstall` to the script to remove a previous installation.)_ You can now run the Go unit tests of the library: ``` go test ./internal/sys ``` !!! note "Tests fail with `load ebpfapi.dll: not found`" This usually means that either the Windows runtime is not installed or that the efW installation folder is not on the PATH yet. The latter tends to happen when executing tests via ssh, since sshd doesn't pick up changes in the environment without restarting. Restart the service by issuing `Restart-Service sshd` from a powershell prompt and then re-establish the ssh session. ### efW extensions efW separates the runtime from the implementation of the various hooks / program types. The hooks are shipped as extensions in a separate Windows kernel service. Installing an extension involves two steps: 1. Installing the extension as a Windows kernel service. 2. Registering the program type(s) in the "eBPF Store". For [ntosebpfext] the setup process looks as follows, assuming the extension has already been built: ``` PS C:\Users\lorenz\ntosebpfext> .\tests\process_monitor.Tests\Setup-ProcessMonitorTests.ps1 -ArtifactsRoot .\x64\Debug\ Creating and starting the ntosebpfext service from C:\Users\lorenz\ntosebpfext\x64\Debug\\ntosebpfext.sys. PS C:\Users\lorenz\ntosebpfext> .\x64\Debug\ntos_ebpf_ext_export_program_info.exe Exporting program information. Exporting section information. 
``` ## Debugging Debugging on Windows is a bit painful, since we call from Go into `ebpfapi.dll` which is implemented in C++. There is currently no debugger which understands both C++ and Go. The most fruitful approach is to use [WinDbg]. It will catch exceptions in C++ code, give useful backtraces and allows stepping through source code. Run the WinDbg GUI as an administrator and then open the executable via `Ctrl-E`. At the prompt you can set a breakpoint on `bpf()`: ``` bu ebpfapi!bpf g ``` This will halt execution once the library calls into `bpf()` inside `ebpfapi.dll`. Use the [`CDB` commands][cdb-commands] or the GUI to navigate. It may be possible to use [CDB] to debug via the command line, but this doesn't seem to work via ssh. ### Windows trace log The `testmain` package has a small bit of instrumentation which enables tracing of the efW subsystem on demand. Simply pass the `-trace-log` flag when running tests: ``` PS C:\Users\lorenz\ebpf> go test -run '^TestMap$' -v -trace-log === RUN TestMap map_test.go:54: WindowsArray#3 --- PASS: TestMap (0.02s) PASS 100% [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] base ebpf_api_initiate returned success entry-exit ebpf_map_create entry-exit _create_map entry-exit _ebpf_core_protocol_create_map entry-exit ebpf_core_create_map entry-exit ebpf_map_create base eBPF object initialized object=0xFFFF8982A875BF30 object_type= 1 base ebpf_map_create returned success entry-exit ebpf_handle_create core ebpf_handle_create: returning handle value=376 base ebpf_handle_create returned success base ebpf_core_create_map returned success ... ``` Enabling the instrumentation can fail if the tests crashed too often. In that case you can manually stop and remove the tracing entries via the GUI: `compmgmt.msc` -> "Performance" -> "Data Collector Sets" -> "Event Trace Sessions". Look for sessions containing "ebpf-go". Rebooting might also help. ### Interpreting error codes efW uses several layers of error codes. 
* Windows [system error codes] and [RPC errors] are sometimes exposed by exceptions, which appear in the trace log. * [`ebpf_result_t`][ebpf_result_t]: wraps Windows errors and is returned from "native" efW API. * Unix-style errno, as defined by Windows' [`errno.h`][errno.h]: wraps `ebpf_result_t` and is returned from libbpf and `bpf()` API. Unfortunately not all [errno values] line up with Linux. This usually manifests in cryptic `Errno(119)` errors. [efw-clone]: https://github.com/microsoft/ebpf-for-windows/blob/main/docs/GettingStarted.md#how-to-clone-and-build-the-project-using-visual-studio [CDB]: https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/debugging-using-cdb-and-ntsd [cdb-commands]: https://learn.microsoft.com/en-us/windows-hardware/drivers/debuggercmds/commands [WinDbg]: https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/ [ebpf_result_t]: https://github.com/microsoft/ebpf-for-windows/blob/main/include/ebpf_result.h [system error codes]: https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- [RPC errors]: https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--1700-3999- [errno.h]: https://learn.microsoft.com/en-us/cpp/c-runtime-library/errno-constants?view=msvc-170 [errno values]: https://github.com/microsoft/ebpf-for-windows/issues/3729#issuecomment-2289025455 [ntosebpfext]: https://github.com/microsoft/ntosebpfext [access the debug version of the msvc runtime]: https://github.com/microsoft/ebpf-for-windows/issues/3872 [msvc debug DLLs]: https://github.com/microsoft/ebpf-for-windows/blob/7005b7ff47e7281843d6b414cd69fc5a979507c8/scripts/setup-ebpf.ps1#L17-L27 [efW CI/CD]: https://github.com/microsoft/ebpf-for-windows/actions/workflows/cicd.yml?query=branch%3Amain+is%3Acompleted ================================================ FILE: docs/ebpf/guides/getting-started.md ================================================ # Getting Started with eBPF in Go In this guide, we'll walk 
you through building a new eBPF-powered Go application from scratch. We'll introduce the toolchain, write a minimal eBPF C example and compile it using bpf2go. Then, we'll put together a Go application that loads the eBPF program into the kernel and periodically displays its output. The application attaches an eBPF program to an XDP hook that counts the number of packets received by a physical interface. Filtering and modifying packets is a major use case for eBPF, so you'll see a lot of its features being geared towards it. However, eBPF's capabilities are ever-growing, and it has been adopted for tracing, systems and application observability, security and much more. ## eBPF C program !!! abstract "Dependencies" To follow along with the example, you'll need: * Linux kernel version 5.7 or later, for bpf_link support * LLVM 11 or later [^1] (`clang` and `llvm-strip`) * libbpf headers [^2] * Linux kernel headers [^3] * Go compiler version supported by {{ proj }}'s Go module [^1]: Use `clang --version` to check which version of LLVM you have installed. Refer to your distribution's package index to find the right packages to install, as this tends to vary wildly across distributions. Some distributions ship `clang` and `llvm-strip` in separate packages. [^2]: For Debian/Ubuntu, you'll typically need `libbpf-dev`. On Fedora, it's `libbpf-devel`. [^3]: On AMD64 Debian/Ubuntu, install `linux-headers-amd64`. On Fedora, install `kernel-devel`. On Debian, you may also need `ln -sf /usr/include/asm-generic/ /usr/include/asm` since the example expects to find `<asm/types.h>`. Let's begin by writing our eBPF C program, as its structure will be used as the basis for generating Go boilerplate. Click the :material-plus-circle: annotations in the code snippet for a detailed explanation of the individual components. {{ c_example('getting_started_counter', title='counter.c') }} 1. 
When putting C files alongside Go files, they need to be excluded by a Go build tag, otherwise `go build` will complain with `C source files not allowed when not using cgo or SWIG`. The Go toolchain can safely ignore our eBPF C files. 2. Include headers containing the C macros used in the example. Identifiers such as `__u64` and `BPF_MAP_TYPE_ARRAY` are shipped by the Linux kernel, with `__uint`, `__type`, `SEC` and BPF helper definitions being provided by libbpf. 3. Declare a BPF map called `pkt_count`, an Array-type Map holding a single u64 value. See `man bpf` or the online [bpf man pages](https://man7.org/linux/man-pages/man2/bpf.2.html) for an overview of all available map types.

For this example, we went with an array since it's a well-known data structure you're likely familiar with. In BPF, arrays are preallocated and zeroed, making them safe and ready to use without any initialization. 4. The Map definition is placed in the `.maps` ELF section, which is where {{ proj }} expects to find it. 5. In BPF, not all programs are equal. Some act on raw packets, some execute within the context of kernel or user space functions, while others expect to be run against an `__sk_buff`. These differences are encoded in the Program Type. libbpf introduced a set of conventions around which ELF sections correspond to which type. In this example, we've chosen `xdp` since we'll attach this program to the XDP hook later. 6. There's only one possible element in `pkt_count` since we've specified a `max_entries` value of 1. We'll always access the 0th element of the array. 7. Here, we're asking the BPF runtime for a pointer to the 0th element of the `pkt_count` Map.

`bpf_map_lookup_elem` is a BPF helper declared in `docs.h`. Helpers are small pieces of logic provided by the kernel that enable a BPF program to interact with its context or other parts of the kernel. Discover all BPF helpers supported by your kernel using `man bpf-helpers` or the online [bpf-helpers man pages](https://man7.org/linux/man-pages/man7/bpf-helpers.7.html). 8. All Map lookups can fail. If there's no element for the requested `key` in the Map, `count` will hold a null pointer. The BPF verifier is very strict about checking access to potential null pointers, so any further access to `count` needs to be gated by a null check. 9. Atomically increase the value pointed to by `count` by 1. It's important to note that on systems with SMP enabled (most systems nowadays), the same BPF program can be executed concurrently.

Even though we're loading only one 'copy' of our Program, accompanied by a single `pkt_count` Map, the kernel may need to process incoming packets on multiple receive queues in parallel, leading to multiple instances of the program being executed, and `pkt_count` effectively becoming a piece of shared memory. Use atomics to avoid dirty reads/writes. 10. XDP allows for dropping packets early, way before it's passed to the kernel's networking stack where routing, firewalling (ip/nftables) and things like TCP and sockets are implemented. We issue the `XDP_PASS` verdict to avoid ever interfering with the kernel's network stack. 11. Since some BPF helpers allow calling kernel code licensed under GPLv2, BPF programs using specific helpers need to declare they're (at least partially) licensed under GPL. Dual-licensing is possible, which we've opted for here with `Dual MIT/GPL`, since {{ proj }} is MIT-licensed. Create an empty directory and save this file as `counter.c`. In the next step, we'll set up the necessary bits to compile our eBPF C program using `bpf2go`. ## Compile eBPF C and generate scaffolding using bpf2go With the `counter.c` source file in place, create another file called `gen.go` containing a `//go:generate` statement. This invokes `bpf2go` when running `go generate` in the project directory. Aside from compiling our eBPF C program, bpf2go will also generate some scaffolding code we'll use to load our eBPF program into the kernel and interact with its various components. This greatly reduces the amount of code we need to write to get up and running. {{ go_example('getting_started_gen', title='gen.go') }} !!! tip "" Using a dedicated file for your package's `//go:generate` statement(s) is neat for keeping them separated from application logic. At this point in the guide, we don't have a `main.go` file yet. Feel free to include it in existing Go source files if you prefer. Before using the Go toolchain, Go wants us to declare a Go module. 
This command should take care of that: ```{ .shell-session data-copy="go mod init ebpf-test && go mod tidy" } % go mod init ebpf-test go: creating new go.mod: module ebpf-test go: to add module requirements and sums: go mod tidy % go mod tidy ``` First, add `bpf2go` as a tool dependency to your Go module. This ensures the version of `bpf2go` used by the Go toolchain always matches your version of the library. ```{ .shell-session data-copy="go get -tool github.com/cilium/ebpf/cmd/bpf2go" } % go get -tool github.com/cilium/ebpf/cmd/bpf2go ``` Now we're ready to run `go generate`: ```{ .shell-session data-copy="go generate" } % go generate Compiled /home/timo/getting_started/counter_bpfel.o Stripped /home/timo/getting_started/counter_bpfel.o Wrote /home/timo/getting_started/counter_bpfel.go Compiled /home/timo/getting_started/counter_bpfeb.o Stripped /home/timo/getting_started/counter_bpfeb.o Wrote /home/timo/getting_started/counter_bpfeb.go ``` `bpf2go` built `counter.c` into `counter_bpf*.o` behind the scenes using `clang`. It generated two object files and two corresponding Go source files based on the contents of the object files. Do not remove any of these, we'll need them later. Let's inspect one of the generated .go files: {{ go_example('counterPrograms', title='counter_bpfel.go', signature=True) }} Neat! Looks like bpf2go automatically generated a scaffolding for interacting with our `count_packets` Program from Go. In the next step, we'll explore how to load our program into the kernel and put it to work by attaching it to an XDP hook! ## The Go application Finally, with our eBPF C code compiled and Go scaffolding generated, all that's left is writing the Go code responsible for loading and attaching the program to a hook in the Linux kernel. Click the :material-plus-circle: annotations in the code snippet for some of the more intricate details. Note that we won't cover anything related to the Go standard library here. 
{{ go_example('getting_started_main', title='main.go') }} 1. Linux kernels before 5.11 use RLIMIT_MEMLOCK to control the maximum amount of memory allocated for a process' eBPF resources. By default, it's set to a relatively low value. See [Resource Limits](../concepts/rlimit.md) for a deep dive. 1. `counterObjects` is a struct containing nil pointers to Map and Program objects. A subsequent call to `loadCounterObjects` populates these fields based on the struct tags declared on them. This mechanism saves a lot of repetition that would occur by checking a Collection for Map and Program objects by string.

As an added bonus, `counterObjects` adds type safety by turning these into compile-time lookups. If a Map or Program doesn't appear in the ELF, it won't appear as a struct field and your Go application won't compile, eliminating a whole class of runtime errors. 1. Close all file descriptors held by `objs` right before the Go application terminates. See [Object Lifecycle](../concepts/object-lifecycle.md) for a deep dive. 1. Associate the `count_packets` (stylized in the Go scaffolding as `CountPackets`) eBPF program with `eth0`. This returns a {{ godoc('link/Link') }} abstraction. 1. Close the file descriptor of the Program-to-interface association. Note that this will stop the Program from executing on incoming packets if the Link was not {{ godoc('link/Link.Pin') }}ed to the bpf file system. 1. Load a uint64 stored at index 0 from the `pkt_count` Map (stylized in the Go scaffolding as `PktCount`). This corresponds to the logic in `counter.c`. Save this file as `main.go` in the same directory alongside `counter.c` and `gen.go`. ## Building and running the Go application Now that `main.go` is in place, we can finally compile and run our Go application! ```{ .shell-session data-copy="go build && sudo ./ebpf-test" } % go build && sudo ./ebpf-test 2023/09/20 17:18:43 Counting incoming packets on eth0.. 2023/09/20 17:18:47 Received 0 packets 2023/09/20 17:18:48 Received 4 packets 2023/09/20 17:18:49 Received 11 packets 2023/09/20 17:18:50 Received 15 packets ``` Generate some traffic on eth0 and you should see the counter increase. ### Iteration Workflow When iterating on the C code, make sure to keep generated files up-to-date. Without re-running bpf2go, the eBPF C won't be recompiled, and any changes made to the C program structure won't be reflected in the Go scaffolding. ```{ .shell-session data-copy="go generate && go build && sudo ./ebpf-test" } % go generate && go build && sudo ./ebpf-test ``` ## What's Next? 
Congratulations, you've just built your (presumably) first eBPF-powered Go app! Hopefully, this guide piqued your interest and gave you a better sense of what eBPF can do and how it works. With XDP, we've only barely scratched the surface of eBPF's many use cases and applications. For more easily-accessible examples, see [the main repository's examples/ folder](https://github.com/cilium/ebpf/tree/main/examples). It demonstrates use cases like tracing user space applications, extracting information from the kernel, attaching eBPF programs to network sockets and more. Follow our other guides to continue on your journey of shipping a portable eBPF-powered application to your users. ================================================ FILE: docs/ebpf/guides/portable-ebpf.md ================================================ # Shipping Portable eBPF-powered Applications !!! incomplete This guide builds on Getting Started. Document what the various ways are for making tools portable across kernel versions and what the various CO-RE techniques are. !!! tip "" We recommend building eBPF C code from within a container with a stable LLVM toolchain, as well as checking all generated `.o` and `.go` files into source control. This buys you fully-reproducible builds, prevents bugs due to team members using different LLVM versions and makes your packages fully independent and `go run`nable. It also prevents PII from leaking into ELFs in the form of absolute paths to `.c` source files in DWARF info. ### Cross-compiling You may have noticed bpf2go generating two sets of files: - `*_bpfel.o` and `*_bpfel.go` for little-endian architectures like amd64, arm64, riscv64 and loong64 - `*_bpfeb.o` and `*_bpfeb.go` for big-endian architectures like s390(x), mips and sparc Both sets of .go files contain a `//go:embed` statement that slurps the contents of the respective .o files into a byte slice at compile time. 
The result is a standalone Go application binary that can be deployed to a target machine without any of the .o files included. To further reduce runtime dependencies, add `CGO_ENABLED=0` to `go build` and your application won't depend on libc (assuming none of your other dependencies require cgo). Moreover, because both eBPF objects and Go scaffolding are generated for both big- and little-endian architectures, cross-compiling your Go application is as simple as setting the right `GOARCH` value at compile time. Pulling it all together, for building an eBPF-powered Go application for a Raspberry Pi running a 64-bit Linux distribution: ```shell-session CGO_ENABLED=0 GOARCH=arm64 go build ``` ### Compile Once - Run Everywhere? Since we can generate a standalone binary and deploy it to any system, does that mean tools built using {{ proj }} will magically work anywhere? Unfortunately, no, not really. The kernel's internal data structures change as the kernel progresses in development, just like any other software. Differences in compile-time configuration affect data structures and the presence of certain kernel symbols. This means that, even when using the exact same kernel release, no two Linux distributions will be the same when it comes to data layout. This is problematic for authors that want to ship a single binary to their users and expect it to work across multiple distributions and kernel versions. In response to this, the term *Compile Once - Run Everywhere* was coined to describe the collection of techniques employed to achieve universal interoperability for eBPF. These techniques rely on type information, encoded in BPF Type Format (BTF), being shipped with the kernel so memory accesses can be adjusted right before loading the eBPF program into the kernel. Alternatively, you may opt for shipping a full LLVM compiler toolchain along with your application and recompiling the eBPF C against Linux kernel headers present on the target machine. 
This approach is out of scope of the {{ proj }} documentation. ================================================ FILE: docs/ebpf/guides/windows-support.md ================================================ # Windows support The library has preliminary support for the [eBPF for Windows] runtime, allowing you to build Go applications for Windows using the same APIs as on Linux. !!! warning "Feature parity" efW doesn't have feature parity with Linux. Many APIs in the library will return `ErrNotSupported` in this case. !!! warning "Binary compatibility" efW is not binary compatible with Linux. It is not possible to compile an eBPF program for Linux and use it on Windows. ## Platform specific constants efW only provides [source compatibility] with Linux. While certain Linux map or program types have an equivalent on Windows, they don't always behave the same. For this reason, the various type enumerations have completely distinct values on Windows, for example `WindowsHashMap` is the equivalent of `HashMap`. Attempting to create a `HashMap` on Windows will return an error, and vice versa. ## Platform specific ELFs !!! note "" Loading Windows ELFs is not supported yet. ELFs compiled against Linux and Windows headers are not binary compatible. Add the following to ELFs targeting Windows until there is an [official way to declare the platform](https://github.com/microsoft/ebpf-for-windows/issues/3956): ```C const bool __ebpf_for_windows_tag __attribute__((section(".ebpf_for_windows"))) = true; ``` ## Working with signed programs The runtime will most likely require all eBPF programs to be signed by Microsoft. Signing programs relies on packaging eBPF `.c` files as drivers using the [native code pipeline], converting bytecode into a `.sys` file. The interface to load such drivers does not allow modifying the bytecode or map definitions, therefore you can't interact with them via `CollectionSpec`, etc. 
Instead you must load them via [`LoadCollection`][LoadCollection]: ```go coll, err := LoadCollection("path\\to\\driver.sys") ``` The returned Collection contains Maps and Programs which you can interact with as usual. [eBPF for Windows]: https://github.com/microsoft/ebpf-for-windows [source compatibility]: https://github.com/microsoft/ebpf-for-windows?tab=readme-ov-file#2-does-this-provide-app-compatibility-with-ebpf-programs-written-for-linux [native code pipeline]: https://github.com/microsoft/ebpf-for-windows/blob/main/docs/NativeCodeGeneration.md [LoadCollection]: https://pkg.go.dev/github.com/cilium/ebpf#LoadCollection ================================================ FILE: docs/ebpf/index.md ================================================

The eBPF Library for Go

![Honeygopher](ebpf-go.png){ align=right width="180" } [![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf) :ebpf-go: {{ proj }} is a Go library for working with :ebee-color: eBPF. It does not depend on C, libbpf, or any Go libraries other than the standard library, making it an excellent choice for writing self-contained, portable tools that run on a variety of architectures. This documentation aims to provide a central resource for learning how to build Go applications that use eBPF. ## Installing To add {{ proj }} as a dependency to an existing Go module, run this from within the module's directory: ``` go get github.com/cilium/ebpf ``` ## Target Audience This documentation assumes familiarity with the basic concepts and terminology of eBPF, as well as a basic understanding of the Go toolchain and how to write idiomatic Go code. For a high-level understanding of what eBPF is and how it works, please see [the eBPF introduction at :ebee-color: ebpf.io](https://ebpf.io/what-is-ebpf). ## Examples Discover [projects using {{ proj }} here](users.md). The repository contains an [examples/ directory](https://github.com/cilium/ebpf/tree/main/examples) with minimal demo applications that can be tested on any supported Linux machine. ================================================ FILE: docs/ebpf/stylesheets/extra.css ================================================ /* Tagline on landing page. */ .tagline { font-size: 3em; font-weight: 900; letter-spacing: -0.5px; background: linear-gradient(120deg, #4051B5, 35%, #6AD6E4); background-clip: text; -webkit-background-clip: text; -webkit-text-fill-color: transparent; } /* :progress-wrench: Custom 'incomplete' admonition for sections that need work or maintenance. Create blocks using '!!! incomplete'. 
*/ :root { --md-admonition-icon--incomplete: url('data:image/svg+xml;charset=utf-8,') } .md-typeset .admonition.incomplete, .md-typeset details.incomplete { border-color: rgb(255, 204, 77); } .md-typeset .incomplete>.admonition-title, .md-typeset .incomplete>summary { background-color: rgba(255, 204, 77, 0.1); } .md-typeset .incomplete>.admonition-title::before, .md-typeset .incomplete>summary::before { background-color: rgb(255, 204, 77); -webkit-mask-image: var(--md-admonition-icon--incomplete); mask-image: var(--md-admonition-icon--incomplete); } /* gp and go are the classes used for prompt and output in shell-session code blocks. Prevent these from being highlighted as it hurts UX. */ .highlight .gp, .highlight .go { user-select: none; } .md-typeset { .md-badge { font-size: 0.85em; .md-badge__icon { padding: 0.4em; background: var(--md-accent-fg-color--transparent); border-start-start-radius: 0.1em; border-end-start-radius: 0.1em; } .md-badge__text { padding: 0.4em 0.8em; border-start-end-radius: 0.1em; border-end-end-radius: 0.1em; box-shadow: 0 0 0 1px inset var(--md-accent-fg-color--transparent); } } .md-badge--right { float: right; margin-left: 0.35em; } } ================================================ FILE: docs/ebpf/users.md ================================================ # Projects built with {{ proj }} Below is a non-comprehensive list of open-source software built with {{ proj }}, just for inspiration or to gain a better understanding of how to tackle certain problems using eBPF. A list of :fontawesome-brands-golang: {{ proj }} importers can be found on [Sourcegraph]. If you'd like to include a project on this page, feel free to open a pull request. [`Cilium`](https://github.com/cilium/cilium) : Kubernetes-oriented Container Networking Interface implementation providing network policy and observability. 
[`containerd`](https://github.com/containerd/cgroups) & [`runc`](https://github.com/opencontainers/runc) : Used by Docker and podman, these use eBPF for implementing device filters in cgroups. [`coroot`](https://github.com/coroot/coroot) : Zero-instrumentation observability featuring root cause analysis and anomaly detection. [`datadog-agent`](https://github.com/DataDog/datadog-agent) : The Datadog agent, the component responsible for collecting system and application metrics and shipping them to the Datadog platform. [`Delve`](https://github.com/go-delve/delve) : A debugger for the Go programming language. Uses eBPF uprobes for tracing user space code execution. [`gVisor`](https://github.com/google/gvisor) : gVisor relies on eBPF for implementing various forms of guest/workload isolation and security. [`Inspektor Gadget`](https://github.com/inspektor-gadget/inspektor-gadget) : A collection of tools to debug and inspect Kubernetes resources and applications. Reimplements many of the BCC tools for easy deployment onto a Kubernetes cluster. [`Istio`](https://github.com/istio/istio) : In Istio’s ambient mode, eBPF is used for redirecting application traffic to the zero-trust tunnel on the node. [`KubeArmor`](https://github.com/kubearmor/KubeArmor) : KubeArmor allows restricting the behaviour of Pods, containers and Kubernetes nodes at the system level. [`kube-proxy-ng`](https://github.com/kubernetes-sigs/kpng) : Emerging eBPF-based `kube-proxy` implementation, developed by the upstream Kubernetes project. [`OpenShift`](https://github.com/openshift/ingress-node-firewall) : OpenShift's ingress node firewall is implemented using eBPF. [`pwru`](https://github.com/cilium/pwru) : Packet, where are you? `tcpdump`, but for tracing a packet's journey through the kernel. [`Pyroscope`](https://github.com/grafana/pyroscope) : From Grafana, open source continuous profiling platform. Flame graphs! 
[`Tetragon`](https://github.com/cilium/tetragon) : eBPF-based security framework, also providing observability and runtime enforcement. [`Tubular`](https://github.com/cloudflare/tubular) : From Cloudflare, bind a service to any IP or port. See [the announcement blog post](https://blog.cloudflare.com/tubular-fixing-the-socket-api-with-ebpf/) for a deep dive into why it was created and how it works. [Sourcegraph]: https://sourcegraph.com/search?q=context:global+lang:Go+type:file+github.com/cilium/ebpf+-repo:%5Egithub%5C.com/cilium/ebpf%24+-path:%5Evendor/+select:repo+&patternType=standard&sm=1&groupBy=repo ================================================ FILE: docs/examples/docs.c ================================================ //go:build ignore // DocMyMapProgram { #include <linux/bpf.h> #include <bpf/bpf_helpers.h> // Declare a hash map called 'my_map' with a u32 key and a u64 value. // The __uint, __type and SEC macros are from libbpf's bpf_helpers.h. struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, __u32); __type(value, __u64); __uint(max_entries, 1); } my_map SEC(".maps"); // Declare a dummy socket program called 'my_prog'. SEC("socket") int my_prog() { return 0; } // } ================================================ FILE: docs/examples/docs_test.go ================================================ //go:build linux package examples import ( "fmt" "github.com/cilium/ebpf" ) func DocLoadCollectionSpec() { // Parse an ELF into a CollectionSpec. // bpf_prog.o is the result of compiling BPF C code. spec, err := ebpf.LoadCollectionSpec("bpf_prog.o") if err != nil { panic(err) } // Look up the MapSpec and ProgramSpec in the CollectionSpec. m := spec.Maps["my_map"] p := spec.Programs["my_prog"] // Note: We've omitted nil checks for brevity, take a look at // LoadAndAssign for an automated way of checking for maps/programs. // Inspect the map and program type. fmt.Println(m.Type, p.Type) // Print the map's key and value BTF types. 
fmt.Println(m.Key, m.Value) // Print the program's instructions in a human-readable form, // similar to llvm-objdump -S. fmt.Println(p.Instructions) } func DocNewCollection() { spec, err := ebpf.LoadCollectionSpec("bpf_prog.o") if err != nil { panic(err) } // Instantiate a Collection from a CollectionSpec. coll, err := ebpf.NewCollection(spec) if err != nil { panic(err) } // Close the Collection before the enclosing function returns. defer coll.Close() // Obtain a reference to 'my_map'. m := coll.Maps["my_map"] // Set map key '1' to value '2'. if err := m.Put(uint32(1), uint64(2)); err != nil { panic(err) } } // DocLoadAndAssignObjs { type myObjs struct { MyMap *ebpf.Map `ebpf:"my_map"` MyProg *ebpf.Program `ebpf:"my_prog"` } func (objs *myObjs) Close() error { if err := objs.MyMap.Close(); err != nil { return err } if err := objs.MyProg.Close(); err != nil { return err } return nil } // } func DocLoadAndAssign() { spec, err := ebpf.LoadCollectionSpec("bpf_prog.o") if err != nil { panic(err) } // Insert only the resources specified in 'objs' into the kernel and assign // them to their respective fields. If any requested resources are not found // in the ELF, this will fail. Any errors encountered while loading Maps or // Programs will also be returned here. var objs myObjs if err := spec.LoadAndAssign(&objs, nil); err != nil { panic(err) } defer objs.Close() // Interact with MyMap through the custom struct. if err := objs.MyMap.Put(uint32(1), uint64(2)); err != nil { panic(err) } } func DocBTFTypeByName() { spec, err := ebpf.LoadCollectionSpec("bpf_prog.o") if err != nil { panic(err) } // Look up the __u64 type declared in linux/bpf.h. 
t, err := spec.Types.AnyTypeByName("__u64") if err != nil { panic(err) } fmt.Println(t) } ================================================ FILE: docs/examples/features_test.go ================================================ //go:build linux package examples import ( "errors" "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/features" ) func DocDetectXDP() { err := features.HaveProgramType(ebpf.XDP) if errors.Is(err, ebpf.ErrNotSupported) { fmt.Println("XDP program type is not supported") return } if err != nil { // Feature detection was inconclusive. // // Note: always log and investigate these errors! These can be caused // by a lack of permissions, verifier errors, etc. Unless stated // otherwise, probes are expected to be conclusive. Please file // an issue if this is not the case in your environment. panic(err) } fmt.Println("XDP program type is supported") } ================================================ FILE: docs/examples/getting_started/counter.c ================================================ // getting_started_counter { // (1)! //go:build ignore #include <linux/bpf.h> // (2)! #include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY); // (3)! __type(key, __u32); __type(value, __u64); __uint(max_entries, 1); } pkt_count SEC(".maps"); // (4)! // count_packets atomically increases a packet counter on every invocation. SEC("xdp") // (5)! int count_packets() { __u32 key = 0; // (6)! __u64 *count = bpf_map_lookup_elem(&pkt_count, &key); // (7)! if (count) { // (8)! __sync_fetch_and_add(count, 1); // (9)! } return XDP_PASS; // (10)! } char __license[] SEC("license") = "Dual MIT/GPL"; // (11)! // } ================================================ FILE: docs/examples/getting_started/counter_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. 
//go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadCounter returns the embedded CollectionSpec for counter. func loadCounter() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_CounterBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load counter: %w", err) } return spec, err } // loadCounterObjects loads counter and converts it into a struct. // // The following types are suitable as obj argument: // // *counterObjects // *counterPrograms // *counterMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadCounterObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadCounter() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // counterSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterSpecs struct { counterProgramSpecs counterMapSpecs counterVariableSpecs } // counterProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterProgramSpecs struct { CountPackets *ebpf.ProgramSpec `ebpf:"count_packets"` } // counterMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterMapSpecs struct { PktCount *ebpf.MapSpec `ebpf:"pkt_count"` } // counterVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterVariableSpecs struct { } // counterObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. 
type counterObjects struct { counterPrograms counterMaps counterVariables } func (o *counterObjects) Close() error { return _CounterClose( &o.counterPrograms, &o.counterMaps, ) } // counterMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. type counterMaps struct { PktCount *ebpf.Map `ebpf:"pkt_count"` } func (m *counterMaps) Close() error { return _CounterClose( m.PktCount, ) } // counterVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. type counterVariables struct { } // counterPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. type counterPrograms struct { CountPackets *ebpf.Program `ebpf:"count_packets"` } func (p *counterPrograms) Close() error { return _CounterClose( p.CountPackets, ) } func _CounterClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed counter_bpfeb.o var _CounterBytes []byte ================================================ FILE: docs/examples/getting_started/counter_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadCounter returns the embedded CollectionSpec for counter. 
func loadCounter() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_CounterBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load counter: %w", err) } return spec, err } // loadCounterObjects loads counter and converts it into a struct. // // The following types are suitable as obj argument: // // *counterObjects // *counterPrograms // *counterMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadCounterObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadCounter() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // counterSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterSpecs struct { counterProgramSpecs counterMapSpecs counterVariableSpecs } // counterProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterProgramSpecs struct { CountPackets *ebpf.ProgramSpec `ebpf:"count_packets"` } // counterMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterMapSpecs struct { PktCount *ebpf.MapSpec `ebpf:"pkt_count"` } // counterVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type counterVariableSpecs struct { } // counterObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. type counterObjects struct { counterPrograms counterMaps counterVariables } func (o *counterObjects) Close() error { return _CounterClose( &o.counterPrograms, &o.counterMaps, ) } // counterMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. 
type counterMaps struct { PktCount *ebpf.Map `ebpf:"pkt_count"` } func (m *counterMaps) Close() error { return _CounterClose( m.PktCount, ) } // counterVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. type counterVariables struct { } // counterPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadCounterObjects or ebpf.CollectionSpec.LoadAndAssign. type counterPrograms struct { CountPackets *ebpf.Program `ebpf:"count_packets"` } func (p *counterPrograms) Close() error { return _CounterClose( p.CountPackets, ) } func _CounterClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed counter_bpfel.o var _CounterBytes []byte ================================================ FILE: docs/examples/getting_started/gen.go ================================================ //go:build linux // getting_started_gen { package main //go:generate go tool bpf2go -tags linux counter counter.c // } ================================================ FILE: docs/examples/getting_started/main.go ================================================ //go:build linux // getting_started_main { package main import ( "log" "net" "os" "os/signal" "time" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) func main() { // Remove resource limits for kernels <5.11. if err := rlimit.RemoveMemlock(); err != nil { // (1)! log.Fatal("Removing memlock:", err) } // Load the compiled eBPF ELF and load it into the kernel. var objs counterObjects // (2)! if err := loadCounterObjects(&objs, nil); err != nil { log.Fatal("Loading eBPF objects:", err) } defer objs.Close() // (3)! ifname := "eth0" // Change this to an interface on your machine. 
iface, err := net.InterfaceByName(ifname) if err != nil { log.Fatalf("Getting interface %s: %s", ifname, err) } // Attach count_packets to the network interface. link, err := link.AttachXDP(link.XDPOptions{ // (4)! Program: objs.CountPackets, Interface: iface.Index, }) if err != nil { log.Fatal("Attaching XDP:", err) } defer link.Close() // (5)! log.Printf("Counting incoming packets on %s..", ifname) // Periodically fetch the packet counter from PktCount, // exit the program when interrupted. tick := time.Tick(time.Second) stop := make(chan os.Signal, 5) signal.Notify(stop, os.Interrupt) for { select { case <-tick: var count uint64 err := objs.PktCount.Lookup(uint32(0), &count) // (6)! if err != nil { log.Fatal("Map lookup:", err) } log.Printf("Received %d packets", count) case <-stop: log.Print("Received signal, exiting..") return } } } // } ================================================ FILE: docs/examples/rlimit_test.go ================================================ //go:build linux package examples // DocRlimit { import "github.com/cilium/ebpf/rlimit" func init() { if err := rlimit.RemoveMemlock(); err != nil { panic(err) } } // } ================================================ FILE: docs/examples/variables/gen.go ================================================ package main //go:generate go tool bpf2go variables variables.c ================================================ FILE: docs/examples/variables/main.go ================================================ package main import ( "fmt" "github.com/cilium/ebpf" ) func main() { DocVariablesSetConst() DocVariablesSetGlobal() } // Full example written to be displayed in its entirety, so is commented generously. func DocVariablesSetConst() { // Load the object file from disk using a bpf2go-generated scaffolding. spec, err := loadVariables() if err != nil { panicf("loading CollectionSpec: %s", err) } // Set the 'const_u32' variable to 42 in the CollectionSpec. want := uint32(42) // (1)! 
if err := spec.Variables["const_u32"].Set(want); err != nil { panicf("setting variable: %s", err) } // Load the CollectionSpec. // // Note: modifying spec.Variables after this point is ineffectual! // Modifying *Spec resources does not affect loaded/running BPF programs. var obj variablesPrograms if err := spec.LoadAndAssign(&obj, nil); err != nil { panicf("loading BPF program: %s", err) } fmt.Println("Running program with const_u32 set to", want) // Dry-run the BPF program with an empty context. ret, _, err := obj.ConstExample.Test(make([]byte, 15)) // (2)! if err != nil { panicf("running BPF program: %s", err) } if ret != want { panicf("unexpected return value %d", ret) } fmt.Println("BPF program returned", ret) // Output: // Running program with const_u32 set to 42 // BPF program returned 42 } func DocVariablesSetGlobal() { spec, err := loadVariables() if err != nil { panicf("loading CollectionSpec: %s", err) } // DocVariablesSetGlobalU16 { set := uint16(9000) if err := spec.Variables["global_u16"].Set(set); err != nil { panicf("setting variable: %s", err) } // } coll, err := ebpf.NewCollection(spec) if err != nil { panicf("loading BPF program: %s", err) } fmt.Println("Running program with global_u16 set to", set) // DocVariablesSetGlobalRun { for range 3 { ret, _, err := coll.Programs["global_example"].Test(make([]byte, 15)) if err != nil { panicf("running BPF program: %s", err) } fmt.Println("BPF program returned", ret) } // Output: // Running program with global_u16 set to 9000 // BPF program returned 9000 // BPF program returned 9001 // BPF program returned 9002 // } // DocVariablesGetGlobalU16 { var global_u16 uint16 if err := coll.Variables["global_u16"].Get(&global_u16); err != nil { panicf("getting variable: %s", err) } fmt.Println("Variable global_u16 is now", global_u16) // Output: // Variable global_u16 is now 9003 // } } func panicf(format string, args ...interface{}) { panic(fmt.Sprintf(format, args...)) } 
================================================ FILE: docs/examples/variables/variables.c ================================================ //go:build ignore #include <linux/bpf.h> #include <bpf/bpf_helpers.h> // Remove when toolchain Docker image ships with 5.13+ headers. #define __hidden __attribute__((visibility("hidden"))) // variables_const { volatile const __u32 const_u32; SEC("socket") int const_example() { return const_u32; } // } // variables_global { volatile __u16 global_u16; SEC("socket") int global_example() { global_u16++; return global_u16; } // } // variables_hidden { __hidden __u64 hidden_var; SEC("socket") int hidden_example() { hidden_var++; return hidden_var; } // } ================================================ FILE: docs/examples/variables/variables_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build mips || mips64 || ppc64 || s390x package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadVariables returns the embedded CollectionSpec for variables. func loadVariables() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_VariablesBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load variables: %w", err) } return spec, err } // loadVariablesObjects loads variables and converts it into a struct. // // The following types are suitable as obj argument: // // *variablesObjects // *variablesPrograms // *variablesMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadVariablesObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadVariables() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // variablesSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type variablesSpecs struct { variablesProgramSpecs variablesMapSpecs variablesVariableSpecs } // variablesProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type variablesProgramSpecs struct { ConstExample *ebpf.ProgramSpec `ebpf:"const_example"` GlobalExample *ebpf.ProgramSpec `ebpf:"global_example"` HiddenExample *ebpf.ProgramSpec `ebpf:"hidden_example"` } // variablesMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type variablesMapSpecs struct { } // variablesVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type variablesVariableSpecs struct { ConstU32 *ebpf.VariableSpec `ebpf:"const_u32"` GlobalU16 *ebpf.VariableSpec `ebpf:"global_u16"` } // variablesObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. type variablesObjects struct { variablesPrograms variablesMaps variablesVariables } func (o *variablesObjects) Close() error { return _VariablesClose( &o.variablesPrograms, &o.variablesMaps, ) } // variablesMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. type variablesMaps struct { } func (m *variablesMaps) Close() error { return _VariablesClose() } // variablesVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. type variablesVariables struct { ConstU32 *ebpf.Variable `ebpf:"const_u32"` GlobalU16 *ebpf.Variable `ebpf:"global_u16"` } // variablesPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. 
type variablesPrograms struct { ConstExample *ebpf.Program `ebpf:"const_example"` GlobalExample *ebpf.Program `ebpf:"global_example"` HiddenExample *ebpf.Program `ebpf:"hidden_example"` } func (p *variablesPrograms) Close() error { return _VariablesClose( p.ConstExample, p.GlobalExample, p.HiddenExample, ) } func _VariablesClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed variables_bpfeb.o var _VariablesBytes []byte ================================================ FILE: docs/examples/variables/variables_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build 386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadVariables returns the embedded CollectionSpec for variables. func loadVariables() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_VariablesBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load variables: %w", err) } return spec, err } // loadVariablesObjects loads variables and converts it into a struct. // // The following types are suitable as obj argument: // // *variablesObjects // *variablesPrograms // *variablesMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadVariablesObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadVariables() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // variablesSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type variablesSpecs struct { variablesProgramSpecs variablesMapSpecs variablesVariableSpecs } // variablesProgramSpecs contains programs before they are loaded into the kernel. 
// // It can be passed ebpf.CollectionSpec.Assign. type variablesProgramSpecs struct { ConstExample *ebpf.ProgramSpec `ebpf:"const_example"` GlobalExample *ebpf.ProgramSpec `ebpf:"global_example"` HiddenExample *ebpf.ProgramSpec `ebpf:"hidden_example"` } // variablesMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type variablesMapSpecs struct { } // variablesVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type variablesVariableSpecs struct { ConstU32 *ebpf.VariableSpec `ebpf:"const_u32"` GlobalU16 *ebpf.VariableSpec `ebpf:"global_u16"` } // variablesObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. type variablesObjects struct { variablesPrograms variablesMaps variablesVariables } func (o *variablesObjects) Close() error { return _VariablesClose( &o.variablesPrograms, &o.variablesMaps, ) } // variablesMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. type variablesMaps struct { } func (m *variablesMaps) Close() error { return _VariablesClose() } // variablesVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. type variablesVariables struct { ConstU32 *ebpf.Variable `ebpf:"const_u32"` GlobalU16 *ebpf.Variable `ebpf:"global_u16"` } // variablesPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadVariablesObjects or ebpf.CollectionSpec.LoadAndAssign. 
type variablesPrograms struct { ConstExample *ebpf.Program `ebpf:"const_example"` GlobalExample *ebpf.Program `ebpf:"global_example"` HiddenExample *ebpf.Program `ebpf:"hidden_example"` } func (p *variablesPrograms) Close() error { return _VariablesClose( p.ConstExample, p.GlobalExample, p.HiddenExample, ) } func _VariablesClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed variables_bpfel.o var _VariablesBytes []byte ================================================ FILE: docs/includes/glossary.md ================================================ *[Program]: Instructions that can be loaded and attached to one or more hooks in the Linux kernel. *[Map]: Shared piece of memory between userspace and an eBPF program loaded into the kernel. *[Link]: Connection between a Program and a hook/event in the kernel. *[BTF]: BPF Type Format; a description of all data types present in the Linux kernel or an eBPF object. *[ELF]: Executable and Linkable Format, a container format used for compiled eBPF programs. *[Spec]: Unrealized blueprint of an eBPF resource, e.g. MapSpec, ProgramSpec, btf.Spec. *[CollectionSpec]: Bundle of ProgramSpecs, MapSpecs and a btf.Spec. Direct result of loading an eBPF ELF. *[VariableSpec]: Accessor for a global variable declared in an eBPF program. *[Collection]: Bundle of Maps and Programs that were loaded into the kernel. Direct result of instantiating (loading into the kernel) a CollectionSpec. *[Variable]: Accessor for a global variable declared in an eBPF program, used after loading. *[bpffs]: Virtual filesystem for 'pinning' references to eBPF resources in a familiar file hierarchy. Usually mounted at /sys/fs/bpf, but many individual instances can be mounted. *[helper]: A piece of logic provided by the kernel. Read a map value, redirect a packet, etc. *[kfunc]: An extensible evolution of the BPF helper mechanism.
Can be dynamically provided by kernel modules. Not specified in UAPI. *[XDP]: eXpress Data Path, a high-performance eBPF-powered data path. Only has a receive hook. *[bpf2go]: Convenience utility to compile eBPF C using clang and generate a Go skeleton. *[libbpf]: A library for writing kernel- and user space BPF programs in C, developed by the upstream Linux project. *[qemu]: A popular virtual machine manager. *[DCO]: Developer Certificate of Origin. *[efW]: eBPF for Windows ================================================ FILE: docs/macros.py ================================================ """Macro definitions for documentation.""" # Use built-in 'list' type when upgrading to Python 3.9. import glob import os import re import textwrap from io import TextIOWrapper from typing import List from urllib.parse import ParseResult, urlparse from mkdocs_macros.plugin import MacrosPlugin def define_env(env: MacrosPlugin): """ Define the mkdocs-macros-plugin environment. This function is called on setup. 'env' can be interacted with for defining variables, macros and filters. - variables: the dictionary that contains the environment variables - macro: a decorator function, to declare a macro. - filter: a function with one or more arguments, used to perform a transformation """ # Values can be overridden in mkdocs.yml:extras. go_examples_path: str = env.variables.get( "go_examples_path", "examples/**/*.go" ) godoc_url: ParseResult = urlparse( env.variables.get( "godoc_url", "https://pkg.go.dev/github.com/cilium/ebpf" ) ) c_examples_path: str = env.variables.get("c_examples_path", "examples/**/*.c") @env.macro def godoc(sym: str, short: bool = False): """ Generate a godoc link based on the configured godoc_url. `sym` is the symbol to link to. A dot '.' separator means it's a method on another type. Forward slashes '/' can be used to navigate to symbols in subpackages. 
For example: - CollectionSpec.LoadAndAssign - link/Link - btf/Spec.TypeByID `short` renders only the symbol name. """ if len(godoc_url) == 0: raise ValueError("Empty godoc url") # Support referring to symbols in subpackages. subpkg = os.path.dirname(sym) # Symbol name including dots for struct methods. (e.g. Map.Get) name = os.path.basename(sym) # Python's urljoin() expects the base path to have a trailing slash for # it to correctly append subdirs. Use urlparse instead, and interact # with the URL's components individually. url = godoc_url._replace( path=os.path.join(godoc_url.path, subpkg), # Anchor token appearing after the # in the URL. fragment=name, ).geturl() text = name if short: text = text.split(".")[-1] return f"[:fontawesome-brands-golang: `{text}`]({url})" @env.macro def go_example(*args, **kwargs): """ Include the body of a Go code example. See docstring of code_example() for details. """ return code_example( *args, **kwargs, language="go", path=go_examples_path ) @env.macro def c_example(*args, **kwargs): """ Include the body of a C code example. See docstring of `code_example` for details. """ return code_example( *args, **kwargs, language="c", path=c_examples_path ) @env.macro def linux_version(version: str, why: str = ''): """ Render a badge with the Linux logo and a version number denoting the minimum kernel version needed to use a feature. Optional string to explain why the feature won't work on older versions. """ return ('' # TODO: Make the icon link to some docs about handling kernel # versions, once those are written. ':simple-linux:' f'[{version}](# "{why}")' '') def code_example( symbol: str, title: str = None, language: str = "", lines: bool = True, signature: bool = False, path: str = "", ) -> str: """ Include the body of a code example. `symbol` takes the name of the function or snippet to include. `title` is rendered as a title at the top of the snippet. `language` is the name of the programming language passed to pygments. 
`lines` controls rendering line numbers. `signature` controls whether or not the function signature and brackets are included. `path` specifies the include path that may contain globs. """ opts: List[str] = [] if lines: opts.append("linenums='1'") if title: opts.append(f"title='{title}'") if signature: body = full_body(path, symbol) else: body = inner_body(path, symbol) out = f"``` {language} {' '. join(opts)}\n{body}```" return out def inner_body(path: str, sym: str) -> str: """ Get the inner body of sym, using default delimiters. First and last lines (so, function signature and closing bracket) are stripped, the remaining body dedented. """ out = _search_body(path, sym) if len(out) < 2: raise ValueError( f"Need at least two lines to get inner body for symbol {sym}" ) return textwrap.dedent("".join(out[1:-1])) def full_body(path: str, sym: str) -> str: """Get the full body of sym, using default delimiters, dedented.""" out = _search_body(path, sym) return textwrap.dedent("".join(out)) def _get_body( f: TextIOWrapper, sym: str, start: str = "{", end: str = "}" ) -> List[str]: """ Extract a body of text between sym and start/end delimiters. Tailored to finding function bodies of C-family programming languages with curly braces. The starting line of the body must contain sym prefixed by a space, with 'start' appearing on the same line, for example " Foo() {". Further occurrences of "{" and its closing counterpart "}" are tracked, and the lines between and including the final "}" are returned. """ found = False stack = 0 lines = [] for line in f.readlines(): if not found: # Skip current line if we're not in a body and the current line # doesn't contain the given symbol. # # The symbol must be surrounded by non-word characters like spaces # or parentheses. For example, a line "// DocObjs {" or "func # DocLoader() {" should match. if re.search(rf"\W{sym}\W", line) is None: continue found = True # Count the amount of start delimiters. 
stack += line.count(start) if stack == 0: # No opening delimiter found, ignore the line. found = False continue lines.append(line) # Count the amount of end delimiters and stop if we've escaped the # current scope. stack -= line.count(end) if stack <= 0: break # Rewind the file for reuse. f.seek(0) if stack > 0: raise LookupError(f"No end delimiter for {sym}") if len(lines) == 0: raise LookupError(f"Symbol {sym} not found") return lines def _search_body(path: str, sym: str) -> List[str]: """Find the body of the given symbol in a path glob.""" files = glob.glob(path, recursive=True) if len(files) == 0: raise LookupError(f"Path {path} did not match any files") for file in files: with open(file, mode="r") as f: try: return _get_body(f, sym) except LookupError: continue raise LookupError(f"Symbol {sym} not found in any of {files}") ================================================ FILE: docs/mkdocs.yml ================================================ site_name: "ebpf-go Documentation" site_description: Pure-Go library to read, modify and load eBPF programs and attach them to various hooks in the Linux kernel. site_author: Cilium Community # Rendered in header. repo_url: https://github.com/cilium/ebpf repo_name: cilium/ebpf edit_uri: edit/main/docs/ebpf/ # Directory to look for Markdown files within docs/. docs_dir: ebpf theme: logo: ebpf-go.png favicon: ebpf-go.png name: material icon: # GitHub link in the header. repo: fontawesome/brands/github-alt # Edit button at the top of each page. edit: material/pencil-ruler features: # Display sections in the navbar. - navigation.sections # Anchor tracking, updates the address bar with the active anchor. - navigation.tracking # Use XHR instead of fully reloading the page when navigating around. - nagivation.instant # Clipboard button in code blocks. - content.code.copy # Enable annotations in code blocks. - content.code.annotate # Button to edit page on GitHub. 
- content.action.edit # Better (faster) tooltips, replacing the browser's rendering logic. - content.tooltips palette: # Palette toggle for light mode - media: "(prefers-color-scheme: light)" scheme: default toggle: icon: material/lightbulb-off name: Switch to dark mode # Palette toggle for dark mode - media: "(prefers-color-scheme: dark)" scheme: slate toggle: icon: material/lightbulb-on name: Switch to light mode # Template overrides. custom_dir: overrides nav: - 'Home': index.md - 'Guides': - 'Getting Started': guides/getting-started.md - 'Portable eBPF': guides/portable-ebpf.md - 'Windows support': guides/windows-support.md - 'Concepts': - 'Loading eBPF Programs': concepts/loader.md - 'Global Variables': concepts/global-variables.md - 'Resource Limits': concepts/rlimit.md - 'Section Naming': concepts/section-naming.md - 'Feature Detection': concepts/features.md - 'Object Lifecycle': concepts/object-lifecycle.md - 'Contributing': - contributing/index.md - contributing/architecture.md - contributing/new-feature.md - contributing/new-example.md - contributing/windows.md - 'Users': users.md - 'Go Reference': https://pkg.go.dev/github.com/cilium/ebpf - 'GitHub': - 'Repository': https://github.com/cilium/ebpf - 'Issue Tracker': https://github.com/cilium/ebpf/issues - 'Discussions': https://github.com/cilium/ebpf/discussions - 'About': about.md extra: social: - icon: fontawesome/brands/github link: https://github.com/cilium/ebpf extra_css: - stylesheets/extra.css watch: - examples/ - includes/ - overrides/ - macros.py plugins: - search - macros: # This opens macros.py in docs/. module_name: macros # Make the mkdocs build fail if any errors occur. # Otherwise, any errors would be rendered to the build output. on_error_fail: true include_yaml: - vars.yml # Updated/authors displayed in footer. # Layout is customized in overrides/partials/source-file.html. 
- git-revision-date-localized: type: timeago - git-authors: show_email_address: false authorship_threshold_percent: 10 exclude: - index.md # Enable syntax highlighting in mkdocs-material. markdown_extensions: # Automatic tooltips for abbreviations/glossary. - abbr # Setting attributes on code fences (e.g. ``` go linenums='1') - attr_list # Special content blocks like '!!! note' - admonition # Definition lists using indented descriptions - def_list - footnotes # Collapsible admonitions - pymdownx.details # Syntax highlighting in code blocks - pymdownx.highlight: anchor_linenums: true - pymdownx.inlinehilite # Glossary - pymdownx.snippets: auto_append: - includes/glossary.md # Superfences enables nested and tabbed code blocks and Mermaid support - pymdownx.superfences: custom_fences: - name: mermaid class: mermaid format: !!python/name:pymdownx.superfences.fence_code_format # Content tabs for code snippets, checklists, etc. - pymdownx.tabbed: alternate_style: true # Emoji and icons like :fontawesome-brands-golang: - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg options: custom_icons: - overrides/.icons # Table of Contents - toc: permalink: true ================================================ FILE: docs/overrides/partials/source-file.html ================================================
{% if page.meta.git_revision_date_localized %} Last updated {{ page.meta.git_revision_date_localized }} {% endif %}
{% if git_page_authors %} Authored by {{ git_page_authors }} {% endif %}
================================================ FILE: docs/vars.yml ================================================ # Variables accessible in documentation using e.g. '{{ proj }}'. extra: proj: "`ebpf-go`" ================================================ FILE: elf_reader.go ================================================ package ebpf import ( "bufio" "bytes" "debug/elf" "encoding/binary" "errors" "fmt" "io" "iter" "maps" "math" "os" "slices" "strings" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" ) type kconfigMetaKey struct{} type kconfigMeta struct { Map *MapSpec Offset uint32 } type kfuncMetaKey struct{} type kfuncMeta struct { Binding elf.SymBind Func *btf.Func } type ksymMetaKey struct{} type ksymMeta struct { Binding elf.SymBind Name string } // elfCode is a convenience to reduce the amount of arguments that have to // be passed around explicitly. You should treat its contents as immutable. type elfCode struct { *internal.SafeELFFile sections map[elf.SectionIndex]*elfSection license string version uint32 btf *btf.Spec extInfo *btf.ExtInfos maps map[string]*MapSpec vars map[string]*VariableSpec kfuncs map[string]*btf.Func ksyms map[string]struct{} kconfig *MapSpec } // LoadCollectionSpec parses an ELF file into a CollectionSpec. func LoadCollectionSpec(file string) (*CollectionSpec, error) { f, err := os.Open(file) if err != nil { return nil, err } defer f.Close() spec, err := LoadCollectionSpecFromReader(f) if err != nil { return nil, fmt.Errorf("file %s: %w", file, err) } return spec, nil } // LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec. func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { f, err := internal.NewSafeELFFile(rd) if err != nil { return nil, err } // Checks if the ELF file is for BPF data. // Old LLVM versions set e_machine to EM_NONE. 
if f.Machine != elf.EM_NONE && f.Machine != elf.EM_BPF { return nil, fmt.Errorf("unexpected machine type for BPF ELF: %s", f.Machine) } var ( licenseSection *elf.Section versionSection *elf.Section sections = make(map[elf.SectionIndex]*elfSection) relSections = make(map[elf.SectionIndex]*elf.Section) ) // This is the target of relocations generated by inline assembly. sections[elf.SHN_UNDEF] = newElfSection(new(elf.Section), undefSection) // Collect all the sections we're interested in. This includes relocations // which we parse later. // // Keep the documentation at docs/ebpf/loading/elf-sections.md up-to-date. for i, sec := range f.Sections { idx := elf.SectionIndex(i) switch { case strings.HasPrefix(sec.Name, "license"): licenseSection = sec case strings.HasPrefix(sec.Name, "version"): versionSection = sec case strings.HasPrefix(sec.Name, "maps"): sections[idx] = newElfSection(sec, mapSection) case sec.Name == ".maps": sections[idx] = newElfSection(sec, btfMapSection) case isDataSection(sec.Name): sections[idx] = newElfSection(sec, dataSection) case sec.Type == elf.SHT_REL: // Store relocations under the section index of the target relSections[elf.SectionIndex(sec.Info)] = sec case sec.Type == elf.SHT_PROGBITS && sec.Size > 0: if (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0 { sections[idx] = newElfSection(sec, programSection) } else if sec.Name == structOpsLinkSec { // classification based on sec names so that struct_ops-specific // sections (.struct_ops.link) is correctly recognized // as non-executable PROGBITS, allowing value placement and link metadata to be loaded. 
sections[idx] = newElfSection(sec, structOpsSection) } else if sec.Name == structOpsSec { return nil, fmt.Errorf("section %q: got '.struct_ops' section: %w", sec.Name, ErrNotSupported) } } } license, err := loadLicense(licenseSection) if err != nil { return nil, fmt.Errorf("load license: %w", err) } version, err := loadVersion(versionSection, f.ByteOrder) if err != nil { return nil, fmt.Errorf("load version: %w", err) } btfSpec, btfExtInfo, err := btf.LoadSpecAndExtInfosFromReader(rd) if err != nil && !errors.Is(err, btf.ErrNotFound) { return nil, fmt.Errorf("load BTF: %w", err) } ec := &elfCode{ SafeELFFile: f, sections: sections, license: license, version: version, btf: btfSpec, extInfo: btfExtInfo, maps: make(map[string]*MapSpec), vars: make(map[string]*VariableSpec), kfuncs: make(map[string]*btf.Func), ksyms: make(map[string]struct{}), } symbols, err := f.Symbols() if err != nil { return nil, fmt.Errorf("load symbols: %v", err) } ec.assignSymbols(symbols) if err := ec.loadRelocations(relSections, symbols); err != nil { return nil, fmt.Errorf("load relocations: %w", err) } if err := ec.loadMaps(); err != nil { return nil, fmt.Errorf("load maps: %w", err) } if err := ec.loadBTFMaps(); err != nil { return nil, fmt.Errorf("load BTF maps: %w", err) } if err := ec.loadDataSections(); err != nil { return nil, fmt.Errorf("load data sections: %w", err) } if err := ec.loadKconfigSection(); err != nil { return nil, fmt.Errorf("load virtual .kconfig section: %w", err) } if err := ec.loadKsymsSection(); err != nil { return nil, fmt.Errorf("load virtual .ksyms section: %w", err) } // Finally, collect programs and link them. 
progs, err := ec.loadProgramSections() if err != nil { return nil, fmt.Errorf("load programs: %w", err) } // assiociate members in structs with ProgramSpecs using relo if err := ec.associateStructOpsRelocs(progs); err != nil { return nil, fmt.Errorf("load struct_ops: %w", err) } return &CollectionSpec{ ec.maps, progs, ec.vars, btfSpec, ec.ByteOrder, }, nil } func loadLicense(sec *elf.Section) (string, error) { if sec == nil { return "", nil } data, err := sec.Data() if err != nil { return "", fmt.Errorf("section %s: %v", sec.Name, err) } return string(bytes.TrimRight(data, "\000")), nil } func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) { if sec == nil { return 0, nil } var version uint32 if err := binary.Read(sec.Open(), bo, &version); err != nil { return 0, fmt.Errorf("section %s: %v", sec.Name, err) } return version, nil } func isDataSection(name string) bool { return name == ".bss" || strings.HasPrefix(name, ".data") || strings.HasPrefix(name, ".rodata") } func isConstantDataSection(name string) bool { return strings.HasPrefix(name, ".rodata") } func isKconfigSection(name string) bool { return name == ".kconfig" } type elfSectionKind int const ( undefSection elfSectionKind = iota mapSection btfMapSection programSection dataSection structOpsSection ) type elfSection struct { *elf.Section kind elfSectionKind // Offset from the start of the section to a symbol symbols map[uint64]elf.Symbol // Offset from the start of the section to a relocation, which points at // a symbol in another section. relocations map[uint64]elf.Symbol // The number of relocations pointing at this section. references int } func newElfSection(section *elf.Section, kind elfSectionKind) *elfSection { return &elfSection{ section, kind, make(map[uint64]elf.Symbol), make(map[uint64]elf.Symbol), 0, } } // symbolsSorted returns the section's symbols sorted by offset. 
func (es *elfSection) symbolsSorted() iter.Seq2[uint64, elf.Symbol] {
	return func(yield func(uint64, elf.Symbol) bool) {
		// es.symbols is a map, so walk an ordered copy of its keys to get a
		// stable, ascending-offset iteration.
		offsets := slices.Sorted(maps.Keys(es.symbols))
		for _, offset := range offsets {
			if !yield(offset, es.symbols[offset]) {
				return
			}
		}
	}
}

// assignSymbols files each of the given symbols under the section it belongs
// to, keyed by the symbol's offset (its Value) within that section.
//
// A symbol is dropped when it is anonymous, when its section is not tracked in
// ec.sections, or when its type does not fit the kind of section it lives in.
func (ec *elfCode) assignSymbols(symbols []elf.Symbol) {
	for _, sym := range symbols {
		// Anonymous symbols only occur in debug sections which we don't process
		// relocations for. Anonymous symbols are not referenced from other sections.
		if sym.Name == "" {
			continue
		}

		sec := ec.sections[sym.Section]
		if sec == nil {
			continue
		}

		typ := elf.ST_TYPE(sym.Info)

		// Older versions of LLVM don't tag symbols correctly, so NOTYPE symbols
		// are kept alongside the properly typed ones.
		untyped := typ == elf.STT_NOTYPE

		// Only symbols in program/maps/data sections are ever collected; for any
		// other section kind keep stays false.
		var keep bool
		switch sec.kind {
		case mapSection, btfMapSection, dataSection:
			keep = untyped || typ == elf.STT_OBJECT

		case programSection:
			// Program sections may contain NOTYPE symbols with local scope, these
			// are usually labels for jumps. We do not care for these for the
			// purposes of linking and they may overlap with function symbols.
			localLabel := untyped && elf.ST_BIND(sym.Info) == elf.STB_LOCAL
			keep = (untyped || typ == elf.STT_FUNC) && !localLabel
		}
		if !keep {
			continue
		}

		sec.symbols[sym.Value] = sym
	}
}

// loadRelocations iterates .rel* sections and extracts relocation entries for
// sections of interest. Makes sure relocations point at valid sections.
func (ec *elfCode) loadRelocations(relSections map[elf.SectionIndex]*elf.Section, symbols []elf.Symbol) error {
	for idx, relSection := range relSections {
		// Relocation sections whose index has no entry in ec.sections are
		// skipped.
		section := ec.sections[idx]
		if section == nil {
			continue
		}

		rels, err := ec.loadSectionRelocations(relSection, symbols)
		if err != nil {
			return fmt.Errorf("relocation for section %q: %w", section.Name, err)
		}

		for _, rel := range rels {
			// Every relocation must point at a symbol in a section known to
			// ec, otherwise the whole load is rejected.
			target := ec.sections[rel.Section]
			if target == nil {
				return fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported)
			}

			// Count incoming references on the target section.
			target.references++
		}

		section.relocations = rels
	}

	return nil
}

// loadProgramSections iterates ec's sections and emits a ProgramSpec
// for each function it finds.
//
// The resulting map is indexed by function name. Functions located in the
// .text section are handed to flattenPrograms along with all others, but are
// removed from the returned map.
func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) {
	progs := make(map[string]*ProgramSpec)

	// Generate a ProgramSpec for each function found in each program section.
	// export collects the names of all functions outside of .text.
	var export []string
	for _, sec := range ec.sections {
		if sec.kind != programSection {
			continue
		}

		if len(sec.symbols) == 0 {
			return nil, fmt.Errorf("section %v: missing symbols", sec.Name)
		}

		funcs, err := ec.loadFunctions(sec)
		if err != nil {
			return nil, fmt.Errorf("section %v: %w", sec.Name, err)
		}

		// Program and attach type are derived from the section name and are
		// shared by all functions in the section.
		progType, attachType, progFlags, attachTo := getProgType(sec.Name)

		for name, insns := range funcs {
			spec := &ProgramSpec{
				Name:          name,
				Type:          progType,
				Flags:         progFlags,
				AttachType:    attachType,
				AttachTo:      attachTo,
				SectionName:   sec.Name,
				License:       ec.license,
				KernelVersion: ec.version,
				Instructions:  insns,
				ByteOrder:     ec.ByteOrder,
			}

			// Function names must be unique within a single ELF blob.
			if progs[name] != nil {
				return nil, fmt.Errorf("duplicate program name %s", name)
			}
			progs[name] = spec

			if spec.SectionName != ".text" {
				export = append(export, name)
			}
		}
	}

	flattenPrograms(progs, export)

	// Hide programs (e.g. library functions) that were not explicitly emitted
	// to an ELF section. These could be exposed in a separate CollectionSpec
	// field later to allow them to be modified.
	for n, p := range progs {
		if p.SectionName == ".text" {
			delete(progs, n)
		}
	}

	return progs, nil
}

// loadFunctions extracts instruction streams from the given program section
// starting at each symbol in the section. The section's symbols must already
// be narrowed down to STT_NOTYPE (emitted by clang <8) or STT_FUNC.
//
// Each instruction is either relocated, if a relocation exists at its offset,
// or checked for a section-relative function call. It is then tagged with any
// ExtInfo metadata pointing at its raw instruction offset.
//
// The resulting map is indexed by function name. Duplicate symbol names and
// symbols without any instructions result in an error.
func (ec *elfCode) loadFunctions(sec *elfSection) (map[string]asm.Instructions, error) {
	progs := make(map[string]asm.Instructions)

	// Pull out ExtInfos once per section to avoid map lookups on every
	// instruction.
	fo, lo, ro := ec.extInfo.Section(sec.Name)

	// Raw instruction count since start of the section. ExtInfos point at raw
	// insn offsets and ignore the gaps between symbols in case of linked objects.
	// We need to count them, we can't obtain this info by any other means.
	var raw asm.RawInstructionOffset

	// Sort symbols by offset so we can track instructions by their raw offsets.
	for _, sym := range sec.symbolsSorted() {
		if progs[sym.Name] != nil {
			return nil, fmt.Errorf("duplicate symbol %s in section %s", sym.Name, sec.Name)
		}

		// Decode the symbol's instruction stream, limited to its size.
		sr := internal.NewBufferedSectionReader(sec, int64(sym.Value), int64(sym.Size))
		insns := make(asm.Instructions, 0, sym.Size/asm.InstructionSize)
		insns, err := asm.AppendInstructions(insns, sr, ec.ByteOrder, platform.Linux)
		if err != nil {
			return nil, fmt.Errorf("decoding instructions for symbol %s in section %s: %w", sym.Name, sec.Name, err)
		}
		if len(insns) == 0 {
			return nil, fmt.Errorf("no instructions found for symbol %s in section %s", sym.Name, sec.Name)
		}

		// Mark the first instruction as the start of a function.
		insns[0] = insns[0].WithSymbol(sym.Name)

		iter := insns.Iterate()
		for iter.Next() {
			// Global byte offset of the instruction within the ELF section.
			offset := sym.Value + iter.Offset.Bytes()

			// Apply any relocations for the current instruction. If no relocation is
			// present, resolve any section-relative function calls.
			if rel, ok := sec.relocations[offset]; ok {
				if err := ec.relocateInstruction(iter.Ins, rel); err != nil {
					return nil, fmt.Errorf("offset %d in section %s: relocating instruction: %w", offset, sec.Name, err)
				}
			} else {
				if err := referenceRelativeJump(iter.Ins, offset, sec.symbols); err != nil {
					return nil, fmt.Errorf("offset %d in section %s: resolving relative jump: %w", offset, sec.Name, err)
				}
			}

			// The ExtInfo queues are consumed front to back, so metadata must
			// be assigned in increasing raw offset order.
			assignMetadata(iter.Ins, raw, &fo, &lo, &ro)

			// raw is shared across all symbols of the section and is never
			// reset between them.
			raw += iter.Ins.Width()
		}

		// Emit the program's instructions.
		progs[sym.Name] = insns
	}

	return progs, nil
}

// take pops and returns the first item in q if it matches the given predicate
// f. Otherwise, it returns nil.
//
// A nil or empty q also yields nil, in which case f is not invoked. The
// returned pointer refers to a copy of the popped item.
func take[T any](q *[]T, f func(T) bool) *T {
	if q == nil || len(*q) == 0 {
		return nil
	}

	out := (*q)[0]
	if f(out) {
		// Advance the caller's slice past the item that was just taken.
		*q = (*q)[1:]
		return &out
	}

	return nil
}

// Tag the instruction with any ExtInfo metadata that's pointing at the given
// raw instruction.
//
// Only the first entry of each of fo, lo and ro is inspected. An entry is
// removed from its slice when its Offset equals raw.
func assignMetadata(ins *asm.Instruction, raw asm.RawInstructionOffset, fo *btf.FuncOffsets, lo *btf.LineOffsets, ro *btf.CORERelocationOffsets) {
	if f := take(fo, func(f btf.FuncOffset) bool { return f.Offset == raw }); f != nil {
		*ins = btf.WithFuncMetadata(*ins, f.Func)
	}

	if l := take(lo, func(l btf.LineOffset) bool { return l.Offset == raw }); l != nil {
		*ins = ins.WithSource(l.Line)
	}

	if r := take(ro, func(r btf.CORERelocationOffset) bool { return r.Offset == raw }); r != nil {
		*ins = btf.WithCORERelocationMetadata(*ins, r.Relo)
	}
}

// referenceRelativeJump turns a relative jump to another bpf subprogram within
// the same ELF section into a Reference Instruction.
//
// Up to LLVM 9, calls to subprograms within the same ELF section are sometimes
// encoded using relative jumps instead of relocation entries.
// These jumps go
// out of bounds of the current program, so their targets must be memoized
// before the section's instruction stream is split.
//
// The relative jump Constant is blinded to -1 and the target Symbol is set as
// the Instruction's Reference so it can be resolved by the linker.
//
// Instructions that are not function references, or whose Constant is already
// -1, are left untouched. An error is returned if symbols has no named entry
// at the computed jump target.
func referenceRelativeJump(ins *asm.Instruction, offset uint64, symbols map[uint64]elf.Symbol) error {
	if !ins.IsFunctionReference() || ins.Constant == -1 {
		return nil
	}

	tgt := jumpTarget(offset, *ins)
	// A missing map entry yields a zero elf.Symbol, whose Name is empty.
	sym := symbols[tgt].Name
	if sym == "" {
		return fmt.Errorf("no jump target found at offset %d", tgt)
	}

	*ins = ins.WithReference(sym)
	ins.Constant = -1

	return nil
}

// jumpTarget takes ins' offset within an instruction stream (in bytes)
// and returns its absolute jump destination (in bytes) within the
// instruction stream.
//
// A destination that would be negative is clamped to 0.
func jumpTarget(offset uint64, ins asm.Instruction) uint64 {
	// A relative jump instruction describes the amount of raw BPF instructions
	// to jump, convert the offset into bytes.
	dest := ins.Constant * asm.InstructionSize

	// The starting point of the jump is the end of the current instruction.
	dest += int64(offset + asm.InstructionSize)

	if dest < 0 {
		return 0
	}

	return uint64(dest)
}

// errUnsupportedBinding is wrapped into errors returned by
// relocateInstruction when a symbol has a binding that is not accepted for
// the kind of relocation being processed.
var errUnsupportedBinding = errors.New("unsupported binding")

// relocateInstruction rewrites ins according to the relocation symbol rel.
//
// The kind of the section that rel lives in selects how ins is rewritten:
// references to map sections, data sections, program sections and the
// undefined section are handled, anything else yields ErrNotSupported. Unless
// an error occurs or a kconfig variable is resolved, ins ends up carrying a
// reference to the resolved name.
//
// NOTE(review): ec.sections[rel.Section] is dereferenced without a nil check.
// loadRelocations rejects relocations pointing at unknown sections, so this
// presumably relies on it having run first; confirm.
func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error {
	var (
		typ  = elf.ST_TYPE(rel.Info)
		bind = elf.ST_BIND(rel.Info)
		name = rel.Name
	)

	target := ec.sections[rel.Section]

	switch target.kind {
	case mapSection, btfMapSection:
		if bind == elf.STB_LOCAL {
			return fmt.Errorf("possible erroneous static qualifier on map definition: found reference to %q", name)
		}

		if bind != elf.STB_GLOBAL {
			return fmt.Errorf("map %q: %w: %s", name, errUnsupportedBinding, bind)
		}

		if typ != elf.STT_OBJECT && typ != elf.STT_NOTYPE {
			// STT_NOTYPE is generated on clang < 8 which doesn't tag
			// relocations appropriately.
			return fmt.Errorf("map load: incorrect relocation type %v", typ)
		}

		ins.Src = asm.PseudoMapFD

	case dataSection:
		// offset is the position of the referenced value within the data
		// section.
		var offset uint32
		switch typ {
		case elf.STT_SECTION:
			if bind != elf.STB_LOCAL {
				return fmt.Errorf("direct load: %s: %w: %s", name, errUnsupportedBinding, bind)
			}

			// This is really a reference to a static symbol, which clang doesn't
			// emit a symbol table entry for. Instead it encodes the offset in
			// the instruction itself.
			offset = uint32(uint64(ins.Constant))

		case elf.STT_OBJECT:
			// LLVM 9 emits OBJECT-LOCAL symbols for anonymous constants.
			if bind != elf.STB_GLOBAL && bind != elf.STB_LOCAL && bind != elf.STB_WEAK {
				return fmt.Errorf("direct load: %s: %w: %s", name, errUnsupportedBinding, bind)
			}

			offset = uint32(rel.Value)

		case elf.STT_NOTYPE:
			// LLVM 7 emits NOTYPE-LOCAL symbols for anonymous constants.
			if bind != elf.STB_LOCAL {
				return fmt.Errorf("direct load: %s: %w: %s", name, errUnsupportedBinding, bind)
			}

			offset = uint32(rel.Value)

		default:
			return fmt.Errorf("incorrect relocation type %v for direct map load", typ)
		}

		// We rely on using the name of the data section as the reference. It
		// would be nicer to keep the real name in case of an STT_OBJECT, but
		// it's not clear how to encode that into Instruction.
		name = target.Name

		// The kernel expects the offset in the second basic BPF instruction.
		ins.Constant = int64(uint64(offset) << 32)
		ins.Src = asm.PseudoMapValue

	case programSection:
		// Only calls and dword loads may reference a program section.
		switch opCode := ins.OpCode; {
		case opCode.JumpOp() == asm.Call:
			if ins.Src != asm.PseudoCall {
				return fmt.Errorf("call: %s: incorrect source register", name)
			}

			switch typ {
			case elf.STT_NOTYPE, elf.STT_FUNC:
				if bind != elf.STB_GLOBAL && bind != elf.STB_WEAK {
					return fmt.Errorf("call: %s: %w: %s", name, errUnsupportedBinding, bind)
				}

			case elf.STT_SECTION:
				if bind != elf.STB_LOCAL {
					return fmt.Errorf("call: %s: %w: %s", name, errUnsupportedBinding, bind)
				}

				// The function we want to call is in the indicated section,
				// at the offset encoded in the instruction itself. Reverse
				// the calculation to find the real function we're looking for.
				// A value of -1 references the first instruction in the section.
				offset := int64(int32(ins.Constant)+1) * asm.InstructionSize
				sym, ok := target.symbols[uint64(offset)]
				if !ok {
					return fmt.Errorf("call: no symbol at offset %d", offset)
				}

				name = sym.Name
				ins.Constant = -1

			default:
				return fmt.Errorf("call: %s: invalid symbol type %s", name, typ)
			}
		case opCode.IsDWordLoad():
			switch typ {
			case elf.STT_FUNC:
				if bind != elf.STB_GLOBAL {
					return fmt.Errorf("load: %s: %w: %s", name, errUnsupportedBinding, bind)
				}

			case elf.STT_SECTION:
				if bind != elf.STB_LOCAL {
					return fmt.Errorf("load: %s: %w: %s", name, errUnsupportedBinding, bind)
				}

				// ins.Constant already contains the offset in bytes from the
				// start of the section. This is different than a call to a
				// static function.

			default:
				return fmt.Errorf("load: %s: invalid symbol type %s", name, typ)
			}

			// For both accepted symbol types, the function is looked up by
			// the offset stored in the instruction.
			sym, ok := target.symbols[uint64(ins.Constant)]
			if !ok {
				return fmt.Errorf("load: no symbol at offset %d", ins.Constant)
			}

			name = sym.Name
			ins.Constant = -1
			ins.Src = asm.PseudoFunc

		default:
			return fmt.Errorf("neither a call nor a load instruction: %v", ins)
		}

	// The Undefined section is used for 'virtual' symbols that aren't backed by
	// an ELF section. This includes symbol references from inline asm, forward
	// function declarations, as well as extern kfunc declarations using __ksym
	// and extern kconfig variables declared using __kconfig.
	case undefSection:
		if bind != elf.STB_GLOBAL && bind != elf.STB_WEAK {
			return fmt.Errorf("asm relocation: %s: %w: %s", name, errUnsupportedBinding, bind)
		}

		if typ != elf.STT_NOTYPE {
			return fmt.Errorf("asm relocation: %s: unsupported type %s", name, typ)
		}

		kf := ec.kfuncs[name]
		_, ks := ec.ksyms[name]

		// If none of the cases below match, the instruction is left as is and
		// only receives a reference to name after the outer switch.
		switch {
		// If a Call / DWordLoad instruction is found and the datasec has a btf.Func with a Name
		// that matches the symbol name we mark the instruction as a referencing a kfunc.
		case kf != nil && ins.OpCode.JumpOp() == asm.Call:
			ins.Metadata.Set(kfuncMetaKey{}, &kfuncMeta{
				Func:    kf,
				Binding: bind,
			})

			ins.Src = asm.PseudoKfuncCall
			ins.Constant = -1

		case kf != nil && ins.OpCode.IsDWordLoad():
			ins.Metadata.Set(kfuncMetaKey{}, &kfuncMeta{
				Func:    kf,
				Binding: bind,
			})

			ins.Constant = 0

		case ks && ins.OpCode.IsDWordLoad():
			// This repeats the binding check made at the top of the
			// undefSection case.
			if bind != elf.STB_GLOBAL && bind != elf.STB_WEAK {
				return fmt.Errorf("asm relocation: %s: %w: %s", name, errUnsupportedBinding, bind)
			}
			ins.Metadata.Set(ksymMetaKey{}, &ksymMeta{
				Binding: bind,
				Name:    name,
			})

		// If no kconfig map is found, this must be a symbol reference from inline
		// asm (see testdata/loader.c:asm_relocation()) or a call to a forward
		// function declaration (see testdata/fwd_decl.c). Don't interfere, These
		// remain standard symbol references.
		// extern __kconfig reads are represented as dword loads that need to be
		// rewritten to pseudo map loads from .kconfig. If the map is present,
		// require it to contain the symbol to disambiguate between inline asm
		// relos and kconfigs.
		case ec.kconfig != nil && ins.OpCode.IsDWordLoad():
			if bind != elf.STB_GLOBAL {
				return fmt.Errorf("asm relocation: %s: %w: %s", name, errUnsupportedBinding, bind)
			}

			for _, vsi := range ec.kconfig.Value.(*btf.Datasec).Vars {
				if vsi.Type.(*btf.Var).Name != rel.Name {
					continue
				}

				ins.Src = asm.PseudoMapValue
				ins.Metadata.Set(kconfigMetaKey{}, &kconfigMeta{ec.kconfig, vsi.Offset})
				// Return right away: a resolved kconfig load does not
				// receive the reference set at the end of this function.
				return nil
			}

			return fmt.Errorf("kconfig %s not found in .kconfig", rel.Name)
		}

	default:
		return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported)
	}

	*ins = ins.WithReference(name)
	return nil
}

// loadMaps iterates over all ELF sections marked as map sections (like .maps)
// and parses each symbol into a MapSpec.
func (ec *elfCode) loadMaps() error { for _, sec := range ec.sections { if sec.kind != mapSection { continue } if len(sec.symbols) == 0 { return fmt.Errorf("section %v: no symbols", sec.Name) } vars, err := ec.sectionVars(ec.btf, sec.Name) if err != nil { return fmt.Errorf("section %v: loading map variable BTF: %w", sec.Name, err) } for _, sym := range sec.symbols { name := sym.Name if ec.maps[name] != nil { return fmt.Errorf("duplicate symbol %s in section %s", name, sec.Name) } sr := internal.NewBufferedSectionReader(sec, int64(sym.Value), int64(sym.Size)) spec := MapSpec{ Name: sanitizeName(name, -1), } switch { case binary.Read(sr, ec.ByteOrder, &spec.Type) != nil: return fmt.Errorf("map %s: missing type", name) case binary.Read(sr, ec.ByteOrder, &spec.KeySize) != nil: return fmt.Errorf("map %s: missing key size", name) case binary.Read(sr, ec.ByteOrder, &spec.ValueSize) != nil: return fmt.Errorf("map %s: missing value size", name) case binary.Read(sr, ec.ByteOrder, &spec.MaxEntries) != nil: return fmt.Errorf("map %s: missing max entries", name) case binary.Read(sr, ec.ByteOrder, &spec.Flags) != nil: return fmt.Errorf("map %s: missing flags", name) } extra, err := io.ReadAll(sr) if err != nil { return fmt.Errorf("map %s: reading map tail: %w", name, err) } if len(extra) > 0 { spec.Extra = bytes.NewReader(extra) } if v, ok := vars[name]; ok { spec.Tags = slices.Clone(v.Tags) } ec.maps[name] = &spec } } return nil } // sectionVars looks up the BTF Datasec for the given section name and returns a // map of variable names to their btf.Var definitions. 
func (ec *elfCode) sectionVars(spec *btf.Spec, sec string) (map[string]*btf.Var, error) { vars := make(map[string]*btf.Var) if spec == nil { return vars, nil } var ds *btf.Datasec if err := ec.btf.TypeByName(sec, &ds); err != nil { return vars, nil } for _, vsi := range ds.Vars { v, ok := btf.As[*btf.Var](vsi.Type) if !ok { return nil, fmt.Errorf("btf.VarSecInfo doesn't point to a *btf.Var: %T", vsi.Type) } vars[string(v.Name)] = v } return vars, nil } // loadBTFMaps iterates over all ELF sections marked as BTF map sections // (like .maps) and parses them into MapSpecs. Dump the .maps section and // any relocations with `readelf -x .maps -r `. func (ec *elfCode) loadBTFMaps() error { for _, sec := range ec.sections { if sec.kind != btfMapSection { continue } if ec.btf == nil { return fmt.Errorf("missing BTF") } vars, err := ec.sectionVars(ec.btf, sec.Name) if err != nil { return fmt.Errorf("section %v: loading map variable BTF: %w", sec.Name, err) } if len(vars) != len(sec.symbols) { return fmt.Errorf("section %v: contains %d symbols but %d btf.Vars", sec.Name, len(sec.symbols), len(vars)) } syms := make(map[string]elf.Symbol) for _, sym := range sec.symbols { syms[sym.Name] = sym } for _, v := range vars { name := v.Name // Find the ELF symbol corresponding to this Var. sym, ok := syms[name] if !ok { return fmt.Errorf("section %v: missing symbol for map %s", sec.Name, name) } sr := internal.NewBufferedSectionReader(sec, int64(sym.Value), int64(sym.Size)) // The BTF metadata for each Var contains the full length of the map // declaration, so read the corresponding amount of bytes from the ELF. // This way, we can pinpoint which map declaration contains unexpected // (and therefore unsupported) data. 
if _, err = io.Copy(internal.DiscardZeroes{}, sr); err != nil { return fmt.Errorf("section %v: map %s: initializing BTF map definitions: %w", sec.Name, name, internal.ErrNotSupported) } if ec.maps[name] != nil { return fmt.Errorf("section %v: map %s already exists", sec.Name, name) } // Each Var representing a BTF map definition contains a Struct. mapStruct, ok := btf.UnderlyingType(v.Type).(*btf.Struct) if !ok { return fmt.Errorf("expected struct, got %s", v.Type) } spec, err := mapSpecFromBTF(sec, sym, v, mapStruct, ec.btf, name, false) if err != nil { return fmt.Errorf("map %v: %w", name, err) } ec.maps[name] = spec } } return nil } // mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing // a BTF map definition. The name and spec arguments will be copied to the // resulting MapSpec, and inner must be true on any recursive invocations. func mapSpecFromBTF(es *elfSection, sym elf.Symbol, v *btf.Var, def *btf.Struct, spec *btf.Spec, name string, inner bool) (*MapSpec, error) { var ( key, value btf.Type keySize, valueSize uint64 mapType MapType flags, maxEntries uint64 pinType PinType mapExtra uint64 innerMapSpec *MapSpec contents []MapKV err error ) for i, member := range def.Members { switch member.Name { case "type": mt, err := uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("can't get type: %w", err) } mapType = MapType(mt) case "map_flags": flags, err = uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("can't get BTF map flags: %w", err) } case "max_entries": maxEntries, err = uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("can't get BTF map max entries: %w", err) } case "key": if keySize != 0 { return nil, errors.New("both key and key_size given") } pk, ok := member.Type.(*btf.Pointer) if !ok { return nil, fmt.Errorf("key type is not a pointer: %T", member.Type) } key = pk.Target size, err := btf.Sizeof(pk.Target) if err != nil { return nil, fmt.Errorf("can't get size of BTF key: %w", err) } 
keySize = uint64(size) case "value": if valueSize != 0 { return nil, errors.New("both value and value_size given") } vk, ok := member.Type.(*btf.Pointer) if !ok { return nil, fmt.Errorf("value type is not a pointer: %T", member.Type) } value = vk.Target size, err := btf.Sizeof(vk.Target) if err != nil { return nil, fmt.Errorf("can't get size of BTF value: %w", err) } valueSize = uint64(size) case "key_size": // Key needs to be nil and keySize needs to be 0 for key_size to be // considered a valid member. if key != nil || keySize != 0 { return nil, errors.New("both key and key_size given") } keySize, err = uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("can't get BTF key size: %w", err) } case "value_size": // Value needs to be nil and valueSize needs to be 0 for value_size to be // considered a valid member. if value != nil || valueSize != 0 { return nil, errors.New("both value and value_size given") } valueSize, err = uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("can't get BTF value size: %w", err) } case "pinning": if inner { return nil, errors.New("inner maps can't be pinned") } pinning, err := uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("can't get pinning: %w", err) } pinType = PinType(pinning) case "values": // The 'values' field in BTF map definitions is used for declaring map // value types that are references to other BPF objects, like other maps // or programs. It is always expected to be an array of pointers. 
if i != len(def.Members)-1 { return nil, errors.New("'values' must be the last member in a BTF map definition") } if valueSize != 0 && valueSize != 4 { return nil, errors.New("value_size must be 0 or 4") } valueSize = 4 valueType, err := resolveBTFArrayMacro(member.Type) if err != nil { return nil, fmt.Errorf("can't resolve type of member 'values': %w", err) } switch t := valueType.(type) { case *btf.Struct: // The values member pointing to an array of structs means we're expecting // a map-in-map declaration. if mapType != ArrayOfMaps && mapType != HashOfMaps { return nil, errors.New("outer map needs to be an array or a hash of maps") } if inner { return nil, fmt.Errorf("nested inner maps are not supported") } // This inner map spec is used as a map template, but it needs to be // created as a traditional map before it can be used to do so. // libbpf names the inner map template '.inner', but we // opted for _inner to simplify validation logic. (dots only supported // on kernels 5.2 and up) // Pass the BTF spec from the parent object, since both parent and // child must be created from the same BTF blob (on kernels that support BTF). innerMapSpec, err = mapSpecFromBTF(es, sym, v, t, spec, name+"_inner", true) if err != nil { return nil, fmt.Errorf("can't parse BTF map definition of inner map: %w", err) } case *btf.FuncProto: // The values member contains an array of function pointers, meaning an // autopopulated PROG_ARRAY. 
if mapType != ProgramArray { return nil, errors.New("map needs to be a program array") } default: return nil, fmt.Errorf("unsupported value type %q in 'values' field", t) } contents, err = resolveBTFValuesContents(es, sym, member) if err != nil { return nil, fmt.Errorf("resolving values contents: %w", err) } case "map_extra": mapExtra, err = uintFromBTF(member.Type) if err != nil { return nil, fmt.Errorf("resolving map_extra: %w", err) } default: return nil, fmt.Errorf("unrecognized field %s in BTF map definition", member.Name) } } // Some maps don't support value sizes, but annotating their map definitions // with __type macros can still be useful, especially to let bpf2go generate // type definitions for them. if value != nil && !mapType.canHaveValueSize() { valueSize = 0 } return &MapSpec{ Name: sanitizeName(name, -1), Type: MapType(mapType), KeySize: uint32(keySize), ValueSize: uint32(valueSize), MaxEntries: uint32(maxEntries), Flags: uint32(flags), Key: key, Value: value, Pinning: pinType, InnerMap: innerMapSpec, Contents: contents, Tags: slices.Clone(v.Tags), MapExtra: mapExtra, }, nil } // uintFromBTF resolves the __uint and __ulong macros. // // __uint emits a pointer to a sized array. For int (*foo)[10], this function // will return 10. // // __ulong emits an enum with a single value that can represent a 64-bit // integer. The first (and only) enum value is returned. func uintFromBTF(typ btf.Type) (uint64, error) { switch t := typ.(type) { case *btf.Pointer: arr, ok := t.Target.(*btf.Array) if !ok { return 0, fmt.Errorf("not a pointer to array: %v", typ) } return uint64(arr.Nelems), nil case *btf.Enum: if len(t.Values) == 0 { return 0, errors.New("enum has no values") } return t.Values[0].Value, nil default: return 0, fmt.Errorf("not a pointer or enum: %v", typ) } } // resolveBTFArrayMacro resolves the __array macro, which declares an array // of pointers to a given type. This function returns the target Type of // the pointers in the array. 
func resolveBTFArrayMacro(typ btf.Type) (btf.Type, error) { arr, ok := typ.(*btf.Array) if !ok { return nil, fmt.Errorf("not an array: %v", typ) } ptr, ok := arr.Type.(*btf.Pointer) if !ok { return nil, fmt.Errorf("not an array of pointers: %v", typ) } return ptr.Target, nil } // valuesRelocations returns an iterator over the relocations in the ELF section // corresponding to the elements of a .values array in a BTF map definition. Each // iteration yields the array index and the symbol referenced by the relocation // at that index. Empty indices are skipped. func valuesRelocations(es *elfSection, sym elf.Symbol, member btf.Member) iter.Seq2[uint32, elf.Symbol] { // The elements of a .values pointer array are not encoded in BTF itself. // Instead, each array index receives a relocation pointing at a symbol // (map/prog) in another section. However, it's possible to leave certain // array indices empty, so all indices' offsets need to be checked for emitted // relocations. // Absolute offset of the .values member within the section. start := sym.Value + uint64(member.Offset.Bytes()) // .values is a variable-length struct member, so its contents run until the // end of the symbol. The symbol offset + size is the absolute offset of the // end of the array in the section. end := sym.Value + sym.Size // The size of an address in this section. This determines the width of an // index in the array. align := es.Addralign // Amount of elements in the .values array. elems := (end - start) / align return func(yield func(uint32, elf.Symbol) bool) { for i := range uint32(elems) { // off increases by align on each iteration, starting at .values. off := start + (uint64(i) * align) r, ok := es.relocations[off] if !ok { continue } if !yield(i, r) { return } } } } // resolveBTFValuesContents looks up the symbols referenced by the relocations // in a .values array and returns them as MapKV pairs, where the key is the // array index and the value is the symbol name. 
Empty indices are skipped. func resolveBTFValuesContents(es *elfSection, sym elf.Symbol, member btf.Member) ([]MapKV, error) { var contents []MapKV if member.Offset.Bytes() > uint32(sym.Size) { return nil, fmt.Errorf("member offset %d exceeds symbol size %d", member.Offset.Bytes(), sym.Size) } for i, sym := range valuesRelocations(es, sym, member) { // Emit a value stub based on the type of relocation to be replaced by a // real fd later in the pipeline before populating the Map. switch t := elf.ST_TYPE(sym.Info); t { case elf.STT_FUNC: contents = append(contents, MapKV{i, sym.Name}) case elf.STT_OBJECT: contents = append(contents, MapKV{i, sym.Name}) default: return nil, fmt.Errorf("unknown relocation type %v for symbol %s", t, sym.Name) } } return contents, nil } func (ec *elfCode) loadDataSections() error { for _, sec := range ec.sections { if sec.kind != dataSection { continue } // If a section has no references, it will be freed as soon as the // Collection closes, so creating and populating it is wasteful. If it has // no symbols, it is likely an ephemeral section used during compilation // that wasn't sanitized by the bpf linker. (like .rodata.str1.1) // // No symbols means no VariableSpecs can be generated from it, making it // pointless to emit a data section for. if sec.references == 0 && len(sec.symbols) == 0 { continue } if sec.Size > math.MaxUint32 { return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name) } mapSpec := &MapSpec{ Name: sanitizeName(sec.Name, -1), Type: Array, KeySize: 4, ValueSize: uint32(sec.Size), MaxEntries: 1, } if isConstantDataSection(sec.Name) { mapSpec.Flags = sys.BPF_F_RDONLY_PROG } var data []byte switch sec.Type { // Only open the section if we know there's actual data to be read. 
case elf.SHT_PROGBITS: var err error data, err = sec.Data() if err != nil { return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err) } case elf.SHT_NOBITS: // NOBITS sections like .bss contain only zeroes and are not allocated in // the ELF. Since data sections are Arrays, the kernel can preallocate // them. Don't attempt reading zeroes from the ELF, instead allocate the // zeroed memory to support getting and setting VariableSpecs for sections // like .bss. data = make([]byte, sec.Size) default: return fmt.Errorf("data section %s: unknown section type %s", sec.Name, sec.Type) } mapSpec.Contents = []MapKV{{uint32(0), data}} for off, sym := range sec.symbols { // Skip symbols marked with the 'hidden' attribute. if elf.ST_VISIBILITY(sym.Other) == elf.STV_HIDDEN || elf.ST_VISIBILITY(sym.Other) == elf.STV_INTERNAL { continue } // Only accept symbols with global or weak bindings. The common // alternative is STB_LOCAL, which are either function-scoped or declared // 'static'. if elf.ST_BIND(sym.Info) != elf.STB_GLOBAL && elf.ST_BIND(sym.Info) != elf.STB_WEAK { continue } if ec.vars[sym.Name] != nil { return fmt.Errorf("data section %s: duplicate variable %s", sec.Name, sym.Name) } // Skip symbols starting with a dot, they are compiler-internal symbols // emitted by clang 11 and earlier and are not cleaned up by the bpf // compiler backend (e.g. symbols named .Lconstinit.1 in sections like // .rodata.cst32). Variables in C cannot start with a dot, so filter these // out. 
if strings.HasPrefix(sym.Name, ".") { continue } if off+sym.Size > uint64(len(data)) { return fmt.Errorf("data section %s: variable %s exceeds section bounds", sec.Name, sym.Name) } if off > math.MaxUint32 { return fmt.Errorf("data section %s: variable %s offset %d exceeds maximum", sec.Name, sym.Name, off) } ec.vars[sym.Name] = &VariableSpec{ SectionName: sec.Name, Name: sym.Name, Offset: uint32(off), Value: slices.Clone(data[off : off+sym.Size]), } } // It is possible for a data section to exist without a corresponding BTF Datasec // if it only contains anonymous values like macro-defined arrays. if ec.btf != nil { var ds *btf.Datasec if ec.btf.TypeByName(sec.Name, &ds) == nil { // Assign the spec's key and BTF only if the Datasec lookup was successful. mapSpec.Key = &btf.Void{} mapSpec.Value = ds // Populate VariableSpecs with type information, if available. for _, v := range ds.Vars { name := v.Type.TypeName() if name == "" { return fmt.Errorf("data section %s: anonymous variable %v", sec.Name, v) } vt, ok := v.Type.(*btf.Var) if !ok { return fmt.Errorf("data section %s: unexpected type %T for variable %s", sec.Name, v.Type, name) } ev := ec.vars[name] if ev == nil { // Hidden symbols appear in the BTF Datasec but don't receive a VariableSpec. continue } if v.Offset != ev.Offset { return fmt.Errorf("data section %s: variable %s datasec offset (%d) doesn't match ELF symbol offset (%d)", sec.Name, name, v.Offset, ev.Offset) } if v.Size != ev.Size() { return fmt.Errorf("data section %s: variable %s size in datasec (%d) doesn't match ELF symbol size (%d)", sec.Name, name, v.Size, ev.Size()) } // Decouple the Var in the VariableSpec from the underlying DataSec in // the MapSpec to avoid modifications from affecting map loads later on. ev.Type = btf.Copy(vt).(*btf.Var) } } } ec.maps[sec.Name] = mapSpec } return nil } // loadKconfigSection handles the 'virtual' Datasec .kconfig that doesn't // have a corresponding ELF section and exist purely in BTF. 
func (ec *elfCode) loadKconfigSection() error { if ec.btf == nil { return nil } var ds *btf.Datasec err := ec.btf.TypeByName(".kconfig", &ds) if errors.Is(err, btf.ErrNotFound) { return nil } if err != nil { return err } if ds.Size == 0 { return errors.New("zero-length .kconfig") } ec.kconfig = &MapSpec{ Name: ".kconfig", Type: Array, KeySize: uint32(4), ValueSize: ds.Size, MaxEntries: 1, Flags: sys.BPF_F_RDONLY_PROG, Key: &btf.Int{Size: 4}, Value: ds, } return nil } // loadKsymsSection handles the 'virtual' Datasec .ksyms that doesn't // have a corresponding ELF section and exist purely in BTF. func (ec *elfCode) loadKsymsSection() error { if ec.btf == nil { return nil } var ds *btf.Datasec err := ec.btf.TypeByName(".ksyms", &ds) if errors.Is(err, btf.ErrNotFound) { return nil } if err != nil { return err } for _, v := range ds.Vars { switch t := v.Type.(type) { case *btf.Func: ec.kfuncs[t.TypeName()] = t case *btf.Var: ec.ksyms[t.TypeName()] = struct{}{} default: return fmt.Errorf("unexpected variable type in .ksyms: %T", v) } } return nil } // associateStructOpsRelocs handles `.struct_ops.link` // and associates the target function with the correct struct member in the map. func (ec *elfCode) associateStructOpsRelocs(progs map[string]*ProgramSpec) error { for _, sec := range ec.sections { if sec.kind != structOpsSection { continue } userData, err := sec.Data() if err != nil { return fmt.Errorf("failed to read section data: %w", err) } // Resolve the BTF datasec describing variables in this section. var ds *btf.Datasec if err := ec.btf.TypeByName(sec.Name, &ds); err != nil { return fmt.Errorf("datasec %s: %w", sec.Name, err) } // Set flags for .struct_ops.link (BPF_F_LINK). 
flags := uint32(0) if sec.Name == structOpsLinkSec { flags = sys.BPF_F_LINK } for _, vsi := range ds.Vars { userSt, baseOff, err := ec.createStructOpsMap(vsi, userData, flags) if err != nil { return err } if err := structOpsSetAttachTo(sec, baseOff, userSt, progs); err != nil { return err } } } return nil } // createStructOpsMap() creates and registers a MapSpec for a struct_ops func (ec *elfCode) createStructOpsMap(vsi btf.VarSecinfo, userData []byte, flags uint32) (*btf.Struct, uint32, error) { varType, ok := btf.As[*btf.Var](vsi.Type) if !ok { return nil, 0, fmt.Errorf("vsi: expect var, got %T", vsi.Type) } mapName := varType.Name userSt, ok := btf.As[*btf.Struct](varType.Type) if !ok { return nil, 0, fmt.Errorf("var %s: expect struct, got %T", varType.Name, varType.Type) } userSize := userSt.Size baseOff := vsi.Offset if baseOff+userSize > uint32(len(userData)) { return nil, 0, fmt.Errorf("%s exceeds section", mapName) } // Register the MapSpec for this struct_ops instance if doesn't exist if _, exists := ec.maps[mapName]; exists { return nil, 0, fmt.Errorf("struct_ops map %s: already exists", mapName) } ec.maps[mapName] = &MapSpec{ Name: mapName, Type: StructOpsMap, Key: &btf.Int{Size: 4}, KeySize: structOpsKeySize, ValueSize: userSize, // length of the user-struct type Value: userSt, Flags: flags, MaxEntries: 1, Contents: []MapKV{ { Key: uint32(0), Value: append([]byte(nil), userData[baseOff:baseOff+userSize]...), }, }, } return userSt, baseOff, nil } type libbpfElfSectionDef struct { pattern string programType sys.ProgType attachType sys.AttachType flags libbpfElfSectionFlag } type libbpfElfSectionFlag uint32 // The values correspond to enum sec_def_flags in libbpf. 
const ( _SEC_NONE libbpfElfSectionFlag = 0 _SEC_EXP_ATTACH_OPT libbpfElfSectionFlag = 1 << (iota - 1) _SEC_ATTACHABLE _SEC_ATTACH_BTF _SEC_SLEEPABLE _SEC_XDP_FRAGS _SEC_USDT _SEC_ATTACHABLE_OPT = _SEC_ATTACHABLE | _SEC_EXP_ATTACH_OPT ) func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) { // Skip optional program marking for now. sectionName = strings.TrimPrefix(sectionName, "?") for _, t := range elfSectionDefs { extra, ok := matchSectionName(sectionName, t.pattern) if !ok { continue } programType := ProgramType(t.programType) attachType := AttachType(t.attachType) var flags uint32 if t.flags&_SEC_SLEEPABLE > 0 { flags |= sys.BPF_F_SLEEPABLE } if t.flags&_SEC_XDP_FRAGS > 0 { flags |= sys.BPF_F_XDP_HAS_FRAGS } // The libbpf documentation on program types states: 'The struct_ops attach // format supports struct_ops[.s]/ convention, but name is ignored and // it is recommended to just use plain SEC("struct_ops[.s]").' // // Ignore any extra for struct_ops to match libbpf behaviour. if programType == StructOps { extra = "" } return programType, attachType, flags, extra } return UnspecifiedProgram, AttachNone, 0, "" } // matchSectionName checks a section name against a pattern. // // It's behaviour mirrors that of libbpf's sec_def_matches. func matchSectionName(sectionName, pattern string) (extra string, found bool) { have, extra, found := strings.Cut(sectionName, "/") want := strings.TrimRight(pattern, "+/") if strings.HasSuffix(pattern, "/") { // Section name must have a slash and extra may be empty. return extra, have == want && found } else if strings.HasSuffix(pattern, "+") { // Section name may have a slash and extra may be empty. return extra, have == want } // Section name must have a prefix. extra is ignored. 
return "", strings.HasPrefix(sectionName, pattern) } func (ec *elfCode) loadSectionRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) { rels := make(map[uint64]elf.Symbol) if sec.Entsize < 16 { return nil, fmt.Errorf("section %s: relocations are less than 16 bytes", sec.Name) } r := bufio.NewReader(sec.Open()) for off := uint64(0); off < sec.Size; off += sec.Entsize { ent := io.LimitReader(r, int64(sec.Entsize)) var rel elf.Rel64 if binary.Read(ent, ec.ByteOrder, &rel) != nil { return nil, fmt.Errorf("can't parse relocation at offset %v", off) } symNo := int(elf.R_SYM64(rel.Info) - 1) if symNo >= len(symbols) { return nil, fmt.Errorf("offset %d: symbol %d doesn't exist", off, symNo) } symbol := symbols[symNo] rels[rel.Off] = symbol } return rels, nil } ================================================ FILE: elf_reader_test.go ================================================ package ebpf import ( "bytes" "encoding/binary" "errors" "flag" "fmt" "maps" "os" "path/filepath" "strings" "syscall" "testing" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/kallsyms" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/go-quicktest/qt" ) var csCmpOpts = cmp.Options{ // Dummy Comparer that works with empty readers to support test cases. 
cmp.Comparer(func(a, b bytes.Reader) bool { if a.Len() == 0 && b.Len() == 0 { return true } return false }), cmpopts.IgnoreTypes(btf.Spec{}), cmpopts.IgnoreFields(CollectionSpec{}, "ByteOrder", "Types"), cmpopts.IgnoreFields(ProgramSpec{}, "Instructions", "ByteOrder"), cmpopts.IgnoreFields(MapSpec{}, "Key", "Value", "Contents"), cmpopts.IgnoreFields(VariableSpec{}, "Type", "Value"), cmpopts.IgnoreUnexported(ProgramSpec{}), } func TestLoadCollectionSpec(t *testing.T) { coll := &CollectionSpec{ Maps: map[string]*MapSpec{ "hash_map": { Name: "hash_map", Type: Hash, KeySize: 4, ValueSize: 8, MaxEntries: 1, Flags: sys.BPF_F_NO_PREALLOC, }, "hash_map2": { Name: "hash_map2", Type: Hash, KeySize: 4, ValueSize: 8, MaxEntries: 2, }, "perf_event_array": { Name: "perf_event_array", Type: PerfEventArray, MaxEntries: 4096, }, ".bss": { Name: ".bss", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, ".data": { Name: ".data", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, ".data.test": { Name: ".data.test", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, ".rodata": { Name: ".rodata", Type: Array, KeySize: 4, ValueSize: 24, MaxEntries: 1, Flags: sys.BPF_F_RDONLY_PROG, }, ".rodata.test": { Name: ".rodata.test", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, Flags: sys.BPF_F_RDONLY_PROG, }, ".rodata.cst32": { Name: ".rodata.cst32", Type: Array, KeySize: 4, ValueSize: 32, MaxEntries: 1, Flags: sys.BPF_F_RDONLY_PROG, }, }, Programs: map[string]*ProgramSpec{ "xdp_prog": { Name: "xdp_prog", Type: XDP, SectionName: "xdp", AttachType: AttachXDP, License: "MIT", }, "no_relocation": { Name: "no_relocation", Type: SocketFilter, SectionName: "socket", License: "MIT", }, "asm_relocation": { Name: "asm_relocation", Type: SocketFilter, SectionName: "socket/2", License: "MIT", }, "data_sections": { Name: "data_sections", Type: SocketFilter, SectionName: "socket/3", License: "MIT", }, "global_fn3": { Name: "global_fn3", Type: UnspecifiedProgram, SectionName: 
"other", License: "MIT", }, "static_fn": { Name: "static_fn", Type: UnspecifiedProgram, SectionName: "static", License: "MIT", }, "anon_const": { Name: "anon_const", Type: SocketFilter, SectionName: "socket/4", License: "MIT", }, }, Variables: map[string]*VariableSpec{ "arg": {Name: "arg", SectionName: ".rodata", Offset: 4}, "arg2": {Name: "arg2", SectionName: ".rodata.test", Offset: 0}, "arg3": {Name: "arg3", SectionName: ".data.test", Offset: 0}, "key1": {Name: "key1", SectionName: ".bss", Offset: 0}, "key2": {Name: "key2", SectionName: ".data", Offset: 0}, "key3": {Name: "key3", SectionName: ".rodata", Offset: 0}, "neg": {Name: "neg", SectionName: ".rodata", Offset: 12}, "uneg": {Name: "uneg", SectionName: ".rodata", Offset: 8}, }, } // BTF-only maps. btfOnly := map[string]*MapSpec{ "btf_pin": { Name: "btf_pin", Type: Hash, KeySize: 4, ValueSize: 8, MaxEntries: 1, Pinning: PinByName, }, "bpf_decl_map": { Name: "bpf_decl_map", Type: Array, KeySize: 4, ValueSize: 8, MaxEntries: 1, Tags: []string{"a", "b"}, }, "btf_decl_map": { Name: "btf_decl_map", Type: Array, KeySize: 4, ValueSize: 8, MaxEntries: 1, Tags: []string{"a", "b"}, }, "btf_outer_map": { Name: "btf_outer_map", Type: ArrayOfMaps, KeySize: 4, ValueSize: 4, MaxEntries: 1, InnerMap: &MapSpec{ Name: "btf_outer_map_inner", Type: Hash, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, "btf_outer_map_anon": { Name: "btf_outer_map_anon", Type: ArrayOfMaps, KeySize: 4, ValueSize: 4, MaxEntries: 1, InnerMap: &MapSpec{ Name: "btf_outer_map_anon_inner", Type: Hash, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, }, "btf_typedef_map": { Name: "btf_typedef_map", Type: Array, KeySize: 4, ValueSize: 8, MaxEntries: 1, }, } testutils.Files(t, testutils.Glob(t, "testdata/loader-*.elf"), func(t *testing.T, file string) { got, err := LoadCollectionSpec(file) qt.Assert(t, qt.IsNil(err)) // BTF map definition contains a value type, but the size should remain 0. // The value type needs to be reflected in the MapSpec. 
qt.Assert(t, qt.Equals(got.Maps["perf_event_array"].ValueSize, 0)) qt.Assert(t, qt.IsNotNil(got.Maps["perf_event_array"].Value)) // Copy and extend the CollectionSpec with BTF-only objects. want := coll.Copy() maps.Copy(want.Maps, btfOnly) testLoadCollectionSpec(t, got, want) }) testutils.Files(t, testutils.Glob(t, "testdata/loader_nobtf-*.elf"), func(t *testing.T, file string) { got, err := LoadCollectionSpec(file) qt.Assert(t, qt.IsNil(err)) testLoadCollectionSpec(t, got, coll.Copy()) }) } func testLoadCollectionSpec(t *testing.T, got, want *CollectionSpec) { t.Helper() qt.Assert(t, qt.CmpEquals(got, want, csCmpOpts)) coll, err := newCollection(t, got, &CollectionOptions{ Maps: MapOptions{PinPath: testutils.TempBPFFS(t)}, Programs: ProgramOptions{LogLevel: LogLevelBranch}, }) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) ret := mustRun(t, coll.Programs["xdp_prog"], nil) qt.Assert(t, qt.Equals(ret, 7)) } func BenchmarkELFLoader(b *testing.B) { b.ReportAllocs() for b.Loop() { _, _ = LoadCollectionSpec("testdata/loader-el.elf") } } func TestDataSections(t *testing.T) { file := testutils.NativeFile(t, "testdata/loader-%s.elf") coll, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } t.Log(coll.Programs["data_sections"].Instructions) var obj struct { Program *Program `ebpf:"data_sections"` } mustLoadAndAssign(t, coll, &obj, nil) defer obj.Program.Close() ret := mustRun(t, obj.Program, nil) if ret != 0 { t.Error("BPF assertion failed on line", ret) } } func TestInlineASMConstant(t *testing.T) { file := testutils.NativeFile(t, "testdata/loader-%s.elf") coll, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } spec := coll.Programs["asm_relocation"] if spec.Instructions[0].Reference() != "MY_CONST" { t.Fatal("First instruction is not a reference to MY_CONST") } // -1 is used by the loader to find unrewritten maps. 
spec.Instructions[0].Constant = -1 t.Log(spec.Instructions) var obj struct { Program *Program `ebpf:"asm_relocation"` } mustLoadAndAssign(t, coll, &obj, nil) obj.Program.Close() } func TestFreezeRodata(t *testing.T) { testutils.SkipOnOldKernel(t, "5.9", "sk_lookup program type") file := testutils.NativeFile(t, "testdata/constants-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { Program *Program `ebpf:"freeze_rodata"` } qt.Assert(t, qt.IsNil(spec.Variables["ret"].Set(uint32(1)))) mustLoadAndAssign(t, spec, &obj, nil) obj.Program.Close() } func TestCollectionSpecDetach(t *testing.T) { coll := Collection{ Maps: map[string]*Map{ "foo": new(Map), }, Programs: map[string]*Program{ "bar": new(Program), }, } foo := coll.DetachMap("foo") if foo == nil { t.Error("Program not returned from DetachMap") } if _, ok := coll.Programs["foo"]; ok { t.Error("DetachMap doesn't remove map from Maps") } bar := coll.DetachProgram("bar") if bar == nil { t.Fatal("Program not returned from DetachProgram") } if _, ok := coll.Programs["bar"]; ok { t.Error("DetachProgram doesn't remove program from Programs") } } func TestLoadInvalidMap(t *testing.T) { file := testutils.NativeFile(t, "testdata/invalid_map-%s.elf") cs, err := LoadCollectionSpec(file) if err != nil { t.Fatal("Can't load CollectionSpec", err) } ms, ok := cs.Maps["invalid_map"] if !ok { t.Fatal("invalid_map not found in CollectionSpec") } m, err := NewMap(ms) t.Log(err) if err == nil { m.Close() t.Fatal("Creating a Map from a MapSpec with non-zero Extra is expected to fail.") } } func TestLoadInvalidMapMissingSymbol(t *testing.T) { file := testutils.NativeFile(t, "testdata/invalid_map_static-%s.elf") _, err := LoadCollectionSpec(file) t.Log(err) if err == nil { t.Fatal("Loading a map with static qualifier should fail") } } func TestLoadInitializedBTFMap(t *testing.T) { testutils.Files(t, testutils.Glob(t, "testdata/btf_map_init-*.elf"), func(t *testing.T, file string) { coll, err := 
LoadCollectionSpec(file) if err != nil { t.Fatal(err) } t.Run("NewCollection", func(t *testing.T) { _, err := newCollection(t, coll, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("NewCollection failed:", err) } }) t.Run("prog_array", func(t *testing.T) { m, ok := coll.Maps["prog_array_init"] if !ok { t.Fatal("map prog_array_init not found in program") } if len(m.Contents) != 1 { t.Error("expecting exactly 1 item in MapSpec contents") } p := m.Contents[0] if cmp.Equal(p.Key, 1) { t.Errorf("expecting MapSpec entry Key to equal 1, got %v", p.Key) } if _, ok := p.Value.(string); !ok { t.Errorf("expecting MapSpec entry Value to be a string, got %T", p.Value) } if p.Value != "tail_1" { t.Errorf("expected MapSpec entry Value 'tail_1', got: %s", p.Value) } }) t.Run("array_of_maps", func(t *testing.T) { m, ok := coll.Maps["outer_map_init"] if !ok { t.Fatal("map outer_map_init not found in program") } if len(m.Contents) != 1 { t.Error("expecting exactly 1 item in MapSpec contents") } if m.Key == nil { t.Error("Expected non-nil key") } if m.Value == nil { t.Error("Expected non-nil value") } if m.InnerMap.Key == nil { t.Error("Expected non-nil InnerMap key") } if m.InnerMap.Value == nil { t.Error("Expected non-nil InnerMap value") } p := m.Contents[0] if cmp.Equal(p.Key, 1) { t.Errorf("expecting MapSpec entry Key to equal 1, got %v", p.Key) } if _, ok := p.Value.(string); !ok { t.Errorf("expecting MapSpec entry Value to be a string, got %T", p.Value) } if p.Value != "inner_map" { t.Errorf("expected MapSpec entry Value 'inner_map', got: %s", p.Value) } }) }) } func TestLoadInvalidInitializedBTFMap(t *testing.T) { file := testutils.NativeFile(t, "testdata/invalid_btf_map_init-%s.elf") _, err := LoadCollectionSpec(file) t.Log(err) if !errors.Is(err, internal.ErrNotSupported) { t.Fatal("Loading an initialized BTF map should be unsupported") } } func TestStringSection(t *testing.T) { file := testutils.NativeFile(t, "testdata/strings-%s.elf") spec, err := 
LoadCollectionSpec(file) if err != nil { t.Fatalf("load collection spec: %s", err) } for name := range spec.Maps { t.Log(name) } strMap := spec.Maps[".rodata.str1.1"] if strMap == nil { t.Fatal("Unable to find map '.rodata.str1.1' in loaded collection") } if !strMap.readOnly() { t.Fatal("Read only data maps should be frozen") } if strMap.Flags != sys.BPF_F_RDONLY_PROG { t.Fatal("Read only data maps should have the prog-read-only flag set") } coll, err := newCollection(t, spec, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatalf("new collection: %s", err) } prog := coll.Programs["filter"] if prog == nil { t.Fatal("program not found") } testMap := coll.Maps["my_map"] if testMap == nil { t.Fatal("test map not found") } _, err = prog.Run(&RunOptions{ Data: internal.EmptyBPFContext, // Min size for XDP programs }) if err != nil { t.Fatalf("prog run: %s", err) } key := []byte("This string is allocated in the string section\n\x00") var value uint32 if err = testMap.Lookup(&key, &value); err != nil { t.Fatalf("test map lookup: %s", err) } if value != 1 { t.Fatal("Test map value not 1!") } } func TestLoadRawTracepoint(t *testing.T) { testutils.SkipOnOldKernel(t, "4.17", "BPF_RAW_TRACEPOINT API") file := testutils.NativeFile(t, "testdata/raw_tracepoint-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal("Can't parse ELF:", err) } coll, err := NewCollectionWithOptions(spec, CollectionOptions{ Programs: ProgramOptions{ LogLevel: LogLevelBranch, }, }) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create collection:", err) } coll.Close() } func TestTailCall(t *testing.T) { file := testutils.NativeFile(t, "testdata/btf_map_init-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { TailMain *Program `ebpf:"tail_main"` ProgArray *Map `ebpf:"prog_array_init"` // Windows evicts programs from the tail call array when the last // user space reference is closed. 
This is not the case on Linux. Tail *Program `ebpf:"tail_1"` } err = loadAndAssign(t, spec, &obj, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer obj.TailMain.Close() defer obj.Tail.Close() defer obj.ProgArray.Close() ret := mustRun(t, obj.Tail, nil) // Expect the tail_1 tail call to be taken, returning value 42. if ret != 42 { t.Fatalf("Expected tail call to return value 42, got %d", ret) } } func TestKconfig(t *testing.T) { file := testutils.NativeFile(t, "testdata/kconfig-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { Main *Program `ebpf:"kconfig"` } err = spec.LoadAndAssign(&obj, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer obj.Main.Close() ret := mustRun(t, obj.Main, nil) qt.Assert(t, qt.Equals(ret, 0), qt.Commentf("Failed assertion at line %d in testdata/kconfig.c", ret)) } func TestKsym(t *testing.T) { file := testutils.NativeFile(t, "testdata/ksym-%s.elf") spec, err := LoadCollectionSpec(file) qt.Assert(t, qt.IsNil(err)) var obj struct { Main *Program `ebpf:"ksym_test"` ArrayMap *Map `ebpf:"array_map"` } err = spec.LoadAndAssign(&obj, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) defer obj.Main.Close() defer obj.ArrayMap.Close() mustRun(t, obj.Main, nil) ksyms := map[string]uint64{ "bpf_init": 0, "bpf_trace_run1": 0, } qt.Assert(t, qt.IsNil(kallsyms.AssignAddresses(ksyms))) qt.Assert(t, qt.Not(qt.Equals(ksyms["bpf_init"], 0))) qt.Assert(t, qt.Not(qt.Equals(ksyms["bpf_trace_run1"], 0))) var value uint64 qt.Assert(t, qt.IsNil(obj.ArrayMap.Lookup(uint32(0), &value))) qt.Assert(t, qt.Equals(value, ksyms["bpf_init"])) qt.Assert(t, qt.IsNil(obj.ArrayMap.Lookup(uint32(1), &value))) qt.Assert(t, qt.Equals(value, ksyms["bpf_trace_run1"])) } func TestKsymWeakMissing(t *testing.T) { file := testutils.NativeFile(t, "testdata/ksym-%s.elf") spec, err := LoadCollectionSpec(file) qt.Assert(t, qt.IsNil(err)) var obj struct { Main *Program 
`ebpf:"ksym_missing_test"` } err = spec.LoadAndAssign(&obj, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) defer obj.Main.Close() ret := mustRun(t, obj.Main, nil) qt.Assert(t, qt.Equals(ret, 1)) } func TestKfunc(t *testing.T) { testutils.SkipOnOldKernel(t, "5.18", "kfunc support") file := testutils.NativeFile(t, "testdata/kfunc-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { Main *Program `ebpf:"call_kfunc"` } err = spec.LoadAndAssign(&obj, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatalf("%+v", err) } defer obj.Main.Close() ret := mustRun(t, obj.Main, nil) if ret != 1 { t.Fatalf("Expected kfunc to return value 1, got %d", ret) } } func TestWeakKfunc(t *testing.T) { testutils.SkipOnOldKernel(t, "5.18", "kfunc support") // CAP_SYS_ADMIN is required to load kfuncs implemented in kernel modules. // Assert that when kfuncs are weak, loading still works without the capability. // CAP_BPF and CAP_PERFMON are still required to load BPF raw tracepoints programs // such as the one in this test. 
testutils.WithCapabilities(t, []testutils.Capability{testutils.CAP_BPF, testutils.CAP_PERFMON}, func() { file := testutils.NativeFile(t, "testdata/kfunc-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { Missing *Program `ebpf:"weak_kfunc_missing"` Calling *Program `ebpf:"call_weak_kfunc"` } err = spec.LoadAndAssign(&obj, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatalf("%+v", err) } defer obj.Missing.Close() defer obj.Calling.Close() }) } func TestInvalidKfunc(t *testing.T) { testutils.SkipOnOldKernel(t, "5.18", "kfunc support") requireTestmod(t) file := testutils.NativeFile(t, "testdata/invalid-kfunc-%s.elf") coll, err := LoadCollection(file) if err == nil { coll.Close() t.Fatal("Expected an error") } var ike *incompatibleKfuncError if !errors.As(err, &ike) { t.Fatalf("Expected an error wrapping incompatibleKfuncError, got %s", err) } } func TestKfuncKmod(t *testing.T) { testutils.SkipOnOldKernel(t, "5.18", "Kernel module function calls") requireTestmod(t) file := testutils.NativeFile(t, "testdata/kfunc-kmod-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { Main *Program `ebpf:"call_kfunc"` } err = spec.LoadAndAssign(&obj, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatalf("%v+", err) } defer obj.Main.Close() ret := mustRun(t, obj.Main, nil) if ret != 1 { t.Fatalf("Expected kfunc to return value 1, got %d", ret) } } func TestSubprogRelocation(t *testing.T) { testutils.SkipOnOldKernel(t, "5.13", "bpf_for_each_map_elem") file := testutils.NativeFile(t, "testdata/subprog_reloc-%s.elf") spec, err := LoadCollectionSpec(file) if err != nil { t.Fatal(err) } var obj struct { Main *Program `ebpf:"fp_relocation"` HashMap *Map `ebpf:"hash_map"` } err = loadAndAssign(t, spec, &obj, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer obj.Main.Close() defer obj.HashMap.Close() ret := mustRun(t, obj.Main, nil) if ret != 42 { 
// TestUnassignedProgArray asserts that loading and assigning a program fails
// when the ProgArray it references is not part of the destination struct.
func TestUnassignedProgArray(t *testing.T) {
	file := testutils.NativeFile(t, "testdata/btf_map_init-%s.elf")
	spec, err := LoadCollectionSpec(file)
	if err != nil {
		t.Fatal(err)
	}

	// tail_main references a ProgArray that is not being assigned
	// to this struct. Normally, this would clear all its entries
	// and make any tail calls into the ProgArray result in a miss.
	// The library needs to explicitly refuse such operations.
	var obj struct {
		TailMain *Program `ebpf:"tail_main"`
		// ProgArray *Map `ebpf:"prog_array_init"`
	}

	err = loadAndAssign(t, spec, &obj, nil)
	testutils.SkipIfNotSupported(t, err)
	// Registered before the assertion so that a program which was loaded
	// against expectations is still closed when the assertion fails.
	// NOTE(review): in the expected error case TailMain is presumably nil, so
	// this relies on Close tolerating a nil *Program -- confirm.
	defer obj.TailMain.Close()

	// The load is expected to be refused.
	qt.Assert(t, qt.IsNotNil(err))
}
ms.Pinning = PinByName coll, err := NewCollectionWithOptions(spec, CollectionOptions{ Maps: MapOptions{ PinPath: testutils.TempBPFFS(t), }, }) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create collection:", err) } coll.Close() } func TestArena(t *testing.T) { file := testutils.NativeFile(t, "testdata/arena-%s.elf") coll, err := LoadCollectionSpec(file) qt.Assert(t, qt.IsNil(err)) want := &CollectionSpec{ Maps: map[string]*MapSpec{ "arena": { Name: "arena", Type: Arena, MaxEntries: 100, Flags: sys.BPF_F_MMAPABLE, MapExtra: 1 << 44, }, }, Programs: map[string]*ProgramSpec{}, Variables: map[string]*VariableSpec{}, } qt.Assert(t, qt.CmpEquals(coll, want, csCmpOpts)) testutils.SkipOnOldKernel(t, "6.9", "arena maps") mustNewCollection(t, coll, nil) } func TestStructOps(t *testing.T) { file := testutils.NativeFile(t, "testdata/struct_ops-%s.elf") coll, err := LoadCollectionSpec(file) qt.Assert(t, qt.IsNil(err)) userData := []byte{ // test_1 func ptr (8B) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // test_2 func ptr (8B) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // data (4B) + padding (4B) 0xef, 0xbe, 0xad, 0xde, 0x00, 0x00, 0x00, 0x00, } want := &CollectionSpec{ Maps: map[string]*MapSpec{ "testmod_ops": { Name: "testmod_ops", Type: StructOpsMap, MaxEntries: 1, Flags: sys.BPF_F_LINK, Key: &btf.Int{Size: 4}, KeySize: 4, ValueSize: 24, Value: &btf.Struct{ Name: "bpf_testmod_ops", Size: 24, Members: []btf.Member{ { Name: "test_1", Type: &btf.Pointer{ Target: &btf.FuncProto{ Params: []btf.FuncParam{}, Return: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}}}, Offset: 0, }, { Name: "test_2", Type: &btf.Pointer{ Target: &btf.FuncProto{ Params: []btf.FuncParam{ {Type: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}}, {Type: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}}, }, Return: (*btf.Void)(nil), }, }, Offset: 64, }, { Name: "data", Type: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}, Offset: 128, // bits }, }, }, 
Contents: []MapKV{ { Key: uint32(0), Value: userData, }, }, }, }, Programs: map[string]*ProgramSpec{ "test_1": { Name: "test_1", Type: StructOps, AttachTo: "bpf_testmod_ops:test_1", License: "GPL", SectionName: "struct_ops/test_1", Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, }, }, Variables: map[string]*VariableSpec{}, } testModOps, ok := coll.Maps["testmod_ops"] if !ok { t.Fatalf("testmod_ops doesn't exist") } data, ok := testModOps.Contents[0].Value.([]byte) if !ok { t.Fatalf("Contents[0].Value should be an array of byte") } qt.Assert(t, qt.CmpEquals(coll.Programs, want.Programs, csCmpOpts)) qt.Assert(t, qt.CmpEquals(coll.Maps, want.Maps, csCmpOpts)) qt.Assert(t, qt.CmpEquals(testModOps.Value, want.Maps["testmod_ops"].Value, csCmpOpts)) qt.Assert(t, qt.CmpEquals(data, userData, csCmpOpts)) } var ( elfPath = flag.String("elfs", os.Getenv("CI_KERNEL_SELFTESTS"), "`Path` containing libbpf-compatible ELFs (defaults to $CI_KERNEL_SELFTESTS)") elfPattern = flag.String("elf-pattern", "*.o", "Glob `pattern` for object files that should be tested") ) func TestLibBPFCompat(t *testing.T) { if *elfPath == "" { // Specify the path to the directory containing the eBPF for // the kernel's selftests if you want to run this test. // As of 5.2 that is tools/testing/selftests/bpf/ t.Skip("No path specified") } load := func(t *testing.T, spec *CollectionSpec, opts CollectionOptions, valid bool) { // Disable retrying a program load with the log enabled, it leads // to OOM kills. opts.Programs.LogDisabled = true coll, err := NewCollectionWithOptions(spec, opts) testutils.SkipIfNotSupported(t, err) var errno syscall.Errno if errors.As(err, &errno) { // This error is most likely from a syscall and caused by us not // replicating some fixups done in the selftests or the test // intentionally failing. This is expected, so skip the test // instead of failing. 
t.Skip("Skipping since the kernel rejected the program:", err) } if err == nil { coll.Close() } if !valid { if err == nil { t.Fatal("Expected an error during load") } } else if err != nil { t.Fatal("Error during loading:", err) } } files := testutils.Glob(t, filepath.Join(*elfPath, *elfPattern), // These files are only used as a source of btf. "btf__core_reloc_*", ) testutils.Files(t, files, func(t *testing.T, path string) { name := selftestName(path) switch name { case "test_map_in_map", "test_select_reuseport_kern": t.Skip("Skipping due to missing InnerMap in map definition") case "test_core_autosize": t.Skip("Skipping since the test generates dynamic BTF") case "test_static_linked": t.Skip("Skipping since .text contains 'subprog' twice") case "netif_receive_skb", "local_kptr_stash", "local_kptr_stash_fail", "type_cast", "preempted_bpf_ma_op", "percpu_alloc_fail": // Error message like // fixup for CORERelocation(local_type_id, Struct:"bin_data"[0], // local_id=27): invalid immediate 31, expected 27 (fixup: local_type_id=27->1) // See https://github.com/cilium/ebpf/issues/739 t.Skip("Skipping due to bug in libbpf type deduplication") case "test_usdt", "test_urandom_usdt", "test_usdt_multispec": t.Skip("Skipping due to missing support for usdt.bpf.h") case "lsm_cgroup", "bpf_iter_ipv6_route", "test_core_extern", "profiler1", "profiler2", "profiler3": t.Skip("Skipping due to using weak CONFIG_* variables") case "linked_maps", "linked_maps1", "linked_maps2", "linked_funcs1", "linked_funcs2", "test_subskeleton", "test_subskeleton_lib": t.Skip("Skipping due to relying on cross ELF linking") case "test_log_fixup": t.Skip("Skipping due to intentionally broken CO-RE relocations") } t.Parallel() spec, err := LoadCollectionSpec(path) testutils.SkipIfNotSupported(t, err) if errors.Is(err, errUnsupportedBinding) { t.Skip(err) } if err != nil { t.Fatal(err) } switch name { case "test_sk_assign": // Test contains a legacy iproute2 bpf_elf_map definition. 
for _, m := range spec.Maps { if m.Extra == nil || m.Extra.Len() == 0 { t.Fatalf("Expected extra bytes in map %s", m.Name) } m.Extra = nil } case "fexit_bpf2bpf", "freplace_get_constant", "freplace_global_func": loadTargetProgram(t, spec, "test_pkt_access.bpf.o", "test_pkt_access") case "freplace_cls_redirect": loadTargetProgram(t, spec, "test_cls_redirect.bpf.o", "cls_redirect") case "test_trace_ext": loadTargetProgram(t, spec, "test_pkt_md_access.bpf.o", "test_pkt_md_access") case "freplace_progmap": loadTargetProgram(t, spec, "xdp_dummy.bpf.o", "xdp_dummy_prog") if prog := spec.Programs["xdp_cpumap_prog"]; prog.AttachTo == "" { prog.AttachTo = "xdp_dummy_prog" } case "freplace_attach_probe": // Looks like the test should have a target, but 6.6 selftests don't // seem to be using it. } var opts CollectionOptions for _, mapSpec := range spec.Maps { if mapSpec.Pinning != PinNone { opts.Maps.PinPath = testutils.TempBPFFS(t) break } } coreFiles := sourceOfBTF(t, path) if len(coreFiles) == 0 { // NB: test_core_reloc_kernel.o doesn't have dedicated BTF and // therefore goes via this code path. load(t, spec, opts, true) return } for _, coreFile := range coreFiles { name := selftestName(coreFile) t.Run(name, func(t *testing.T) { // Some files like btf__core_reloc_arrays___err_too_small.o // trigger an error on purpose. Use the name to infer whether // the test should succeed. 
var valid bool switch name { case "btf__core_reloc_existence___err_wrong_arr_kind", "btf__core_reloc_existence___err_wrong_arr_value_type", "btf__core_reloc_existence___err_wrong_int_kind", "btf__core_reloc_existence___err_wrong_int_sz", "btf__core_reloc_existence___err_wrong_int_type", "btf__core_reloc_existence___err_wrong_struct_type": // These tests are buggy upstream, see https://lore.kernel.org/bpf/20210420111639.155580-1-lmb@cloudflare.com/ valid = true case "btf__core_reloc_ints___err_wrong_sz_16", "btf__core_reloc_ints___err_wrong_sz_32", "btf__core_reloc_ints___err_wrong_sz_64", "btf__core_reloc_ints___err_wrong_sz_8", "btf__core_reloc_arrays___err_wrong_val_type1", "btf__core_reloc_arrays___err_wrong_val_type2": // These tests are valid according to current libbpf behaviour, // see commit 42765ede5c54ca915de5bfeab83be97207e46f68. valid = true case "btf__core_reloc_type_id___missing_targets", "btf__core_reloc_flavors__err_wrong_name": valid = false case "btf__core_reloc_ints___err_bitfield": // Bitfields are now valid. valid = true default: valid = !strings.Contains(name, "___err_") } fh, err := os.Open(coreFile) if err != nil { t.Fatal(err) } defer fh.Close() btfSpec, err := btf.LoadSpec(coreFile) if err != nil { t.Fatal(err) } opts := opts // copy opts.Programs.KernelTypes = btfSpec load(t, spec, opts, valid) }) } }) } func loadTargetProgram(tb testing.TB, spec *CollectionSpec, file, program string) { targetSpec, err := LoadCollectionSpec(filepath.Join(*elfPath, file)) if errors.Is(err, os.ErrNotExist) && strings.HasSuffix(file, ".bpf.o") { // Prior to v6.1 BPF ELF used a plain .o suffix. 
file = strings.TrimSuffix(file, ".bpf.o") + ".o" targetSpec, err = LoadCollectionSpec(filepath.Join(*elfPath, file)) } if err != nil { tb.Fatalf("Can't read %s: %s", file, err) } qt.Assert(tb, qt.IsNotNil(targetSpec.Programs[program])) coll, err := NewCollectionWithOptions(targetSpec, CollectionOptions{ Programs: ProgramOptions{LogDisabled: true}, }) if err != nil { tb.Fatalf("Can't load target: %s", err) } tb.Cleanup(func() { coll.Close() }) target := coll.Programs[program] for _, prog := range spec.Programs { if prog.Type == Extension && prog.AttachType == AttachNone { prog.AttachTarget = target continue } if prog.Type == Tracing { switch prog.AttachType { case AttachTraceFEntry, AttachTraceFExit, AttachModifyReturn: prog.AttachTarget = target continue } } } } func sourceOfBTF(tb testing.TB, path string) []string { const testPrefix = "test_core_reloc_" const btfPrefix = "btf__core_reloc_" dir, base := filepath.Split(path) if !strings.HasPrefix(base, testPrefix) { return nil } base = strings.TrimSuffix(base[len(testPrefix):], ".o") switch base { case "bitfields_direct", "bitfields_probed": base = "bitfields" } return testutils.Glob(tb, filepath.Join(dir, btfPrefix+base+"*.o")) } func TestELFSectionProgramTypes(t *testing.T) { type testcase struct { Section string ProgramType ProgramType AttachType AttachType Flags uint32 Extra string } testcases := []testcase{ {"socket", SocketFilter, AttachNone, 0, ""}, {"socket/garbage", SocketFilter, AttachNone, 0, ""}, {"sk_reuseport/migrate", SkReuseport, AttachSkReuseportSelectOrMigrate, 0, ""}, {"sk_reuseport", SkReuseport, AttachSkReuseportSelect, 0, ""}, {"kprobe/", Kprobe, AttachNone, 0, ""}, {"kprobe/func", Kprobe, AttachNone, 0, "func"}, {"uprobe/", Kprobe, AttachNone, 0, ""}, {"kretprobe/", Kprobe, AttachNone, 0, ""}, {"uretprobe/", Kprobe, AttachNone, 0, ""}, {"tc", SchedCLS, AttachNone, 0, ""}, {"classifier", SchedCLS, AttachNone, 0, ""}, {"action", SchedACT, AttachNone, 0, ""}, {"tracepoint/", TracePoint, 
AttachNone, 0, ""}, {"tp/", TracePoint, AttachNone, 0, ""}, {"raw_tracepoint/", RawTracepoint, AttachNone, 0, ""}, {"raw_tp/", RawTracepoint, AttachNone, 0, ""}, {"raw_tracepoint.w/", RawTracepointWritable, AttachNone, 0, ""}, {"raw_tp.w/", RawTracepointWritable, AttachNone, 0, ""}, {"tp_btf/", Tracing, AttachTraceRawTp, 0, ""}, {"fentry/", Tracing, AttachTraceFEntry, 0, ""}, {"fmod_ret/", Tracing, AttachModifyReturn, 0, ""}, {"fexit/", Tracing, AttachTraceFExit, 0, ""}, {"fentry.s/", Tracing, AttachTraceFEntry, sys.BPF_F_SLEEPABLE, ""}, {"fmod_ret.s/", Tracing, AttachModifyReturn, sys.BPF_F_SLEEPABLE, ""}, {"fexit.s/", Tracing, AttachTraceFExit, sys.BPF_F_SLEEPABLE, ""}, {"freplace/", Extension, AttachNone, 0, ""}, {"lsm/foo", LSM, AttachLSMMac, 0, "foo"}, {"lsm.s/foo", LSM, AttachLSMMac, sys.BPF_F_SLEEPABLE, "foo"}, {"iter/bpf_map", Tracing, AttachTraceIter, 0, "bpf_map"}, {"iter.s/", Tracing, AttachTraceIter, sys.BPF_F_SLEEPABLE, ""}, {"syscall", Syscall, AttachNone, sys.BPF_F_SLEEPABLE, ""}, {"xdp.frags/devmap", XDP, AttachXDPDevMap, sys.BPF_F_XDP_HAS_FRAGS, ""}, {"xdp/devmap", XDP, AttachXDPDevMap, 0, ""}, {"xdp.frags/cpumap", XDP, AttachXDPCPUMap, sys.BPF_F_XDP_HAS_FRAGS, ""}, {"xdp/cpumap", XDP, AttachXDPCPUMap, 0, ""}, {"xdp.frags", XDP, AttachXDP, sys.BPF_F_XDP_HAS_FRAGS, ""}, {"xdp", XDP, AttachXDP, 0, ""}, {"perf_event", PerfEvent, AttachNone, 0, ""}, {"lwt_in", LWTIn, AttachNone, 0, ""}, {"lwt_out", LWTOut, AttachNone, 0, ""}, {"lwt_xmit", LWTXmit, AttachNone, 0, ""}, {"lwt_seg6local", LWTSeg6Local, AttachNone, 0, ""}, {"cgroup_skb/ingress", CGroupSKB, AttachCGroupInetIngress, 0, ""}, {"cgroup_skb/egress", CGroupSKB, AttachCGroupInetEgress, 0, ""}, {"cgroup/skb", CGroupSKB, AttachNone, 0, ""}, {"cgroup/sock_create", CGroupSock, AttachCGroupInetSockCreate, 0, ""}, {"cgroup/sock_release", CGroupSock, AttachCgroupInetSockRelease, 0, ""}, {"cgroup/sock", CGroupSock, AttachCGroupInetSockCreate, 0, ""}, {"cgroup/post_bind4", CGroupSock, 
AttachCGroupInet4PostBind, 0, ""}, {"cgroup/post_bind6", CGroupSock, AttachCGroupInet6PostBind, 0, ""}, {"cgroup/dev", CGroupDevice, AttachCGroupDevice, 0, ""}, {"sockops", SockOps, AttachCGroupSockOps, 0, ""}, {"sk_skb/stream_parser", SkSKB, AttachSkSKBStreamParser, 0, ""}, {"sk_skb/stream_verdict", SkSKB, AttachSkSKBStreamVerdict, 0, ""}, {"sk_skb/stream_verdict/foo", SkSKB, AttachSkSKBStreamVerdict, 0, ""}, {"sk_skb", SkSKB, AttachNone, 0, ""}, {"sk_skb/bar", SkSKB, AttachNone, 0, ""}, {"sk_msg", SkMsg, AttachSkMsgVerdict, 0, ""}, {"lirc_mode2", LircMode2, AttachLircMode2, 0, ""}, {"flow_dissector", FlowDissector, AttachFlowDissector, 0, ""}, {"cgroup/bind4", CGroupSockAddr, AttachCGroupInet4Bind, 0, ""}, {"cgroup/bind6", CGroupSockAddr, AttachCGroupInet6Bind, 0, ""}, {"cgroup/connect4", CGroupSockAddr, AttachCGroupInet4Connect, 0, ""}, {"cgroup/connect6", CGroupSockAddr, AttachCGroupInet6Connect, 0, ""}, {"cgroup/sendmsg4", CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0, ""}, {"cgroup/sendmsg6", CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0, ""}, {"cgroup/recvmsg4", CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0, ""}, {"cgroup/recvmsg6", CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0, ""}, {"cgroup/getpeername4", CGroupSockAddr, AttachCgroupInet4GetPeername, 0, ""}, {"cgroup/getpeername6", CGroupSockAddr, AttachCgroupInet6GetPeername, 0, ""}, {"cgroup/getsockname4", CGroupSockAddr, AttachCgroupInet4GetSockname, 0, ""}, {"cgroup/getsockname6", CGroupSockAddr, AttachCgroupInet6GetSockname, 0, ""}, {"cgroup/sysctl", CGroupSysctl, AttachCGroupSysctl, 0, ""}, {"cgroup/getsockopt", CGroupSockopt, AttachCGroupGetsockopt, 0, ""}, {"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0, ""}, {"sk_lookup/", SkLookup, AttachSkLookup, 0, ""}, {"kprobe.multi", Kprobe, AttachTraceKprobeMulti, 0, ""}, {"kretprobe.multi", Kprobe, AttachTraceKprobeMulti, 0, ""}, {"struct_ops", StructOps, AttachNone, 0, ""}, {"struct_ops.s", StructOps, AttachNone, sys.BPF_F_SLEEPABLE, ""}, 
{"struct_ops/foo", StructOps, AttachNone, 0, ""}, } for _, tc := range testcases { t.Run(tc.Section, func(t *testing.T) { pt, at, fl, extra := getProgType(tc.Section) have := testcase{tc.Section, pt, at, fl, extra} qt.Assert(t, qt.DeepEquals(have, tc)) }) } } func TestMatchSectionName(t *testing.T) { for _, testcase := range []struct { pattern string input string matches bool extra string }{ {"prefix/", "prefix/", true, ""}, {"prefix/", "prefix/a", true, "a"}, {"prefix/", "prefix/b", true, "b"}, {"prefix/", "prefix", false, ""}, {"prefix/", "junk", false, ""}, {"prefix+", "prefix/", true, ""}, {"prefix+", "prefix/a", true, "a"}, {"prefix+", "prefix/b", true, "b"}, {"prefix+", "prefix", true, ""}, {"prefix+", "junk", false, ""}, {"exact", "exact", true, ""}, {"exact", "exact/", true, ""}, {"exact", "exact/a", true, ""}, {"exact", "exactement", true, ""}, {"exact", "junk", false, ""}, } { name := fmt.Sprintf("%s:%s", testcase.pattern, testcase.input) t.Run(name, func(t *testing.T) { extra, matches := matchSectionName(testcase.input, testcase.pattern) qt.Assert(t, qt.Equals(matches, testcase.matches)) if testcase.matches { qt.Assert(t, qt.Equals(extra, testcase.extra)) } }) } } // selftestName takes a path to a file and derives a canonical name from it. // // It strips various suffixes used by the selftest build system. func selftestName(path string) string { file := filepath.Base(path) name := strings.TrimSuffix(file, ".o") // Strip various suffixes. // Various linking suffixes. name = strings.TrimSuffix(name, ".linked3") name = strings.TrimSuffix(name, ".llinked1") name = strings.TrimSuffix(name, ".llinked2") name = strings.TrimSuffix(name, ".llinked3") // v6.1 started adding .bpf to all BPF ELF. name = strings.TrimSuffix(name, ".bpf") return name } ================================================ FILE: elf_sections.go ================================================ // Code generated by internal/cmd/gensections.awk; DO NOT EDIT. 
package ebpf // Code in this file is derived from libbpf, available under BSD-2-Clause. import "github.com/cilium/ebpf/internal/sys" var elfSectionDefs = []libbpfElfSectionDef{ {"socket", sys.BPF_PROG_TYPE_SOCKET_FILTER, 0, _SEC_NONE}, {"sk_reuseport/migrate", sys.BPF_PROG_TYPE_SK_REUSEPORT, sys.BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, _SEC_ATTACHABLE}, {"sk_reuseport", sys.BPF_PROG_TYPE_SK_REUSEPORT, sys.BPF_SK_REUSEPORT_SELECT, _SEC_ATTACHABLE}, {"kprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, {"uprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, {"uprobe.s+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_SLEEPABLE}, {"kretprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, {"uretprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, {"uretprobe.s+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_SLEEPABLE}, {"kprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_KPROBE_MULTI, _SEC_NONE}, {"kretprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_KPROBE_MULTI, _SEC_NONE}, {"kprobe.session+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_KPROBE_SESSION, _SEC_NONE}, {"uprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_NONE}, {"uretprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_NONE}, {"uprobe.session+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_SESSION, _SEC_NONE}, {"uprobe.multi.s+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_SLEEPABLE}, {"uretprobe.multi.s+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_SLEEPABLE}, {"uprobe.session.s+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_SESSION, _SEC_SLEEPABLE}, {"ksyscall+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, {"kretsyscall+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, {"usdt+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_USDT}, {"usdt.s+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_USDT | _SEC_SLEEPABLE}, {"tc/ingress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_INGRESS, _SEC_NONE}, {"tc/egress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_EGRESS, _SEC_NONE}, {"tcx/ingress", 
sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_INGRESS, _SEC_NONE}, {"tcx/egress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_EGRESS, _SEC_NONE}, {"tc", sys.BPF_PROG_TYPE_SCHED_CLS, 0, _SEC_NONE}, {"classifier", sys.BPF_PROG_TYPE_SCHED_CLS, 0, _SEC_NONE}, {"action", sys.BPF_PROG_TYPE_SCHED_ACT, 0, _SEC_NONE}, {"netkit/primary", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_NETKIT_PRIMARY, _SEC_NONE}, {"netkit/peer", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_NETKIT_PEER, _SEC_NONE}, {"tracepoint+", sys.BPF_PROG_TYPE_TRACEPOINT, 0, _SEC_NONE}, {"tp+", sys.BPF_PROG_TYPE_TRACEPOINT, 0, _SEC_NONE}, {"raw_tracepoint+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT, 0, _SEC_NONE}, {"raw_tp+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT, 0, _SEC_NONE}, {"raw_tracepoint.w+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, 0, _SEC_NONE}, {"raw_tp.w+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, 0, _SEC_NONE}, {"tp_btf+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_RAW_TP, _SEC_ATTACH_BTF}, {"fentry+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FENTRY, _SEC_ATTACH_BTF}, {"fmod_ret+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_MODIFY_RETURN, _SEC_ATTACH_BTF}, {"fexit+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FEXIT, _SEC_ATTACH_BTF}, {"fentry.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FENTRY, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, {"fmod_ret.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_MODIFY_RETURN, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, {"fexit.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FEXIT, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, {"freplace+", sys.BPF_PROG_TYPE_EXT, 0, _SEC_ATTACH_BTF}, {"lsm+", sys.BPF_PROG_TYPE_LSM, sys.BPF_LSM_MAC, _SEC_ATTACH_BTF}, {"lsm.s+", sys.BPF_PROG_TYPE_LSM, sys.BPF_LSM_MAC, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, {"lsm_cgroup+", sys.BPF_PROG_TYPE_LSM, sys.BPF_LSM_CGROUP, _SEC_ATTACH_BTF}, {"iter+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_ITER, _SEC_ATTACH_BTF}, {"iter.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_ITER, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, {"syscall", sys.BPF_PROG_TYPE_SYSCALL, 0, _SEC_SLEEPABLE}, 
{"xdp.frags/devmap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_DEVMAP, _SEC_XDP_FRAGS}, {"xdp/devmap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_DEVMAP, _SEC_ATTACHABLE}, {"xdp.frags/cpumap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_CPUMAP, _SEC_XDP_FRAGS}, {"xdp/cpumap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_CPUMAP, _SEC_ATTACHABLE}, {"xdp.frags", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP, _SEC_XDP_FRAGS}, {"xdp", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP, _SEC_ATTACHABLE_OPT}, {"perf_event", sys.BPF_PROG_TYPE_PERF_EVENT, 0, _SEC_NONE}, {"lwt_in", sys.BPF_PROG_TYPE_LWT_IN, 0, _SEC_NONE}, {"lwt_out", sys.BPF_PROG_TYPE_LWT_OUT, 0, _SEC_NONE}, {"lwt_xmit", sys.BPF_PROG_TYPE_LWT_XMIT, 0, _SEC_NONE}, {"lwt_seg6local", sys.BPF_PROG_TYPE_LWT_SEG6LOCAL, 0, _SEC_NONE}, {"sockops", sys.BPF_PROG_TYPE_SOCK_OPS, sys.BPF_CGROUP_SOCK_OPS, _SEC_ATTACHABLE_OPT}, {"sk_skb/stream_parser", sys.BPF_PROG_TYPE_SK_SKB, sys.BPF_SK_SKB_STREAM_PARSER, _SEC_ATTACHABLE_OPT}, {"sk_skb/stream_verdict", sys.BPF_PROG_TYPE_SK_SKB, sys.BPF_SK_SKB_STREAM_VERDICT, _SEC_ATTACHABLE_OPT}, {"sk_skb/verdict", sys.BPF_PROG_TYPE_SK_SKB, sys.BPF_SK_SKB_VERDICT, _SEC_ATTACHABLE_OPT}, {"sk_skb", sys.BPF_PROG_TYPE_SK_SKB, 0, _SEC_NONE}, {"sk_msg", sys.BPF_PROG_TYPE_SK_MSG, sys.BPF_SK_MSG_VERDICT, _SEC_ATTACHABLE_OPT}, {"lirc_mode2", sys.BPF_PROG_TYPE_LIRC_MODE2, sys.BPF_LIRC_MODE2, _SEC_ATTACHABLE_OPT}, {"flow_dissector", sys.BPF_PROG_TYPE_FLOW_DISSECTOR, sys.BPF_FLOW_DISSECTOR, _SEC_ATTACHABLE_OPT}, {"cgroup_skb/ingress", sys.BPF_PROG_TYPE_CGROUP_SKB, sys.BPF_CGROUP_INET_INGRESS, _SEC_ATTACHABLE_OPT}, {"cgroup_skb/egress", sys.BPF_PROG_TYPE_CGROUP_SKB, sys.BPF_CGROUP_INET_EGRESS, _SEC_ATTACHABLE_OPT}, {"cgroup/skb", sys.BPF_PROG_TYPE_CGROUP_SKB, 0, _SEC_NONE}, {"cgroup/sock_create", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET_SOCK_CREATE, _SEC_ATTACHABLE}, {"cgroup/sock_release", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET_SOCK_RELEASE, _SEC_ATTACHABLE}, {"cgroup/sock", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET_SOCK_CREATE, 
_SEC_ATTACHABLE_OPT}, {"cgroup/post_bind4", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET4_POST_BIND, _SEC_ATTACHABLE}, {"cgroup/post_bind6", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET6_POST_BIND, _SEC_ATTACHABLE}, {"cgroup/bind4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_BIND, _SEC_ATTACHABLE}, {"cgroup/bind6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_BIND, _SEC_ATTACHABLE}, {"cgroup/connect4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_CONNECT, _SEC_ATTACHABLE}, {"cgroup/connect6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_CONNECT, _SEC_ATTACHABLE}, {"cgroup/connect_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_CONNECT, _SEC_ATTACHABLE}, {"cgroup/sendmsg4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP4_SENDMSG, _SEC_ATTACHABLE}, {"cgroup/sendmsg6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP6_SENDMSG, _SEC_ATTACHABLE}, {"cgroup/sendmsg_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_SENDMSG, _SEC_ATTACHABLE}, {"cgroup/recvmsg4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP4_RECVMSG, _SEC_ATTACHABLE}, {"cgroup/recvmsg6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP6_RECVMSG, _SEC_ATTACHABLE}, {"cgroup/recvmsg_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_RECVMSG, _SEC_ATTACHABLE}, {"cgroup/getpeername4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_GETPEERNAME, _SEC_ATTACHABLE}, {"cgroup/getpeername6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_GETPEERNAME, _SEC_ATTACHABLE}, {"cgroup/getpeername_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_GETPEERNAME, _SEC_ATTACHABLE}, {"cgroup/getsockname4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_GETSOCKNAME, _SEC_ATTACHABLE}, {"cgroup/getsockname6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_GETSOCKNAME, _SEC_ATTACHABLE}, {"cgroup/getsockname_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, 
sys.BPF_CGROUP_UNIX_GETSOCKNAME, _SEC_ATTACHABLE}, {"cgroup/sysctl", sys.BPF_PROG_TYPE_CGROUP_SYSCTL, sys.BPF_CGROUP_SYSCTL, _SEC_ATTACHABLE}, {"cgroup/getsockopt", sys.BPF_PROG_TYPE_CGROUP_SOCKOPT, sys.BPF_CGROUP_GETSOCKOPT, _SEC_ATTACHABLE}, {"cgroup/setsockopt", sys.BPF_PROG_TYPE_CGROUP_SOCKOPT, sys.BPF_CGROUP_SETSOCKOPT, _SEC_ATTACHABLE}, {"cgroup/dev", sys.BPF_PROG_TYPE_CGROUP_DEVICE, sys.BPF_CGROUP_DEVICE, _SEC_ATTACHABLE_OPT}, {"struct_ops+", sys.BPF_PROG_TYPE_STRUCT_OPS, 0, _SEC_NONE}, {"struct_ops.s+", sys.BPF_PROG_TYPE_STRUCT_OPS, 0, _SEC_SLEEPABLE}, {"sk_lookup", sys.BPF_PROG_TYPE_SK_LOOKUP, sys.BPF_SK_LOOKUP, _SEC_ATTACHABLE}, {"netfilter", sys.BPF_PROG_TYPE_NETFILTER, sys.BPF_NETFILTER, _SEC_NONE}, } ================================================ FILE: example_sock_elf_test.go ================================================ //go:build linux package ebpf_test import ( "bytes" "encoding/binary" "flag" "fmt" "syscall" "time" "unsafe" "github.com/cilium/ebpf" ) var program = [...]byte{ 0177, 0105, 0114, 0106, 0002, 0001, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0001, 0000, 0367, 0000, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0340, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0100, 0000, 0000, 0000, 0000, 0000, 0100, 0000, 0010, 0000, 0001, 0000, 0277, 0026, 0000, 0000, 0000, 0000, 0000, 0000, 0060, 0000, 0000, 0000, 0027, 0000, 0000, 0000, 0143, 0012, 0374, 0377, 0000, 0000, 0000, 0000, 0141, 0141, 0004, 0000, 0000, 0000, 0000, 0000, 0125, 0001, 0010, 0000, 0004, 0000, 0000, 0000, 0277, 0242, 0000, 0000, 0000, 0000, 0000, 0000, 0007, 0002, 0000, 0000, 0374, 0377, 0377, 0377, 0030, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0205, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0025, 0000, 0002, 0000, 0000, 0000, 0000, 0000, 0141, 0141, 0000, 0000, 0000, 0000, 0000, 0000, 0333, 0020, 0000, 0000, 
0000, 0000, 0000, 0000, 0267, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0225, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0002, 0000, 0000, 0000, 0004, 0000, 0000, 0000, 0010, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0002, 0000, 0000, 0000, 0004, 0000, 0000, 0000, 0010, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0107, 0120, 0114, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0065, 0000, 0000, 0000, 0000, 0000, 0003, 0000, 0150, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0034, 0000, 0000, 0000, 0020, 0000, 0006, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0110, 0000, 0000, 0000, 0020, 0000, 0003, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0014, 0000, 0000, 0000, 0020, 0000, 0005, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0023, 0000, 0000, 0000, 0020, 0000, 0005, 0000, 0024, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0070, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0004, 0000, 0000, 0000, 0000, 0056, 0164, 0145, 0170, 0164, 0000, 0155, 0141, 0160, 0163, 0000, 0155, 0171, 0137, 0155, 0141, 0160, 0000, 0164, 0145, 0163, 0164, 0137, 0155, 0141, 0160, 0000, 0137, 0154, 0151, 0143, 0145, 0156, 0163, 0145, 0000, 0056, 0163, 0164, 0162, 0164, 0141, 0142, 0000, 0056, 0163, 0171, 0155, 0164, 0141, 0142, 0000, 0114, 0102, 0102, 0060, 0137, 0063, 0000, 0056, 0162, 0145, 0154, 0163, 0157, 0143, 0153, 0145, 0164, 0061, 0000, 0142, 0160, 0146, 0137, 0160, 0162, 0157, 0147, 0061, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 
0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0045, 0000, 0000, 0000, 0003, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0210, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0122, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0006, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0100, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0004, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0100, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0006, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0100, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0170, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0010, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0074, 0000, 0000, 0000, 0011, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0170, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0020, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0007, 0000, 0000, 0000, 0003, 0000, 0000, 0000, 0010, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0020, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0007, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0003, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0270, 0000, 0000, 0000, 0000, 0000, 
0000, 0000, 0050, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0004, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0035, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0003, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0340, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0004, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0055, 0000, 0000, 0000, 0002, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0350, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0220, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0001, 0000, 0000, 0000, 0002, 0000, 0000, 0000, 0010, 0000, 0000, 0000, 0000, 0000, 0000, 0000, 0030, 0000, 0000, 0000, 0000, 0000, 0000, 0000, } // ExampleSocketELF demonstrates how to load an eBPF program from an ELF, // and attach it to a raw socket. 
func Example_socketELF() { const SO_ATTACH_BPF = 50 index := flag.Int("index", 0, "specify ethernet index") flag.Parse() spec, err := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(program[:])) if err != nil { panic(err) } var objs struct { Prog *ebpf.Program `ebpf:"bpf_prog1"` Stats *ebpf.Map `ebpf:"my_map"` } if err := spec.LoadAndAssign(&objs, nil); err != nil { panic(err) } defer objs.Prog.Close() defer objs.Stats.Close() sock, err := openRawSock(*index) if err != nil { panic(err) } defer syscall.Close(sock) if err := syscall.SetsockoptInt(sock, syscall.SOL_SOCKET, SO_ATTACH_BPF, objs.Prog.FD()); err != nil { panic(err) } fmt.Printf("Filtering on eth index: %d\n", *index) fmt.Println("Packet stats:") for { const ( ICMP = 0x01 TCP = 0x06 UDP = 0x11 ) time.Sleep(time.Second) var icmp uint64 var tcp uint64 var udp uint64 err := objs.Stats.Lookup(uint32(ICMP), &icmp) if err != nil { panic(err) } err = objs.Stats.Lookup(uint32(TCP), &tcp) if err != nil { panic(err) } err = objs.Stats.Lookup(uint32(UDP), &udp) if err != nil { panic(err) } fmt.Printf("\r\033[m\tICMP: %d TCP: %d UDP: %d", icmp, tcp, udp) } } func openRawSock(index int) (int, error) { sock, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, int(htons(syscall.ETH_P_ALL))) if err != nil { return 0, err } sll := syscall.SockaddrLinklayer{ Ifindex: index, Protocol: htons(syscall.ETH_P_ALL), } if err := syscall.Bind(sock, &sll); err != nil { return 0, err } return sock, nil } // htons converts the unsigned short integer hostshort from host byte order to network byte order. 
func htons(i uint16) uint16 {
	// Lay the value out in big-endian (network) byte order ...
	b := make([]byte, 2)
	binary.BigEndian.PutUint16(b, i)
	// ... then reinterpret those two bytes as a uint16 in the host's native
	// byte order, which yields the byte-swapped value on little-endian hosts
	// and the unchanged value on big-endian hosts.
	return *(*uint16)(unsafe.Pointer(&b[0]))
}

================================================ FILE: example_sock_extract_dist_test.go ================================================
//go:build linux

package ebpf_test

// This code is derived from https://github.com/cloudflare/cloudflare-blog/tree/master/2018-03-ebpf
//
// Copyright (c) 2015-2017 Cloudflare, Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of the Cloudflare, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ( "fmt" "net" "syscall" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" ) // ExampleExtractDistance shows how to attach an eBPF socket filter to // extract the network distance of an IP host. func Example_extractDistance() { filter, TTLs, err := newDistanceFilter() if err != nil { panic(err) } defer filter.Close() defer TTLs.Close() // Attach filter before the call to connect() dialer := net.Dialer{ Control: func(network, address string, c syscall.RawConn) (err error) { const SO_ATTACH_BPF = 50 err = c.Control(func(fd uintptr) { err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, SO_ATTACH_BPF, filter.FD()) }) return err }, } conn, err := dialer.Dial("tcp", "1.1.1.1:53") if err != nil { panic(err) } conn.Close() minDist, err := minDistance(TTLs) if err != nil { panic(err) } fmt.Println("1.1.1.1:53 is", minDist, "hops away") } func newDistanceFilter() (*ebpf.Program, *ebpf.Map, error) { const ETH_P_IPV6 uint16 = 0x86DD ttls, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.Hash, KeySize: 4, ValueSize: 8, MaxEntries: 4, }) if err != nil { return nil, nil, err } insns := asm.Instructions{ // r1 has ctx // r0 = ctx[16] (aka protocol) asm.LoadMem(asm.R0, asm.R1, 16, asm.Word), // Perhaps ipv6 asm.LoadImm(asm.R2, int64(ETH_P_IPV6), asm.DWord), asm.HostTo(asm.BE, asm.R2, asm.Half), asm.JEq.Reg(asm.R0, asm.R2, "ipv6"), // otherwise assume ipv4 // 8th byte in IPv4 is TTL // LDABS requires ctx in R6 asm.Mov.Reg(asm.R6, asm.R1), asm.LoadAbs(-0x100000+8, asm.Byte), asm.Ja.Label("store-ttl"), // 7th byte in IPv6 is Hop count // LDABS requires ctx in R6 asm.Mov.Reg(asm.R6, asm.R1).WithSymbol("ipv6"), asm.LoadAbs(-0x100000+7, asm.Byte), // stash the load result into FP[-4] asm.StoreMem(asm.RFP, -4, asm.R0, asm.Word).WithSymbol("store-ttl"), // stash the &FP[-4] into r2 asm.Mov.Reg(asm.R2, asm.RFP), asm.Add.Imm(asm.R2, -4), // r1 must point to map asm.LoadMapPtr(asm.R1, ttls.FD()), asm.FnMapLookupElem.Call(), // load ok? inc. Otherwise? 
jmp to mapupdate asm.JEq.Imm(asm.R0, 0, "update-map"), asm.Mov.Imm(asm.R1, 1), asm.StoreXAdd(asm.R0, asm.R1, asm.DWord), asm.Ja.Label("exit"), // MapUpdate // r1 has map ptr asm.LoadMapPtr(asm.R1, ttls.FD()).WithSymbol("update-map"), // r2 has key -> &FP[-4] asm.Mov.Reg(asm.R2, asm.RFP), asm.Add.Imm(asm.R2, -4), // r3 has value -> &FP[-16] , aka 1 asm.StoreImm(asm.RFP, -16, 1, asm.DWord), asm.Mov.Reg(asm.R3, asm.RFP), asm.Add.Imm(asm.R3, -16), // r4 has flags, 0 asm.Mov.Imm(asm.R4, 0), asm.FnMapUpdateElem.Call(), // set exit code to -1, don't trunc packet asm.Mov.Imm(asm.R0, -1).WithSymbol("exit"), asm.Return(), } prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Name: "distance_filter", Type: ebpf.SocketFilter, License: "GPL", Instructions: insns, }) if err != nil { ttls.Close() return nil, nil, err } return prog, ttls, nil } func minDistance(TTLs *ebpf.Map) (int, error) { var ( entries = TTLs.Iterate() ttl uint32 minDist uint32 = 255 count uint64 ) for entries.Next(&ttl, &count) { var dist uint32 switch { case ttl > 128: dist = 255 - ttl case ttl > 64: dist = 128 - ttl case ttl > 32: dist = 64 - ttl default: dist = 32 - ttl } if minDist > dist { minDist = dist } } return int(minDist), entries.Err() } ================================================ FILE: examples/README.md ================================================ # Examples A collection of programs showing how to use the library. Please see our [guide on what makes a good example](https://ebpf-go.dev/contributing/new-example/) if you think something is missing. * Kprobe - Attach a program to the entry or exit of an arbitrary kernel symbol (function). * [kprobe](kprobe/) - Kprobe using bpf2go. * [kprobepin](kprobepin/) - Reuse a pinned map for the kprobe example. It assumes the BPF FS is mounted at `/sys/fs/bpf`. * [kprobe_percpu](kprobe_percpu/) - Use a `BPF_MAP_TYPE_PERCPU_ARRAY` map. * [ringbuffer](ringbuffer/) - Use a `BPF_MAP_TYPE_RINGBUF` map. 
* Uprobe - Attach a program to the entry or exit of an arbitrary userspace binary symbol (function). * [uretprobe](uretprobe/) - Uretprobe using bpf2go. * Tracepoint - Attach a program to predetermined kernel tracepoints. * [tracepoint_in_c](tracepoint_in_c/) - Tracepoint using bpf2go. * [tracepoint_in_go](tracepoint_in_go/) - Tracepoint using the `ebpf.NewProgram` API and Go eBPF assembler. * Cgroup - Attach a program to control groups (cgroups). * [cgroup_skb](cgroup_skb/) - Count packets egressing the current cgroup. * Fentry - Attach a program to the entrypoint of a kernel function. Like kprobes, but with better performance and usability, for kernels 5.5 and later. * [tcp_connect](fentry/) - Trace outgoing IPv4 TCP connections. * [tcp_close](tcprtt/) - Log RTT of IPv4 TCP connections using eBPF CO-RE helpers. * TCx - Attach a program to Linux TC (Traffic Control) to process incoming and outgoing packets. * [tcx](./tcx/) - Print packet counts for ingress and egress. * XDP - Attach a program to a network interface to process incoming packets. * [xdp](xdp/) - Print packet counts by IPv4 source address. * [xdp_live_frame](xdp_live_frame/) - XDP-based traffic generator that uses live frame mode. * sched_ext - Attach a StructOpsMap to register a custom task scheduler. This feature is supported by kernels starting from version 6.12. * [sched_ext](sched_ext/) - Minimal sched_ext_ops. ## How to run ```bash cd ebpf/examples/ go run -exec sudo [./kprobe, ./uretprobe, ./ringbuffer, ...] ``` ## How to recompile The examples are built via `go generate` invoked by the Makefile in the project root. ``` make -C ../ ``` ================================================ FILE: examples/cgroup_skb/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT.
//go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { CountEgressPackets *ebpf.ProgramSpec `ebpf:"count_egress_packets"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { PktCount *ebpf.MapSpec `ebpf:"pkt_count"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { PktCount *ebpf.Map `ebpf:"pkt_count"` } func (m *bpfMaps) Close() error { return _BpfClose( m.PktCount, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { CountEgressPackets *ebpf.Program `ebpf:"count_egress_packets"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.CountEgressPackets, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/cgroup_skb/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. 
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { CountEgressPackets *ebpf.ProgramSpec `ebpf:"count_egress_packets"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { PktCount *ebpf.MapSpec `ebpf:"pkt_count"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { PktCount *ebpf.Map `ebpf:"pkt_count"` } func (m *bpfMaps) Close() error { return _BpfClose( m.PktCount, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { CountEgressPackets *ebpf.Program `ebpf:"count_egress_packets"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.CountEgressPackets, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/cgroup_skb/cgroup_skb.c ================================================ //go:build ignore #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); __type(value, u64); __uint(max_entries, 1); } pkt_count SEC(".maps"); SEC("cgroup_skb/egress") int count_egress_packets(struct __sk_buff *skb) { u32 key = 0; u64 init_val = 1; u64 *count = bpf_map_lookup_elem(&pkt_count, &key); if (!count) { bpf_map_update_elem(&pkt_count, &key, &init_val, BPF_ANY); return 1; } __sync_fetch_and_add(count, 1); return 1; } ================================================ FILE: examples/cgroup_skb/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a control group. // The eBPF program will be attached as an egress filter, // receiving an `__sk_buff` pointer for each outgoing packet. // It prints the count of total packets every second. package main import ( "bufio" "errors" "log" "os" "strings" "time" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf cgroup_skb.c -- -I../headers func main() { // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. 
objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() // Get the first-mounted cgroupv2 path. cgroupPath, err := detectCgroupPath() if err != nil { log.Fatal(err) } // Link the count_egress_packets program to the cgroup. l, err := link.AttachCgroup(link.CgroupOptions{ Path: cgroupPath, Attach: ebpf.AttachCGroupInetEgress, Program: objs.CountEgressPackets, }) if err != nil { log.Fatal(err) } defer l.Close() log.Println("Counting packets...") // Read loop reporting the total amount of times the kernel // function was entered, once per second. ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() for range ticker.C { var value uint64 if err := objs.PktCount.Lookup(uint32(0), &value); err != nil { log.Fatalf("reading map: %v", err) } log.Printf("number of packets: %d\n", value) } } // detectCgroupPath returns the first-found mount point of type cgroup2 // and stores it in the cgroupPath global variable. func detectCgroupPath() (string, error) { f, err := os.Open("/proc/mounts") if err != nil { return "", err } defer f.Close() scanner := bufio.NewScanner(f) for scanner.Scan() { // example fields: cgroup2 /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime 0 0 fields := strings.Split(scanner.Text(), " ") if len(fields) >= 3 && fields[2] == "cgroup2" { return fields[1], nil } } return "", errors.New("cgroup2 not mounted") } ================================================ FILE: examples/fentry/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfEvent struct { _ structs.HostLayout Comm [16]uint8 Sport uint16 Dport uint16 Saddr uint32 Daddr uint32 } // loadBpf returns the embedded CollectionSpec for bpf. 
func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { TcpConnect *ebpf.ProgramSpec `ebpf:"tcp_connect"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { TcpConnect *ebpf.Program `ebpf:"tcp_connect"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.TcpConnect, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/fentry/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfEvent struct { _ structs.HostLayout Comm [16]uint8 Sport uint16 Dport uint16 Saddr uint32 Daddr uint32 } // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. 
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { TcpConnect *ebpf.ProgramSpec `ebpf:"tcp_connect"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { TcpConnect *ebpf.Program `ebpf:"tcp_connect"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.TcpConnect, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/fentry/fentry.c ================================================ //go:build ignore #include "common.h" #include "bpf_endian.h" #include "bpf_tracing.h" #define AF_INET 2 #define TASK_COMM_LEN 16 char __license[] SEC("license") = "Dual MIT/GPL"; /** * This example copies parts of struct sock_common and struct sock from * the Linux kernel, but doesn't cause any CO-RE information to be emitted * into the ELF object. This requires the struct layout (up until the fields * that are being accessed) to match the kernel's, and the example will break * or misbehave when this is no longer the case. * * Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf, * lsm, etc. declared using the BPF_PROG macro can read kernel memory without * needing to call bpf_probe_read*(). */ /** * struct sock_common reflects the start of the kernel's struct sock_common. * It only contains the fields up until skc_family that are accessed in the * program, with padding to match the kernel's declaration. */ struct sock_common { union { struct { __be32 skc_daddr; __be32 skc_rcv_saddr; }; }; union { // Padding out union skc_hash. __u32 _; }; union { struct { __be16 skc_dport; __u16 skc_num; }; }; short unsigned int skc_family; }; /** * struct sock reflects the start of the kernel's struct sock. */ struct sock { struct sock_common __sk_common; }; struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 1 << 24); __type(value, struct event); } events SEC(".maps"); /** * The sample submitted to userspace over a ring buffer. 
* Emit struct event's type info into the ELF's BTF so bpf2go * can generate a Go type from it. */ struct event { u8 comm[16]; __u16 sport; __be16 dport; __be32 saddr; __be32 daddr; }; SEC("fentry/tcp_connect") int BPF_PROG(tcp_connect, struct sock *sk) { if (sk->__sk_common.skc_family != AF_INET) { return 0; } struct event *tcp_info; tcp_info = bpf_ringbuf_reserve(&events, sizeof(struct event), 0); if (!tcp_info) { return 0; } tcp_info->saddr = sk->__sk_common.skc_rcv_saddr; tcp_info->daddr = sk->__sk_common.skc_daddr; tcp_info->dport = sk->__sk_common.skc_dport; tcp_info->sport = bpf_htons(sk->__sk_common.skc_num); bpf_get_current_comm(&tcp_info->comm, TASK_COMM_LEN); bpf_ringbuf_submit(tcp_info, 0); return 0; } ================================================ FILE: examples/fentry/main.go ================================================ //go:build linux // This program demonstrates attaching a fentry eBPF program to // tcp_connect. It prints the command/IPs/ports information // whenever the host sends a TCP SYN packet to a destination. // This example only supports IPv4. // // Sample output: // // examples# go run -exec sudo ./fentry // 2021/11/06 17:51:15 Comm Src addr Port -> Dest addr Port // 2021/11/06 17:51:25 wget 10.0.2.15 49850 -> 142.250.72.228 443 // 2021/11/06 17:51:46 ssh 10.0.2.15 58854 -> 10.0.2.1 22 // 2021/11/06 18:13:15 curl 10.0.2.15 54268 -> 104.21.1.217 80 package main import ( "bytes" "encoding/binary" "errors" "log" "net" "os" "os/signal" "syscall" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/ringbuf" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf fentry.c -- -I../headers func main() { stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. 
objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() l, err := link.AttachTracing(link.TracingOptions{ Program: objs.TcpConnect, }) if err != nil { log.Fatal(err) } defer l.Close() rd, err := ringbuf.NewReader(objs.Events) if err != nil { log.Fatalf("opening ringbuf reader: %s", err) } defer rd.Close() go func() { <-stopper if err := rd.Close(); err != nil { log.Fatalf("closing ringbuf reader: %s", err) } }() log.Printf("%-16s %-15s %-6s -> %-15s %-6s", "Comm", "Src addr", "Port", "Dest addr", "Port", ) // bpfEvent is generated by bpf2go. var event bpfEvent for { record, err := rd.Read() if err != nil { if errors.Is(err, ringbuf.ErrClosed) { log.Println("received signal, exiting..") return } log.Printf("reading from reader: %s", err) continue } // Parse the ringbuf event entry into a bpfEvent structure. if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.BigEndian, &event); err != nil { log.Printf("parsing ringbuf event: %s", err) continue } log.Printf("%-16s %-15s %-6d -> %-15s %-6d", event.Comm, intToIP(event.Saddr), event.Sport, intToIP(event.Daddr), event.Dport, ) } } // intToIP converts IPv4 number to net.IP func intToIP(ipNum uint32) net.IP { ip := make(net.IP, 4) binary.BigEndian.PutUint32(ip, ipNum) return ip } ================================================ FILE: examples/headers/LICENSE.BSD-2-Clause ================================================ Valid-License-Identifier: BSD-2-Clause SPDX-URL: https://spdx.org/licenses/BSD-2-Clause.html Usage-Guide: To use the BSD 2-clause "Simplified" License put the following SPDX tag/value pair into a comment according to the placement guidelines in the licensing rules documentation: SPDX-License-Identifier: BSD-2-Clause License-Text: Copyright (c) <year> <owner> . All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: examples/headers/bpf_endian.h ================================================ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_ENDIAN__ #define __BPF_ENDIAN__ /* * Isolate byte #n and put it into byte #m, for __u##b type. 
* E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64: * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000 * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000 */ #define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8)) #define ___bpf_swab16(x) ((__u16)( \ ___bpf_mvb(x, 16, 0, 1) | \ ___bpf_mvb(x, 16, 1, 0))) #define ___bpf_swab32(x) ((__u32)( \ ___bpf_mvb(x, 32, 0, 3) | \ ___bpf_mvb(x, 32, 1, 2) | \ ___bpf_mvb(x, 32, 2, 1) | \ ___bpf_mvb(x, 32, 3, 0))) #define ___bpf_swab64(x) ((__u64)( \ ___bpf_mvb(x, 64, 0, 7) | \ ___bpf_mvb(x, 64, 1, 6) | \ ___bpf_mvb(x, 64, 2, 5) | \ ___bpf_mvb(x, 64, 3, 4) | \ ___bpf_mvb(x, 64, 4, 3) | \ ___bpf_mvb(x, 64, 5, 2) | \ ___bpf_mvb(x, 64, 6, 1) | \ ___bpf_mvb(x, 64, 7, 0))) /* LLVM's BPF target selects the endianness of the CPU * it compiles on, or the user specifies (bpfel/bpfeb), * respectively. The used __BYTE_ORDER__ is defined by * the compiler, we cannot rely on __BYTE_ORDER from * libc headers, since it doesn't reflect the actual * requested byte order. * * Note, LLVM's BPF target has different __builtin_bswapX() * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE * in bpfel and bpfeb case, which means below, that we map * to cpu_to_be16(). We could use it unconditionally in BPF * case, but better not rely on it, so that this header here * can be used from application and BPF program side, which * use different targets. 
*/ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define __bpf_ntohs(x) __builtin_bswap16(x) # define __bpf_htons(x) __builtin_bswap16(x) # define __bpf_constant_ntohs(x) ___bpf_swab16(x) # define __bpf_constant_htons(x) ___bpf_swab16(x) # define __bpf_ntohl(x) __builtin_bswap32(x) # define __bpf_htonl(x) __builtin_bswap32(x) # define __bpf_constant_ntohl(x) ___bpf_swab32(x) # define __bpf_constant_htonl(x) ___bpf_swab32(x) # define __bpf_be64_to_cpu(x) __builtin_bswap64(x) # define __bpf_cpu_to_be64(x) __builtin_bswap64(x) # define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x) # define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x) #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ # define __bpf_ntohs(x) (x) # define __bpf_htons(x) (x) # define __bpf_constant_ntohs(x) (x) # define __bpf_constant_htons(x) (x) # define __bpf_ntohl(x) (x) # define __bpf_htonl(x) (x) # define __bpf_constant_ntohl(x) (x) # define __bpf_constant_htonl(x) (x) # define __bpf_be64_to_cpu(x) (x) # define __bpf_cpu_to_be64(x) (x) # define __bpf_constant_be64_to_cpu(x) (x) # define __bpf_constant_cpu_to_be64(x) (x) #else # error "Fix your compiler's __BYTE_ORDER__?!" #endif #define bpf_htons(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_htons(x) : __bpf_htons(x)) #define bpf_ntohs(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_ntohs(x) : __bpf_ntohs(x)) #define bpf_htonl(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_htonl(x) : __bpf_htonl(x)) #define bpf_ntohl(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_ntohl(x) : __bpf_ntohl(x)) #define bpf_cpu_to_be64(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) #define bpf_be64_to_cpu(x) \ (__builtin_constant_p(x) ? \ __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) #endif /* __BPF_ENDIAN__ */ ================================================ FILE: examples/headers/bpf_helper_defs.h ================================================ /* This is auto-generated file. See bpf_doc.py for details. 
*/ /* Forward declarations of BPF structs */ struct bpf_fib_lookup; struct bpf_sk_lookup; struct bpf_perf_event_data; struct bpf_perf_event_value; struct bpf_pidns_info; struct bpf_redir_neigh; struct bpf_sock; struct bpf_sock_addr; struct bpf_sock_ops; struct bpf_sock_tuple; struct bpf_spin_lock; struct bpf_sysctl; struct bpf_tcp_sock; struct bpf_tunnel_key; struct bpf_xfrm_state; struct linux_binprm; struct pt_regs; struct sk_reuseport_md; struct sockaddr; struct tcphdr; struct seq_file; struct tcp6_sock; struct tcp_sock; struct tcp_timewait_sock; struct tcp_request_sock; struct udp6_sock; struct unix_sock; struct task_struct; struct __sk_buff; struct sk_msg_md; struct xdp_md; struct path; struct btf_ptr; struct inode; struct socket; struct file; struct bpf_timer; /* * bpf_map_lookup_elem * * Perform a lookup in *map* for an entry associated to *key*. * * Returns * Map value associated to *key*, or **NULL** if no entry was * found. */ static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1; /* * bpf_map_update_elem * * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: * * **BPF_NOEXIST** * The entry for *key* must not exist in the map. * **BPF_EXIST** * The entry for *key* must already exist in the map. * **BPF_ANY** * No condition on the existence of the entry for *key*. * * Flag value **BPF_NOEXIST** cannot be used for maps of types * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all * elements always exist), the helper would return an error. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2; /* * bpf_map_delete_elem * * Delete entry with *key* from *map*. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3; /* * bpf_probe_read * * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. * * Generally, use **bpf_probe_read_user**\ () or * **bpf_probe_read_kernel**\ () instead. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4; /* * bpf_ktime_get_ns * * Return the time elapsed since system boot, in nanoseconds. * Does not include time the system was suspended. * See: **clock_gettime**\ (**CLOCK_MONOTONIC**) * * Returns * Current *ktime*. */ static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5; /* * bpf_trace_printk * * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in * *\/sys/kernel/debug/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * * :: * * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> * * In the above: * * * ``telnet`` is the name of the current task. * * ``470`` is the PID of the current task. * * ``001`` is the CPU number on which the task is * running. * * In ``.N..``, each character refers to a set of * options (whether irqs are enabled, scheduling * options, whether hard/softirqs are running, level of * preempt_disabled respectively). 
**N** means that * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** * are set. * * ``419421.045894`` is a timestamp. * * ``0x00000001`` is a fake value used by BPF for the * instruction pointer register. * * ``<formatted msg>`` is the message formatted with * *fmt*. * * The conversion specifiers supported by *fmt* are similar, but * more limited than for printk(). They are **%d**, **%i**, * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size * of field, padding with zeroes, etc.) is available, and the * helper will return **-EINVAL** (but print nothing) if it * encounters an unknown specifier. * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values * to user space, perf events should be preferred. * * Returns * The number of bytes written to the buffer, or a negative error * in case of failure. */ static long (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6; /* * bpf_get_prandom_u32 * * Get a pseudo-random number. * * From a security point of view, this helper uses its own * pseudo-random internal state, and cannot be used to infer the * seed of other random functions in the kernel. However, it is * essential to note that the generator used by the helper is not * cryptographically secure. * * Returns * A random 32-bit unsigned value. */ static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7; /* * bpf_get_smp_processor_id * * Get the SMP (symmetric multiprocessing) processor id. Note that * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. 
* * Returns * The SMP id of the processor running the program. */ static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8; /* * bpf_skb_store_bytes * * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the * checksum for the packet after storing the bytes) and * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ * **->swhash** and *skb*\ **->l4hash** to 0). * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9; /* * bpf_l3_csum_replace * * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper * must know the former value of the header field that was * modified (*from*), the new value of this field (*to*), and the * number of bytes (2 or 4) for this field, stored in *size*. * Alternatively, it is possible to store the difference between * the previous and the new values of the header field in *to*, by * setting *from* and *size* to 0. For both methods, *offset* * indicates the location of the IP checksum within the packet. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more * flexibility and can handle sizes larger than 2 or 4 for the * checksum to update. * * A call to this helper is susceptible to change the underlying * packet buffer. 
Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10; /* * bpf_l4_csum_replace * * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the * helper must know the former value of the header field that was * modified (*from*), the new value of this field (*to*), and the * number of bytes (2 or 4) for this field, stored on the lowest * four bits of *flags*. Alternatively, it is possible to store * the difference between the previous and the new values of the * header field in *to*, by setting *from* and the four lowest * bits of *flags* to 0. For both methods, *offset* indicates the * location of the IP checksum within the packet. In addition to * the size of the field, *flags* can be added (bitwise OR) actual * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * the checksum is to be computed against a pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more * flexibility and can handle sizes larger than 2 or 4 for the * checksum to update. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11; /* * bpf_tail_call * * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack * frame is used (but values on stack and in registers for the * caller are not accessible to the callee). This mechanism allows * for program chaining, either for raising the maximum number of * available eBPF instructions, or to execute given programs in * conditional blocks. For security reasons, there is an upper * limit to the number of successive tail calls that can be * performed. * * Upon call of this helper, the program attempts to jump into a * program referenced at index *index* in *prog_array_map*, a * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes * *ctx*, a pointer to the context. * * If the call succeeds, the kernel immediately runs the first * instruction of the new program. This is not a function call, * and it never returns to the previous program. If the call * fails, then the helper has no effect, and the caller continues * to run its subsequent instructions. A call can fail if the * destination program for the jump does not exist (i.e. *index* * is superior to the number of entries in *prog_array_map*), or * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), * which is currently set to 33. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12; /* * bpf_clone_redirect * * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress * interfaces can be used for redirection. 
The **BPF_F_INGRESS** * value in *flags* is used to make the distinction (ingress path * is selected if the flag is present, egress path otherwise). * This is the only flag supported for now. * * In comparison with **bpf_redirect**\ () helper, * **bpf_clone_redirect**\ () has the associated cost of * duplicating the packet buffer, but this can be executed out of * the eBPF program. Conversely, **bpf_redirect**\ () is more * efficient, but it is handled through an action code where the * redirection happens only after the eBPF program has returned. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13; /* * bpf_get_current_pid_tgid * * * Returns * A 64-bit integer containing the current tgid and pid, and * created as such: * *current_task*\ **->tgid << 32 \|** * *current_task*\ **->pid**. */ static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14; /* * bpf_get_current_uid_gid * * * Returns * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. */ static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15; /* * bpf_get_current_comm * * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of * the executable (excluding the path) for the current task. The * *size_of_buf* must be strictly positive. On success, the * helper makes sure that the *buf* is NUL-terminated. On failure, * it is filled with zeroes. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16; /* * bpf_get_cgroup_classid * * Retrieve the classid for the current task, i.e. for the net_cls * cgroup to which *skb* belongs. * * This helper can be used on TC egress path, but not on ingress. * * The net_cls cgroup provides an interface to tag network packets * based on a user-provided identifier for all traffic coming from * the tasks belonging to the related cgroup. See also the related * kernel documentation, available from the Linux sources in file * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. * * The Linux kernel has two versions for cgroups: there are * cgroups v1 and cgroups v2. Both are available to users, who can * use a mixture of them, but note that the net_cls cgroup is for * cgroup v1 only. This makes it incompatible with BPF programs * run on cgroups, which is a cgroup-v2-only feature (a socket can * only hold data for one version of cgroups at a time). * * This helper is only available if the kernel was compiled with * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to * "**y**" or to "**m**". * * Returns * The classid, or 0 for the default unconfigured classid. */ static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17; /* * bpf_skb_vlan_push * * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update * the checksum. Note that if *vlan_proto* is different from * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to * be **ETH_P_8021Q**. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18; /* * bpf_skb_vlan_pop * * Pop a VLAN header from the packet associated to *skb*. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19; /* * bpf_skb_get_tunnel_key * * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be * filled with tunnel metadata for the packet associated to *skb*. * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which * indicates that the tunnel is based on IPv6 protocol instead of * IPv4. * * The **struct bpf_tunnel_key** is an object that generalizes the * principal parameters used by various tunneling protocols into a * single struct. This way, it can be used to easily make a * decision based on the contents of the encapsulation header, * "summarized" in this struct. In particular, it holds the IP * address of the remote end (IPv4 or IPv6, depending on the case) * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, * this struct exposes the *key*\ **->tunnel_id**, which is * generally mapped to a VNI (Virtual Network Identifier), making * it programmable together with the **bpf_skb_set_tunnel_key**\ * () helper. 
* * Let's imagine that the following code is part of a program * attached to the TC ingress interface, on one end of a GRE * tunnel, and is supposed to filter out all messages coming from * remote ends with IPv4 address other than 10.0.0.1: * * :: * * int ret; * struct bpf_tunnel_key key = {}; * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices * that can operate in "collect metadata" mode: instead of having * one network device per specific configuration, the "collect * metadata" mode only requires a single device where the * configuration can be extracted from this helper. * * This can be used together with various tunnels such as VXLan, * Geneve, GRE or IP in IP (IPIP). * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20; /* * bpf_skb_set_tunnel_key * * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The * *flags* can be set to a combination of the following values: * * **BPF_F_TUNINFO_IPV6** * Indicate that the tunnel is based on IPv6 protocol * instead of IPv4. * **BPF_F_ZERO_CSUM_TX** * For IPv4 packets, add a flag to tunnel metadata * indicating that checksum computation should be skipped * and checksum set to zeroes. * **BPF_F_DONT_FRAGMENT** * Add a flag to tunnel metadata indicating that the * packet should not be fragmented. * **BPF_F_SEQ_NUMBER** * Add a flag to tunnel metadata indicating that a * sequence number should be added to tunnel header before * sending the packet. This flag was added for GRE * encapsulation, but might be used with other protocols * as well in the future. 
* * Here is a typical usage on the transmit path: * * :: * * struct bpf_tunnel_key key; * populate key ... * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); * * See also the description of the **bpf_skb_get_tunnel_key**\ () * helper for additional information. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21; /* * bpf_perf_event_read * * Read the value of a perf event counter. This helper relies on a * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of * the perf event counter is selected when *map* is updated with * perf event file descriptors. The *map* is an array whose size * is the number of available CPUs, and each cell contains a value * relative to one CPU. The value to retrieve is indicated by * *flags*, that contains the index of the CPU to look up, masked * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to * **BPF_F_CURRENT_CPU** to indicate that the value for the * current CPU should be retrieved. * * Note that before Linux 4.13, only hardware perf event can be * retrieved. * * Also, be aware that the newer helper * **bpf_perf_event_read_value**\ () is recommended over * **bpf_perf_event_read**\ () in general. The latter has some ABI * quirks where error and counter value are used as a return code * (which is wrong to do since ranges may overlap). This issue is * fixed with **bpf_perf_event_read_value**\ (), which at the same * time provides more features over the **bpf_perf_event_read**\ * () interface. Please refer to the description of * **bpf_perf_event_read_value**\ () for details. * * Returns * The value of the perf event counter read from the map, or a * negative error code in case of failure. 
*/ static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22; /* * bpf_redirect * * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ * (), except that the packet is not cloned, which provides * increased performance. * * Except for XDP, both ingress and egress interfaces can be used * for redirection. The **BPF_F_INGRESS** value in *flags* is used * to make the distinction (ingress path is selected if the flag * is present, egress path otherwise). Currently, XDP only * supports redirection to the egress interface, and accepts no * flag at all. * * The same effect can also be attained with the more generic * **bpf_redirect_map**\ (), which uses a BPF map to store the * redirect target instead of providing it directly to the helper. * * Returns * For XDP, the helper returns **XDP_REDIRECT** on success or * **XDP_ABORTED** on error. For other program types, the values * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on * error. */ static long (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23; /* * bpf_get_route_realm * * Retrieve the realm of the route, that is to say the * **tclassid** field of the destination for the *skb*. The * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. * * Retrieving this identifier works with the clsact TC egress hook * (see also **tc-bpf(8)**), or alternatively on conventional * classful egress qdiscs, but not on TC ingress path. In case of * clsact TC egress hook, this has the advantage that, internally, * the destination entry has not been dropped yet in the transmit * path. Therefore, the destination entry does not need to be * artificially held via **netif_keep_dst**\ () for a classful * qdisc until the *skb* is freed. 
* * This helper is available only if the kernel was compiled with * **CONFIG_IP_ROUTE_CLASSID** configuration option. * * Returns * The realm of the route for the packet associated to *skb*, or 0 * if none was found. */ static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24; /* * bpf_perf_event_output * * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf * event must have the following attributes: **PERF_SAMPLE_RAW** * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. * * The *flags* are used to indicate the index in *map* for which * the value must be put, masked with **BPF_F_INDEX_MASK**. * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** * to indicate that the index of the current CPU core should be * used. * * The value to write, of *size*, is passed through eBPF stack and * pointed by *data*. * * The context of the program *ctx* needs also be passed to the * helper. * * On user space, a program willing to read the values needs to * call **perf_event_open**\ () on the perf event (either for * one or for all CPUs) and to store the file descriptor into the * *map*. This must be done before the eBPF program can send data * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in * *samples/bpf/trace_output_kern.c*). * * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user * space, and is much better suitable for streaming data from eBPF * programs. * * Note that this helper is not restricted to tracing use cases * and can be used with programs attached to TC or XDP as well, * where it allows for passing data to user space listeners. Data * can be: * * * Only custom structs, * * Only the packet payload, or * * A combination of both. 
* * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25; /* * bpf_skb_load_bytes * * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from * the packet associated to *skb*, into the buffer pointed by * *to*. * * Since Linux 4.7, usage of this helper has mostly been replaced * by "direct packet access", enabling packet data to be * manipulated with *skb*\ **->data** and *skb*\ **->data_end** * pointing respectively to the first byte of packet data and to * the byte after the last byte of packet data. However, it * remains useful if one wishes to read large quantities of data * at once from a packet into the eBPF stack. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26; /* * bpf_get_stackid * * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context * on which the tracing program is executed, and a pointer to a * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. * * The last argument, *flags*, holds the number of stack frames to * skip (from 0 to 255), masked with * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set * a combination of the following flags: * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_FAST_STACK_CMP** * Compare stacks by hash only. * **BPF_F_REUSE_STACKID** * If two different stacks hash into the same *stackid*, * discard the old one. * * The stack id retrieved is a 32 bit long integer handle which * can be further combined with other data (including other stack * ids) and used as a key into maps. This can be useful for * generating a variety of graphs (such as flame graphs or off-cpu * graphs). 
* * For walking a stack, this helper is an improvement over * **bpf_probe_read**\ (), which can be used with unrolled loops * but is not efficient and consumes a lot of eBPF instructions. * Instead, **bpf_get_stackid**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that * this limit can be controlled with the **sysctl** program, and * that it should be manually increased in order to profile long * user stacks (such as stacks for Java programs). To do so, use: * * :: * * # sysctl kernel.perf_event_max_stack=<new value> * * Returns * The positive or null stack id on success, or a negative error * in case of failure. */ static long (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27; /* * bpf_csum_diff * * Compute a checksum difference, from the raw buffer pointed by * *from*, of length *from_size* (that must be a multiple of 4), * towards the raw buffer pointed by *to*, of size *to_size* * (same remark). An optional *seed* can be added to the value * (this can be cascaded, the seed may come from a previous call * to the helper). * * This is flexible enough to be used in several ways: * * * With *from_size* == 0, *to_size* > 0 and *seed* set to * checksum, it can be used when pushing new data. * * With *from_size* > 0, *to_size* == 0 and *seed* set to * checksum, it can be used when removing data from a packet. * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it * can be used to compute a diff. Note that *from_size* and * *to_size* do not need to be equal. * * This helper can be used in combination with * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to * which one can feed in the difference computed with * **bpf_csum_diff**\ (). * * Returns * The checksum result, or a negative error code in case of * failure. 
*/ static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28; /* * bpf_skb_get_tunnel_opt * * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* * of *size*. * * This helper can be used with encapsulation devices that can * operate in "collect metadata" mode (please refer to the related * note in the description of **bpf_skb_get_tunnel_key**\ () for * more details). A particular example where this can be used is * in combination with the Geneve encapsulation protocol, where it * allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper) * and retrieving arbitrary TLVs (Type-Length-Value headers) from * the eBPF program. This allows for full customization of these * headers. * * Returns * The size of the option data retrieved. */ static long (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29; /* * bpf_skb_set_tunnel_opt * * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. * * See also the description of the **bpf_skb_get_tunnel_opt**\ () * helper for additional information. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30; /* * bpf_skb_change_proto * * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to * IPv4. The helper takes care of the groundwork for the * transition, including resizing the socket buffer. The eBPF * program is expected to fill the new headers, if any, via * **bpf_skb_store_bytes**\ () and to recompute the checksums with * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ * (). The main case for this helper is to perform NAT64 * operations out of an eBPF program. 
* * Internally, the GSO type is marked as dodgy so that headers are * checked and segments are recalculated by the GSO/GRO engine. * The size for GSO target is adapted as well. * * All values for *flags* are reserved for future usage, and must * be left at zero. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31; /* * bpf_skb_change_type * * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except * the eBPF program does not have a write access to *skb*\ * **->pkt_type** beside this helper. Using a helper here allows * for graceful handling of errors. * * The major use case is to change incoming *skb*s to * **PACKET_HOST** in a programmatic way instead of having to * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for * example. * * Note that *type* only allows certain values. At this time, they * are: * * **PACKET_HOST** * Packet is for us. * **PACKET_BROADCAST** * Send packet to all. * **PACKET_MULTICAST** * Send packet to group. * **PACKET_OTHERHOST** * Send packet to someone else. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32; /* * bpf_skb_under_cgroup * * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. * * Returns * The return value depends on the result of the test, and can be: * * * 0, if the *skb* failed the cgroup2 descendant test. * * 1, if the *skb* succeeded the cgroup2 descendant test. 
* * A negative error code, if an error occurred. */ static long (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33; /* * bpf_get_hash_recalc * * Retrieve the hash of the packet, *skb*\ **->hash**. If it is * not set, in particular if the hash was cleared due to mangling, * recompute this hash. Later accesses to the hash can be done * directly with *skb*\ **->hash**. * * Calling **bpf_set_hash_invalid**\ (), changing a packet * prototype with **bpf_skb_change_proto**\ (), or calling * **bpf_skb_store_bytes**\ () with the * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear * the hash and to trigger a new computation for the next call to * **bpf_get_hash_recalc**\ (). * * Returns * The 32-bit hash. */ static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34; /* * bpf_get_current_task * * * Returns * A pointer to the current task struct. */ static __u64 (*bpf_get_current_task)(void) = (void *) 35; /* * bpf_probe_write_user * * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in * user context, and *dst* must be a valid user space address. * * This helper should not be used to implement any kind of * security mechanism because of TOC-TOU attacks, but rather to * debug, divert, and manipulate execution of semi-cooperative * processes. * * Keep in mind that this feature is meant for experiments, and it * has a risk of crashing the system and running programs. * Therefore, when an eBPF program using this helper is attached, * a warning including PID and process name is printed to kernel * logs. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36; /* * bpf_current_task_under_cgroup * * Check whether the probe is being run in the context of a given * subset of the cgroup2 hierarchy. 
The cgroup2 to test is held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. * * Returns * The return value depends on the result of the test, and can be: * * * 1, if current task belongs to the cgroup2. * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. */ static long (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37; /* * bpf_skb_change_tail * * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must * be left at zero. * * The basic idea is that the helper performs the needed work to * change the size of the packet, then the eBPF program rewrites * the rest via helpers like **bpf_skb_store_bytes**\ (), * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ () * and others. This helper is a slow path utility intended for * replies with control messages. And because it is targeted for * slow path, the helper itself can afford to be slow: it * implicitly linearizes, unclones and drops offloads from the * *skb*. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38; /* * bpf_skb_pull_data * * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. If a zero value is passed for * *len*, then the whole length of the *skb* is pulled. * * This helper is only needed for reading and writing with direct * packet access. 
* * For direct packet access, testing that offsets to access * are within packet boundaries (test on *skb*\ **->data_end**) is * susceptible to fail if offsets are invalid, or if the requested * data is in non-linear parts of the *skb*. On failure the * program can just bail out, or in the case of a non-linear * buffer, use a helper to make the data available. The * **bpf_skb_load_bytes**\ () helper is a first solution to access * the data. Another one consists in using **bpf_skb_pull_data** * to pull in once the non-linear parts, then retesting and * eventually access the data. * * At the same time, this also makes sure the *skb* is uncloned, * which is a necessary condition for direct write. As this needs * to be an invariant for the write part only, the verifier * detects writes and adds a prologue that is calling * **bpf_skb_pull_data()** to effectively unclone the *skb* from * the very beginning in case it is indeed cloned. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39; /* * bpf_csum_update * * Add the checksum *csum* into *skb*\ **->csum** in case the * driver has supplied a checksum for the entire packet into that * field. Return an error otherwise. This helper is intended to be * used in combination with **bpf_csum_diff**\ (), in particular * when the checksum needs to be updated after data has been * written into the packet through direct packet access. * * Returns * The checksum on success, or a negative error code in case of * failure. 
*/ static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40; /* * bpf_set_hash_invalid * * Invalidate the current *skb*\ **->hash**. It can be used after * mangling on headers through direct packet access, in order to * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * */ static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41; /* * bpf_get_numa_node_id * * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA * node, when the program is attached to sockets using the * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), * but the helper is also available to other eBPF program types, * similarly to **bpf_get_smp_processor_id**\ (). * * Returns * The id of current NUMA node. */ static long (*bpf_get_numa_node_id)(void) = (void *) 42; /* * bpf_skb_change_head * * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of * space. It automatically extends and reallocates memory as * required. * * This helper can be used on a layer 3 *skb* to push a MAC header * for redirection into a layer 2 device. * * All values for *flags* are reserved for future usage, and must * be left at zero. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43; /* * bpf_xdp_adjust_head * * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. 
Note that * it is possible to use a negative value for *delta*. This helper * can be used to prepare the packet for pushing or popping * headers. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44; /* * bpf_probe_read_str * * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for * more details. * * Generally, use **bpf_probe_read_user_str**\ () or * **bpf_probe_read_kernel_str**\ () instead. * * Returns * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative * value. */ static long (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45; /* * bpf_get_socket_cookie * * If the **struct sk_buff** pointed to by *skb* has a known socket, * retrieve the cookie (generated by the kernel) of this socket. * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * * Returns * An 8-byte long unique number on success, or 0 if the socket * field is missing inside *skb*. */ static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46; /* * bpf_get_socket_uid * * Get the owner UID of the socket associated to *skb*. * * Returns * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e.
if it is a * time-wait or a request socket instead), **overflowuid** value * is returned (note that **overflowuid** might also be the actual * UID value for the socket). */ static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47; /* * bpf_set_hash * * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * * Returns * 0 */ static long (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48; /* * bpf_setsockopt * * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at * which the option resides and the name *optname* of the option * must be specified, see **setsockopt(2)** for more information. * The option value of length *optlen* is pointed by *optval*. * * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_setsockopt)(void *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49; /* * bpf_skb_adjust_room * * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * * By default, the helper will reset any offloaded checksum * indicator of the skb to CHECKSUM_NONE. This can be avoided * by the following flag: * * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded * checksum data of the skb to CHECKSUM_NONE. * * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * * The following flags are supported at this time: * * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. * Adjusting mss in this way is not allowed for datagrams. * * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: * Any new space is reserved to hold a tunnel header. * Configure skb offsets and other fields accordingly. * * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: * Use with ENCAP_L3 flags to further specify the tunnel type. * * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the * L2 type as Ethernet. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50; /* * bpf_redirect_map * * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain * references to net devices (for forwarding packets through other * ports), or to CPUs (for redirecting XDP frames to another CPU; * but this is only implemented for native XDP (with driver * support) as of this writing). * * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen * by the caller. The higher bits of *flags* can be set to * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. * * With BPF_F_BROADCAST the packet will be broadcast to all the * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress * interface will be excluded when broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. * * Returns * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. */ static long (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51; /* * bpf_sk_redirect_map * * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and * egress interfaces can be used for redirection. The * **BPF_F_INGRESS** value in *flags* is used to make the * distinction (ingress path is selected if the flag is present, * egress path otherwise). This is the only flag supported for now. * * Returns * **SK_PASS** on success, or **SK_DROP** on error. */ static long (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52; /* * bpf_sock_map_update * * Add an entry to, or update a *map* referencing sockets.
The * *skops* is used as a new value for the entry associated to * *key*. *flags* is one of: * * **BPF_NOEXIST** * The entry for *key* must not exist in the map. * **BPF_EXIST** * The entry for *key* must already exist in the map. * **BPF_ANY** * No condition on the existence of the entry for *key*. * * If the *map* has eBPF programs (parser and verdict), those will * be inherited by the socket being added. If the socket is * already attached to eBPF programs, this results in an error. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53; /* * bpf_xdp_adjust_meta * * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this * operation modifies the address stored in *xdp_md*\ **->data**, * so the latter must be loaded only after the helper has been * called. * * The use of *xdp_md*\ **->data_meta** is optional and programs * are not required to use it. The rationale is that when the * packet is processed with XDP (e.g. as DoS filter), it is * possible to push further meta data along with it before passing * to the stack, and to give the guarantee that an ingress eBPF * program attached as a TC classifier on the same device can pick * this up for further post-processing. Since TC works with socket * buffers, it remains possible to set from XDP the **mark** or * **priority** pointers, or other pointers for the socket buffer. * Having this scratch space generic and programmable allows for * more flexibility as the user is free to store whatever meta * data they need. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. 
* * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54; /* * bpf_perf_event_read_value * * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event * counter is selected when *map* is updated with perf event file * descriptors. The *map* is an array whose size is the number of * available CPUs, and each cell contains a value relative to one * CPU. The value to retrieve is indicated by *flags*, that * contains the index of the CPU to look up, masked with * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to * **BPF_F_CURRENT_CPU** to indicate that the value for the * current CPU should be retrieved. * * This helper behaves in a way close to * **bpf_perf_event_read**\ () helper, save that instead of * just returning the value observed, it fills the *buf* * structure. This allows for additional data to be retrieved: in * particular, the enabled and running times (in *buf*\ * **->enabled** and *buf*\ **->running**, respectively) are * copied. In general, **bpf_perf_event_read_value**\ () is * recommended over **bpf_perf_event_read**\ (), which has some * ABI issues and provides fewer functionalities. * * These values are interesting, because hardware PMU (Performance * Monitoring Unit) counters are limited resources. When there are * more PMU based perf events opened than available counters, * kernel will multiplex these events so each event gets certain * percentage (but not all) of the PMU time. In case that * multiplexing happens, the number of samples or counter value * will not reflect the case compared to when no multiplexing * occurs. This makes comparison between different runs difficult. * Typically, the counter value should be normalized before * comparing to other experiments. The usual normalization is done * as follows. 
* * :: * * normalized_counter = counter * t_enabled / t_running * * Where t_enabled is the time enabled for event and t_running is * the time running for event since last normalization. The * enabled and running times are accumulated since the perf event * open. To achieve scaling factor between two invocations of an * eBPF program, users can use CPU id as the key (which is * typical for perf array usage model) to remember the previous * value and do the calculation inside the eBPF program. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55; /* * bpf_perf_prog_read_value * * For an eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in * the structure pointed by *buf* and of size *buf_size*. Enabled * and running times are also stored in the structure (see * description of helper **bpf_perf_event_read_value**\ () for * more details). * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56; /* * bpf_getsockopt * * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at * which the option resides and the name *optname* of the option * must be specified, see **getsockopt(2)** for more information. * The retrieved value is stored in the structure pointed by * *optval* and of length *optlen*. * * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**.
* It supports the following *level*\ s: * * * **IPPROTO_TCP**, which supports *optname* * **TCP_CONGESTION**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_getsockopt)(void *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57; /* * bpf_override_return * * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. * The first argument is the context *regs* on which the kprobe * works. * * This helper works by setting the PC (program counter) * to an override function which is run in place of the original * probed function. This means the probed function is not run at * all. The replacement function just returns with the required * value. * * This helper has security implications, and thus is subject to * restrictions. It is only available if the kernel was compiled * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. * * Also, the helper is only available for the architectures having * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, * x86 architecture is the only one to support this feature. * * Returns * 0 */ static long (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58; /* * bpf_sock_ops_cb_flags_set * * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to * *argval*. * * The primary use of this field is to determine if there should * be calls to eBPF programs of type * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP * code. A program of the same type can change its value, per * connection and as necessary, when the connection is * established. 
This field is directly accessible for reading, but * this helper must be used for updates in order to return an * error if an eBPF program tries to set a callback that is not * supported in the current kernel. * * *argval* is a flag array which can combine these flags: * * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) * * Therefore, this function can be used to clear a callback flag by * setting the appropriate bit to zero. e.g. to disable the RTO * callback: * * **bpf_sock_ops_cb_flags_set(bpf_sock,** * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** * * Here are some examples of where one could call such eBPF * program: * * * When RTO fires. * * When a packet is retransmitted. * * When the connection terminates. * * When a packet is sent. * * When a packet is received. * * Returns * Code **-EINVAL** if the socket is not a full TCP socket; * otherwise, a positive number containing the bits that could not * be set is returned (which comes down to 0 if all bits were set * as required). */ static long (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59; /* * bpf_msg_redirect_map * * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if * the verdict eBPF program returns **SK_PASS**), redirect it to * the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and * egress interfaces can be used for redirection. The * **BPF_F_INGRESS** value in *flags* is used to make the * distinction (ingress path is selected if the flag is present, * egress path otherwise). This is the only flag supported for now. * * Returns * **SK_PASS** on success, or **SK_DROP** on error. 
*/ static long (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60; /* * bpf_msg_apply_bytes * * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. * * For example, this helper can be used in the following cases: * * * A single **sendmsg**\ () or **sendfile**\ () system call * contains multiple logical messages that the eBPF program is * supposed to read and for which it should apply a verdict. * * An eBPF program only cares to read the first *bytes* of a * *msg*. If the message has a large payload, then setting up * and calling the eBPF program repeatedly for all bytes, even * though the verdict is already known, would create unnecessary * overhead. * * When called from within an eBPF program, the helper sets a * counter internal to the BPF infrastructure, that is used to * apply the last verdict to the next *bytes*. If *bytes* is * smaller than the current data being processed from a * **sendmsg**\ () or **sendfile**\ () system call, the first * *bytes* will be sent and the eBPF program will be re-run with * the pointer for start of data pointing to byte number *bytes* * **+ 1**. If *bytes* is larger than the current data being * processed, then the eBPF verdict will be applied to multiple * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are * consumed. * * Note that if a socket closes with the internal counter holding * a non-zero value, this is not a problem because data is not * being buffered for *bytes* and is sent as it is received. * * Returns * 0 */ static long (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61; /* * bpf_msg_cork_bytes * * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been * accumulated. 
* * This can be used when one needs a specific number of bytes * before a verdict can be assigned, even if the data spans * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme * case would be a user calling **sendmsg**\ () repeatedly with * 1-byte long message segments. Obviously, this is bad for * performance, but it is still valid. If the eBPF program needs * *bytes* bytes to validate a header, this helper can be used to * prevent the eBPF program from being called again until *bytes* have * been accumulated. * * Returns * 0 */ static long (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62; /* * bpf_msg_pull_data * * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ * **->data_end** to *start* and *end* bytes offsets into *msg*, * respectively. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a * *msg* it can only parse data that the (**data**, **data_end**) * pointers have already consumed. For **sendmsg**\ () hooks this * is likely the first scatterlist element. But for calls relying * on the **sendpage** handler (e.g. **sendfile**\ ()) this will * be the range (**0**, **0**) because the data is shared with * user space and by default the objective is to avoid allowing * user space to modify data while (or after) eBPF verdict is * being decided. This helper can be used to pull in data and to * set the start and end pointer to given values. Data will be * copied if necessary (i.e. if data was not linear and if start * and end pointers do not point to the same chunk). * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * All values for *flags* are reserved for future usage, and must * be left at zero.
* * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63; /* * bpf_bind * * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing * connection from the desired IP address, which can be useful for * example when all processes inside a cgroup should use one * single IP address on a host that has multiple IP configured. * * This helper works for IPv4 and IPv6, TCP and UDP sockets. The * domain (*addr*\ **->sa_family**) must be **AF_INET** (or * **AF_INET6**). It's advised to pass zero port (**sin_port** * or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like * behavior and lets the kernel efficiently pick up an unused * port as long as 4-tuple is unique. Passing non-zero port might * lead to degraded performance. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64; /* * bpf_xdp_adjust_tail * * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. * Shrink done via *delta* being a negative integer. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65; /* * bpf_skb_get_xfrm_state * * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. * * The retrieved value is stored in the **struct bpf_xfrm_state** * pointed by *xfrm_state* and of length *size*. 
* * All values for *flags* are reserved for future usage, and must * be left at zero. * * This helper is available only if the kernel was compiled with * **CONFIG_XFRM** configuration option. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66; /* * bpf_get_stack * * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer * to the context on which the tracing program is executed. * To store the stacktrace, the bpf program provides *buf* with * a nonnegative *size*. * * The last argument, *flags*, holds the number of stack frames to * skip (from 0 to 255), masked with * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set * the following flags: * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject * to a sufficiently large buffer size. Note that * this limit can be controlled with the **sysctl** program, and * that it should be manually increased in order to profile long * user stacks (such as stacks for Java programs). To do so, use: * * :: * * # sysctl kernel.perf_event_max_stack= * * Returns * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. */ static long (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67; /* * bpf_skb_load_bytes_relative * * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* * from the packet associated to *skb*, into the buffer pointed * by *to*.
The difference to **bpf_skb_load_bytes**\ () is that * a fifth argument *start_header* exists in order to select a * base offset to start from. *start_header* can be one of: * * **BPF_HDR_START_MAC** * Base offset to load data from is *skb*'s mac header. * **BPF_HDR_START_NET** * Base offset to load data from is *skb*'s network header. * * In general, "direct packet access" is the preferred method to * access packet data, however, this helper is in particular useful * in socket filters where *skb*\ **->data** does not always point * to the start of the mac header and where "direct packet access" * is not available. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68; /* * bpf_fib_lookup * * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be * forwarded, the neighbor tables are searched for the nexthop. * If successful (i.e., FIB lookup shows forwarding and nexthop * is resolved), the nexthop address is returned in ipv4_dst * or ipv6_dst based on family, smac is set to mac address of * egress device, dmac is set to nexthop mac address, rt_metric * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * * *plen* argument is the size of the passed in struct. * *flags* argument can be a combination of one or more of the * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** for tc cls_act programs.
* * Returns * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU * was exceeded and output params->mtu_result contains the MTU. */ static long (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69; /* * bpf_sock_hash_update * * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to * *key*. *flags* is one of: * * **BPF_NOEXIST** * The entry for *key* must not exist in the map. * **BPF_EXIST** * The entry for *key* must already exist in the map. * **BPF_ANY** * No condition on the existence of the entry for *key*. * * If the *map* has eBPF programs (parser and verdict), those will * be inherited by the socket being added. If the socket is * already attached to eBPF programs, this results in an error. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70; /* * bpf_msg_redirect_hash * * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if * the verdict eBPF program returns **SK_PASS**), redirect it to * the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The * **BPF_F_INGRESS** value in *flags* is used to make the * distinction (ingress path is selected if the flag is present, * egress path otherwise). This is the only flag supported for now. * * Returns * **SK_PASS** on success, or **SK_DROP** on error. 
*/ static long (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71; /* * bpf_sk_redirect_hash * * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The * **BPF_F_INGRESS** value in *flags* is used to make the * distinction (ingress path is selected if the flag is present, * egress otherwise). This is the only flag supported for now. * * Returns * **SK_PASS** on success, or **SK_DROP** on error. */ static long (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72; /* * bpf_lwt_push_encap * * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at * address *hdr*, with *len* its size in bytes. *type* indicates * the protocol of the header and can be one of: * * **BPF_LWT_ENCAP_SEG6** * IPv6 encapsulation with Segment Routing Header * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, * the IPv6 header is computed by the kernel. * **BPF_LWT_ENCAP_SEG6_INLINE** * Only works if *skb* contains an IPv6 packet. Insert a * Segment Routing Header (**struct ipv6_sr_hdr**) inside * the IPv6 header. * **BPF_LWT_ENCAP_IP** * IP encapsulation (GRE/GUE/IPIP/etc). The outer header * must be IPv4 or IPv6, followed by zero or more * additional headers, up to **LWT_BPF_MAX_HEADROOM** * total bytes in all prepended headers. Please note that * if **skb_is_gso**\ (*skb*) is true, no more than two * headers can be prepended, and the inner header, if * present, should be either GRE or UDP/GUE. 
* * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and * **BPF_PROG_TYPE_LWT_XMIT**. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73; /* * bpf_lwt_seg6_store_bytes * * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs * inside the outermost IPv6 Segment Routing Header can be * modified through this helper. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74; /* * bpf_lwt_seg6_adjust_srh * * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to * *skb*, at position *offset* by *delta* bytes. Only offsets * after the segments are accepted. *delta* can be as well * positive (growing) as negative (shrinking). * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. 
* * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75; /* * bpf_lwt_seg6_action * * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter * contained at address *param*, and of length *param_len* bytes. * *action* can be one of: * * **SEG6_LOCAL_ACTION_END_X** * End.X action: Endpoint with Layer-3 cross-connect. * Type of *param*: **struct in6_addr**. * **SEG6_LOCAL_ACTION_END_T** * End.T action: Endpoint with specific IPv6 table lookup. * Type of *param*: **int**. * **SEG6_LOCAL_ACTION_END_B6** * End.B6 action: Endpoint bound to an SRv6 policy. * Type of *param*: **struct ipv6_sr_hdr**. * **SEG6_LOCAL_ACTION_END_B6_ENCAP** * End.B6.Encap action: Endpoint bound to an SRv6 * encapsulation policy. * Type of *param*: **struct ipv6_sr_hdr**. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be * performed again, if the helper is used in combination with * direct packet access. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76; /* * bpf_rc_repeat * * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays * the generation of a key up event for previously generated * key down event. * * Some IR protocols like NEC have a special IR message for * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * * This helper is only available if the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**".
* * Returns * 0 */ static long (*bpf_rc_repeat)(void *ctx) = (void *) 77; /* * bpf_rc_keydown * * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, * *toggle* value in the given *protocol*. The scancode will be * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either * **bpf_rc_keydown**\ () again with the same values, or calling * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * * The *protocol* is the decoded protocol number (see * **enum rc_proto** for some predefined values). * * This helper is only available if the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * * Returns * 0 */ static long (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78; /* * bpf_skb_cgroup_id * * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () * helper for cgroup v1 by providing a tag resp. identifier that * can be matched on or used for map lookups e.g. to implement * policy. The cgroup v2 id of a given path in the hierarchy is * exposed in user space through the f_handle API in order to get * to the same 64-bit id. * * This helper can be used on TC egress path, but not on ingress, * and is available only if the kernel was compiled with the * **CONFIG_SOCK_CGROUP_DATA** configuration option. * * Returns * The id is returned or 0 in case the id could not be retrieved.
*/ static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79; /* * bpf_get_current_cgroup_id * * * Returns * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. */ static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80; /* * bpf_get_local_storage * * Get the pointer to the local storage area. * The type and the size of the local storage is defined * by the *map* argument. * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * * Depending on the BPF program type, a local storage area * can be shared between multiple instances of the BPF program, * running simultaneously. * * Users must take care of synchronization themselves, * for example by using the **BPF_ATOMIC** instructions to alter * the shared data. * * Returns * A pointer to the local storage area. */ static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81; /* * bpf_sk_select_reuseport * * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82; /* * bpf_skb_ancestor_cgroup_id * * Return id of cgroup v2 that is ancestor of cgroup associated * with the *skb* at the *ancestor_level*. The root cgroup is at * *ancestor_level* zero and each step down the hierarchy * increments the level. If *ancestor_level* == level of cgroup * associated with *skb*, then return value will be same as that * of **bpf_skb_cgroup_id**\ (). * * The helper is useful to implement policies based on cgroups * that are higher in the hierarchy than the immediate cgroup associated * with *skb*. * * The format of returned id and helper limitations are same as in * **bpf_skb_cgroup_id**\ ().
* * Returns * The id is returned or 0 in case the id could not be retrieved. */ static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83; /* * bpf_sk_lookup_tcp * * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used * to determine the base network namespace for the lookup. * * *tuple_size* must be one of: * * **sizeof**\ (*tuple*\ **->ipv4**) * Look for an IPv4 socket. * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or * equal to zero then it specifies the ID of the netns relative to * the netns associated with the *ctx*. *netns* values beyond the * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. * * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * * Returns * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. */ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84; /* * bpf_sk_lookup_udp * * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. 
The return value must be checked, * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used * to determine the base network namespace for the lookup. * * *tuple_size* must be one of: * * **sizeof**\ (*tuple*\ **->ipv4**) * Look for an IPv4 socket. * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or * equal to zero then it specifies the ID of the netns relative to * the netns associated with the *ctx*. *netns* values beyond the * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. * * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * * Returns * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. */ static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85; /* * bpf_sk_release * * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from * **bpf_sk_lookup_xxx**\ (). * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_sk_release)(void *sock) = (void *) 86; /* * bpf_map_push_elem * * Push an element *value* in *map*. *flags* is one of: * * **BPF_EXIST** * If the queue/stack is full, the oldest element is * removed to make room for this. 
* * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87; /* * bpf_map_pop_elem * * Pop an element from *map*. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88; /* * bpf_map_peek_elem * * Get an element from *map* without removing it. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89; /* * bpf_msg_push_data * * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails). In these cases BPF programs will get an appropriate * error and will need to handle it. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90; /* * bpf_msg_pop_data * * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation * if possible. Other errors can occur if input parameters are * invalid either due to *start* byte not being valid part of *msg* * payload and/or *pop* value being too large. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91; /* * bpf_rc_pointer_rel * * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement.
* * The *ctx* should point to the lirc sample as passed into * the program. * * This helper is only available if the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * * Returns * 0 */ static long (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92; /* * bpf_spin_lock * * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to * safely update the rest of the fields in that value. The * spinlock can (and must) later be released with a call to * **bpf_spin_unlock**\ (\ *lock*\ ). * * Spinlocks in BPF programs come with a number of restrictions * and constraints: * * * **bpf_spin_lock** objects are only allowed inside maps of * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this * list could be extended in the future). * * BTF description of the map is mandatory. * * The BPF program can take ONE lock at a time, since taking two * or more could cause deadlocks. * * Only one **struct bpf_spin_lock** is allowed per map element. * * When the lock is taken, calls (either BPF to BPF or helpers) * are not allowed. * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not * allowed inside a spinlock-ed region. * * The BPF program MUST call **bpf_spin_unlock**\ () to release * the lock, on all execution paths, before it returns. * * The BPF program can access **struct bpf_spin_lock** only via * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () * helpers. Loading or storing data into the **struct * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. * * To use the **bpf_spin_lock**\ () helper, the BTF description * of the map value must be a struct and have **struct * bpf_spin_lock** *anyname*\ **;** field at the top level. * Nested lock inside another struct is not allowed. * * The **struct bpf_spin_lock** *lock* field in a map value must * be aligned on a multiple of 4 bytes in that value.
* * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy * the **bpf_spin_lock** field to user space. * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from * a BPF program, do not update the **bpf_spin_lock** field. * * **bpf_spin_lock** cannot be on the stack or inside a * networking packet (it can only be inside of a map value). * * **bpf_spin_lock** is available to root only. * * Tracing programs and socket filter programs cannot use * **bpf_spin_lock**\ () due to insufficient preemption checks * (but this may change in the future). * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. * * Returns * 0 */ static long (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93; /* * bpf_spin_unlock * * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). * * Returns * 0 */ static long (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94; /* * bpf_sk_fullsock * * This helper gets a **struct bpf_sock** pointer such * that all the fields in this **bpf_sock** can be accessed. * * Returns * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95; /* * bpf_tcp_sock * * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. * * Returns * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. */ static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96; /* * bpf_skb_ecn_set_ce * * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 * and IPv4. * * Returns * 1 if the **CE** flag is set (either by the current helper call * or because it was already present), 0 if it is not set.
*/ static long (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97; /* * bpf_get_listener_sock * * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. * **bpf_sk_release**\ () is unnecessary and not allowed. * * Returns * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98; /* * bpf_skc_lookup_tcp * * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, * and if non-**NULL**, released via **bpf_sk_release**\ (). * * This function is identical to **bpf_sk_lookup_tcp**\ (), except * that it also returns timewait or request sockets. Use * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the * full structure. * * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * * Returns * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. */ static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99; /* * bpf_tcp_check_syncookie * * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or * **sizeof**\ (**struct ip6hdr**). * * *th* points to the start of the TCP header, while *th_len* * contains **sizeof**\ (**struct tcphdr**). * * Returns * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. 
*/ static long (*bpf_tcp_check_syncookie)(void *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100; /* * bpf_sysctl_get_name * * Get name of sysctl in /proc/sys/ and copy it into the * program-provided buffer *buf* of size *buf_len*. * * The buffer is always NUL terminated, unless it's zero-sized. * * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name * only (e.g. "tcp_mem"). * * Returns * Number of characters copied (not including the trailing NUL). * * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). */ static long (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101; /* * bpf_sysctl_get_current_value * * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into the * program-provided buffer *buf* of size *buf_len*. * * The whole value is copied, no matter what file position user * space issued e.g. sys_read at. * * The buffer is always NUL terminated, unless it's zero-sized. * * Returns * Number of characters copied (not including the trailing NUL). * * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. */ static long (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102; /* * bpf_sysctl_get_new_value * * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into * the program-provided buffer *buf* of size *buf_len*. * * User space may write new value at file position > 0. * * The buffer is always NUL terminated, unless it's zero-sized. * * Returns * Number of characters copied (not including the trailing NUL).
* * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * * **-EINVAL** if sysctl is being read. */ static long (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103; /* * bpf_sysctl_set_new_value * * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. * * *buf* should contain a string in same form as provided by user * space on sysctl write. * * User space may write new value at file position > 0. To override * the whole sysctl value file position should be set to zero. * * Returns * 0 on success. * * **-E2BIG** if the *buf_len* is too big. * * **-EINVAL** if sysctl is being read. */ static long (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104; /* * bpf_strtol * * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base * and save the result in *res*. * * The string may begin with an arbitrary amount of white space * (as determined by **isspace**\ (3)) followed by a single * optional '**-**' sign. * * Five least significant bits of *flags* encode base, other bits * are currently unused. * * Base must be either 8, 10, 16 or 0 to detect it automatically * similar to user space **strtol**\ (3). * * Returns * Number of characters consumed on success. Must be positive but * no more than *buf_len*. * * **-EINVAL** if no valid digits were found or unsupported base * was provided. * * **-ERANGE** if resulting value was out of range. */ static long (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105; /* * bpf_strtoul * * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the * given base and save the result in *res*. 
* * The string may begin with an arbitrary amount of white space * (as determined by **isspace**\ (3)). * * Five least significant bits of *flags* encode base, other bits * are currently unused. * * Base must be either 8, 10, 16 or 0 to detect it automatically * similar to user space **strtoul**\ (3). * * Returns * Number of characters consumed on success. Must be positive but * no more than *buf_len*. * * **-EINVAL** if no valid digits were found or unsupported base * was provided. * * **-ERANGE** if resulting value was out of range. */ static long (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106; /* * bpf_sk_storage_get * * Get a bpf-local-storage from a *sk*. * * Logically, it could be thought of getting the value from * a *map* with *sk* as the **key**. From this * perspective, the usage is not much different from * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this * helper enforces the key must be a full socket and the map must * be a **BPF_MAP_TYPE_SK_STORAGE** also. * * Underneath, the value is stored locally at *sk* instead of * the *map*. The *map* is used as the bpf-local-storage * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * * *sk* is a kernel **struct sock** pointer for LSM program. * *sk* is a **struct bpf_sock** pointer for other program types. * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify * the initial value of a bpf-local-storage. If *value* is * **NULL**, the new bpf-local-storage will be zero initialized. * * Returns * A bpf-local-storage pointer is returned on success. * * **NULL** if not found or there was an error in adding * a new bpf-local-storage. 
*/ static void *(*bpf_sk_storage_get)(void *map, void *sk, void *value, __u64 flags) = (void *) 107; /* * bpf_sk_storage_delete * * Delete a bpf-local-storage from a *sk*. * * Returns * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). */ static long (*bpf_sk_storage_delete)(void *map, void *sk) = (void *) 108; /* * bpf_send_signal * * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. * * Returns * 0 on success or successfully queued. * * **-EBUSY** if work queue under nmi is full. * * **-EINVAL** if *sig* is invalid. * * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. */ static long (*bpf_send_signal)(__u32 sig) = (void *) 109; /* * bpf_tcp_gen_syncookie * * Try to issue a SYN cookie for the packet with corresponding * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or * **sizeof**\ (**struct ip6hdr**). * * *th* points to the start of the TCP header, while *th_len* * contains the length of the TCP header. * * Returns * On success, lower 32 bits hold the generated SYN cookie, * followed by 16 bits which hold the MSS value for that cookie, * and the top 16 bits are unused. * * On failure, the returned value is one of the following: * * **-EINVAL** SYN cookie cannot be issued due to error * * **-ENOENT** SYN cookie should not be issued (no SYN flood) * * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ static __s64 (*bpf_tcp_gen_syncookie)(void *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110; /* * bpf_skb_output * * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**.
This perf * event must have the following attributes: **PERF_SAMPLE_RAW** * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. * * The *flags* are used to indicate the index in *map* for which * the value must be put, masked with **BPF_F_INDEX_MASK**. * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** * to indicate that the index of the current CPU core should be * used. * * The value to write, of *size*, is passed through eBPF stack and * pointed by *data*. * * *ctx* is a pointer to in-kernel struct sk_buff. * * This helper is similar to **bpf_perf_event_output**\ () but * restricted to raw_tracepoint bpf programs. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111; /* * bpf_probe_read_user * * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; /* * bpf_probe_read_kernel * * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; /* * bpf_probe_read_user_str * * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the * terminating NUL byte. In case the string length is smaller than * *size*, the target is not padded with further NUL bytes. If the * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. * * On success, returns the number of bytes that were written, * including the terminal NUL. 
This makes this helper useful in * tracing programs for reading strings, and more importantly to * get its length at runtime. See the following snippet: * * :: * * SEC("kprobe/sys_open") * void bpf_sys_open(struct pt_regs *ctx) * { * char buf[PATHLEN]; // PATHLEN is defined to 256 * int res = bpf_probe_read_user_str(buf, sizeof(buf), * ctx->di); * * // Consume buf, for example push it to * // userspace via bpf_perf_event_output(); we * // can use res (the string length) as event * // size, after checking its boundaries. * } * * In comparison, using **bpf_probe_read_user**\ () helper here * instead to read the string would require to estimate the length * at compile time, and would often result in copying more memory * than necessary. * * Another useful use case is when parsing individual process * arguments or individual environment variables navigating * *current*\ **->mm->arg_start** and *current*\ * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * * Returns * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. */ static long (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114; /* * bpf_probe_read_kernel_str * * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. * * Returns * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. */ static long (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115; /* * bpf_tcp_send_ack * * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * * Returns * 0 on success, or a negative error in case of failure. 
*/ static long (*bpf_tcp_send_ack)(void *tp, __u32 rcv_nxt) = (void *) 116; /* * bpf_send_signal_thread * * Send signal *sig* to the thread corresponding to the current task. * * Returns * 0 on success or successfully queued. * * **-EBUSY** if work queue under nmi is full. * * **-EINVAL** if *sig* is invalid. * * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. */ static long (*bpf_send_signal_thread)(__u32 sig) = (void *) 117; /* * bpf_jiffies64 * * Obtain the 64bit jiffies * * Returns * The 64 bit jiffies */ static __u64 (*bpf_jiffies64)(void) = (void *) 118; /* * bpf_read_branch_records * * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* * and store it in the buffer pointed by *buf* up to size * *size* bytes. * * Returns * On success, number of bytes written to *buf*. On error, a * negative value. * * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to * instead return the number of bytes required to store all the * branch entries. If this flag is set, *buf* may be NULL. * * **-EINVAL** if arguments invalid or **size** not a multiple * of **sizeof**\ (**struct perf_branch_entry**\ ). * * **-ENOENT** if architecture does not support branch records. */ static long (*bpf_read_branch_records)(struct bpf_perf_event_data *ctx, void *buf, __u32 size, __u64 flags) = (void *) 119; /* * bpf_get_ns_current_pid_tgid * * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. * * Returns * 0 on success, or one of the following in case of failure: * * **-EINVAL** if dev and inum supplied don't match dev_t and inode number * with nsfs of current task, or if dev conversion to dev_t lost high bits. * * **-ENOENT** if pidns does not exist for the current task.
*/ static long (*bpf_get_ns_current_pid_tgid)(__u64 dev, __u64 ino, struct bpf_pidns_info *nsdata, __u32 size) = (void *) 120; /* * bpf_xdp_output * * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf * event must have the following attributes: **PERF_SAMPLE_RAW** * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. * * The *flags* are used to indicate the index in *map* for which * the value must be put, masked with **BPF_F_INDEX_MASK**. * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** * to indicate that the index of the current CPU core should be * used. * * The value to write, of *size*, is passed through eBPF stack and * pointed by *data*. * * *ctx* is a pointer to in-kernel struct xdp_buff. * * This helper is similar to **bpf_perf_event_output**\ () but * restricted to raw_tracepoint bpf programs. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_xdp_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 121; /* * bpf_get_netns_cookie * * Retrieve the cookie (generated by the kernel) of the network * namespace the input *ctx* is associated with. The network * namespace cookie remains stable for its lifetime and provides * a global identifier that can be assumed unique. If *ctx* is * NULL, then the helper returns the cookie for the initial * network namespace. The cookie itself is very similar to that * of **bpf_get_socket_cookie**\ () helper, but for network * namespaces instead of sockets. * * Returns * An 8-byte long opaque number. */ static __u64 (*bpf_get_netns_cookie)(void *ctx) = (void *) 122; /* * bpf_get_current_ancestor_cgroup_id * * Return id of cgroup v2 that is ancestor of the cgroup associated * with the current task at the *ancestor_level*. The root cgroup * is at *ancestor_level* zero and each step down the hierarchy * increments the level.
If *ancestor_level* == level of cgroup * associated with the current task, then return value will be the * same as that of **bpf_get_current_cgroup_id**\ (). * * The helper is useful to implement policies based on cgroups * that are upper in hierarchy than immediate cgroup associated * with the current task. * * The format of returned id and helper limitations are same as in * **bpf_get_current_cgroup_id**\ (). * * Returns * The id is returned or 0 in case the id could not be retrieved. */ static __u64 (*bpf_get_current_ancestor_cgroup_id)(int ancestor_level) = (void *) 123; /* * bpf_sk_assign * * Helper is overloaded depending on BPF program type. This * description applies to **BPF_PROG_TYPE_SCHED_CLS** and * **BPF_PROG_TYPE_SCHED_ACT** programs. * * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, * will cause *skb* to be delivered to the specified socket. * Subsequent redirection of *skb* via **bpf_redirect**\ (), * **bpf_clone_redirect**\ () or other methods outside of BPF may * interfere with successful delivery to the socket. * * This operation is only valid from TC ingress path. * * The *flags* argument must be zero. * * Returns * 0 on success, or a negative error in case of failure: * * **-EINVAL** if specified *flags* are not supported. * * **-ENOENT** if the socket is unavailable for assignment. * * **-ENETUNREACH** if the socket is unreachable (wrong netns). * * **-EOPNOTSUPP** if the operation is not supported, for example * a call from outside of TC ingress. * * **-ESOCKTNOSUPPORT** if the socket type is not supported * (reuseport). */ static long (*bpf_sk_assign)(void *ctx, void *sk, __u64 flags) = (void *) 124; /* * bpf_ktime_get_boot_ns * * Return the time elapsed since system boot, in nanoseconds. * Does include the time the system was suspended. * See: **clock_gettime**\ (**CLOCK_BOOTTIME**) * * Returns * Current *ktime*. 
*/ static __u64 (*bpf_ktime_get_boot_ns)(void) = (void *) 125; /* * bpf_seq_printf * * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. * The *m* represents the seq_file. The *fmt* and *fmt_size* are for * the format string itself. The *data* and *data_len* are format string * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** require to read kernel memory. * Reading kernel memory may fail due to either invalid address or * valid address but requiring a major memory fault. If reading kernel memory * fails, the string for **%s** will be an empty string, and the ip * address for **%p{i,I}{4,6}** will be 0. Not returning error to * bpf program is consistent with what **bpf_trace_printk**\ () does for now. * * Returns * 0 on success, or a negative error in case of failure: * * **-EBUSY** if per-CPU memory copy buffer is busy, can try again * by returning 1 from bpf program. * * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported. * * **-E2BIG** if *fmt* contains too many format specifiers. * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. */ static long (*bpf_seq_printf)(struct seq_file *m, const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 126; /* * bpf_seq_write * * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the * data to write in bytes. * * Returns * 0 on success, or a negative error in case of failure: * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. 
*/ static long (*bpf_seq_write)(struct seq_file *m, const void *data, __u32 len) = (void *) 127; /* * bpf_sk_cgroup_id * * Return the cgroup v2 id of the socket *sk*. * * *sk* must be a non-**NULL** pointer to a socket, e.g. one * returned from **bpf_sk_lookup_xxx**\ (), * **bpf_sk_fullsock**\ (), etc. The format of returned id is * same as in **bpf_skb_cgroup_id**\ (). * * This helper is available only if the kernel was compiled with * the **CONFIG_SOCK_CGROUP_DATA** configuration option. * * Returns * The id is returned or 0 in case the id could not be retrieved. */ static __u64 (*bpf_sk_cgroup_id)(void *sk) = (void *) 128; /* * bpf_sk_ancestor_cgroup_id * * Return id of cgroup v2 that is ancestor of cgroup associated * with the *sk* at the *ancestor_level*. The root cgroup is at * *ancestor_level* zero and each step down the hierarchy * increments the level. If *ancestor_level* == level of cgroup * associated with *sk*, then return value will be same as that * of **bpf_sk_cgroup_id**\ (). * * The helper is useful to implement policies based on cgroups * that are upper in hierarchy than immediate cgroup associated * with *sk*. * * The format of returned id and helper limitations are same as in * **bpf_sk_cgroup_id**\ (). * * Returns * The id is returned or 0 in case the id could not be retrieved. */ static __u64 (*bpf_sk_ancestor_cgroup_id)(void *sk, int ancestor_level) = (void *) 129; /* * bpf_ringbuf_output * * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. * If **0** is specified in *flags*, an adaptive notification * of new data availability is sent. * * An adaptive notification is a notification sent whenever the user-space * process has caught up and consumed all available payloads. 
In case the user-space * process is still processing a previous payload, then no notification is needed * as it will process the newly added payload automatically. * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_ringbuf_output)(void *ringbuf, void *data, __u64 size, __u64 flags) = (void *) 130; /* * bpf_ringbuf_reserve * * Reserve *size* bytes of payload in a ring buffer *ringbuf*. * *flags* must be 0. * * Returns * Valid pointer with *size* bytes of memory available; NULL, * otherwise. */ static void *(*bpf_ringbuf_reserve)(void *ringbuf, __u64 size, __u64 flags) = (void *) 131; /* * bpf_ringbuf_submit * * Submit reserved ring buffer sample, pointed to by *data*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. * If **0** is specified in *flags*, an adaptive notification * of new data availability is sent. * * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * * Returns * Nothing. Always succeeds. */ static void (*bpf_ringbuf_submit)(void *data, __u64 flags) = (void *) 132; /* * bpf_ringbuf_discard * * Discard reserved ring buffer sample, pointed to by *data*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. * If **0** is specified in *flags*, an adaptive notification * of new data availability is sent. * * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * * Returns * Nothing. Always succeeds. */ static void (*bpf_ringbuf_discard)(void *data, __u64 flags) = (void *) 133; /* * bpf_ringbuf_query * * Query various characteristics of provided ring buffer. 
What * exactly is queried is determined by *flags*: * * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. * * **BPF_RB_RING_SIZE**: The size of ring buffer. * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). * * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * * Returns * Requested value, or 0, if *flags* are not recognized. */ static __u64 (*bpf_ringbuf_query)(void *ringbuf, __u64 flags) = (void *) 134; /* * bpf_csum_level * * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform * checksum validation. The level is applicable to the following * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | * through **bpf_skb_adjust_room**\ () helper with passing in * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since * the UDP header is removed. Similarly, an encap of the latter * into the former could be accompanied by a helper call to * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the * skb is still intended to be processed in higher layers of the * stack instead of just egressing at tc. * * There are four supported level settings at this time: * * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs * with CHECKSUM_UNNECESSARY. * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs * with CHECKSUM_UNNECESSARY. * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and * sets CHECKSUM_NONE to force checksum validation by the stack. * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current * skb->csum_level. * * Returns * 0 on success, or a negative error in case of failure. 
In the * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. */ static long (*bpf_csum_level)(struct __sk_buff *skb, __u64 level) = (void *) 135; /* * bpf_skc_to_tcp6_sock * * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * * Returns * *sk* if casting is valid, or **NULL** otherwise. */ static struct tcp6_sock *(*bpf_skc_to_tcp6_sock)(void *sk) = (void *) 136; /* * bpf_skc_to_tcp_sock * * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. * * Returns * *sk* if casting is valid, or **NULL** otherwise. */ static struct tcp_sock *(*bpf_skc_to_tcp_sock)(void *sk) = (void *) 137; /* * bpf_skc_to_tcp_timewait_sock * * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. * * Returns * *sk* if casting is valid, or **NULL** otherwise. */ static struct tcp_timewait_sock *(*bpf_skc_to_tcp_timewait_sock)(void *sk) = (void *) 138; /* * bpf_skc_to_tcp_request_sock * * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * * Returns * *sk* if casting is valid, or **NULL** otherwise. */ static struct tcp_request_sock *(*bpf_skc_to_tcp_request_sock)(void *sk) = (void *) 139; /* * bpf_skc_to_udp6_sock * * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * * Returns * *sk* if casting is valid, or **NULL** otherwise. */ static struct udp6_sock *(*bpf_skc_to_udp6_sock)(void *sk) = (void *) 140; /* * bpf_get_task_stack * * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. * * The last argument, *flags*, holds the number of stack frames to * skip (from 0 to 255), masked with * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set * the following flags: * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. 
* **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. * * **bpf_get_task_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject * to sufficient large buffer size. Note that * this limit can be controlled with the **sysctl** program, and * that it should be manually increased in order to profile long * user stacks (such as stacks for Java programs). To do so, use: * * :: * * # sysctl kernel.perf_event_max_stack= * * Returns * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. */ static long (*bpf_get_task_stack)(struct task_struct *task, void *buf, __u32 size, __u64 flags) = (void *) 141; /* * bpf_load_hdr_opt * * Load header option. Support reading a particular TCP header * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). * * If *flags* is 0, it will search the option from the * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** * has details on what skb_data contains under different * *skops*\ **->op**. * * The first byte of the *searchby_res* specifies the * kind that it wants to search. * * If the searching kind is an experimental kind * (i.e. 253 or 254 according to RFC6994), it also * needs to specify the "magic" which is either * 2 bytes or 4 bytes. It then also needs to * specify the size of the magic by using * the 2nd byte which is "kind-length" of a TCP * header option and the "kind-length" also * includes the first 2 bytes "kind" and "kind-length" * itself as a normal TCP header option also does. * * For example, to search experimental kind 254 with * 2 byte magic 0xeB9F, the searchby_res should be * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. * * To search for the standard window scale option (3), * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. * Note, kind-length must be 0 for regular option. 
* * Searching for No-Op (0) and End-of-Option-List (1) are * not supported. * * *len* must be at least 2 bytes which is the minimal size * of a header option. * * Supported flags: * * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the * saved_syn packet or the just-received syn packet. * * * Returns * > 0 when found, the header option is copied to *searchby_res*. * The return value is the total length copied. On failure, a * negative error code is returned: * * **-EINVAL** if a parameter is invalid. * * **-ENOMSG** if the option is not found. * * **-ENOENT** if no syn packet is available when * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. * * **-ENOSPC** if there is not enough space. Only *len* number of * bytes are copied. * * **-EFAULT** on failure to parse the header options in the * packet. * * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. */ static long (*bpf_load_hdr_opt)(struct bpf_sock_ops *skops, void *searchby_res, __u32 len, __u64 flags) = (void *) 142; /* * bpf_store_hdr_opt * * Store header option. The data will be copied * from buffer *from* with length *len* to the TCP header. * * The buffer *from* should have the whole option that * includes the kind, kind-length, and the actual * option data. The *len* must be at least kind-length * long. The kind-length does not have to be 4 byte * aligned. The kernel will take care of the padding * and setting the 4 bytes aligned value to th->doff. * * This helper will check for duplicated option * by searching the same option in the outgoing skb. * * This helper can only be called during * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. * * * Returns * 0 on success, or negative error in case of failure: * * **-EINVAL** if a parameter is invalid. * * **-ENOSPC** if there is not enough space in the header. * Nothing has been written * * **-EEXIST** if the option already exists. * * **-EFAULT** on failure to parse the existing header options. 
* * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. */ static long (*bpf_store_hdr_opt)(struct bpf_sock_ops *skops, const void *from, __u32 len, __u64 flags) = (void *) 143; /* * bpf_reserve_hdr_opt * * Reserve *len* bytes for the bpf header option. The * space will be used by **bpf_store_hdr_opt**\ () later in * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. * * If **bpf_reserve_hdr_opt**\ () is called multiple times, * the total number of bytes will be reserved. * * This helper can only be called during * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. * * * Returns * 0 on success, or negative error in case of failure: * * **-EINVAL** if a parameter is invalid. * * **-ENOSPC** if there is not enough space in the header. * * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. */ static long (*bpf_reserve_hdr_opt)(struct bpf_sock_ops *skops, __u32 len, __u64 flags) = (void *) 144; /* * bpf_inode_storage_get * * Get a bpf_local_storage from an *inode*. * * Logically, it could be thought of as getting the value from * a *map* with *inode* as the **key**. From this * perspective, the usage is not much different from * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this * helper enforces the key must be an inode and the map must also * be a **BPF_MAP_TYPE_INODE_STORAGE**. * * Underneath, the value is stored locally at *inode* instead of * the *map*. The *map* is used as the bpf-local-storage * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf_local_storage residing at *inode*. * * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be * used such that a new bpf_local_storage will be * created if one does not exist. *value* can be used * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify * the initial value of a bpf_local_storage. If *value* is * **NULL**, the new bpf_local_storage will be zero initialized. * * Returns * A bpf_local_storage pointer is returned on success. 
* * **NULL** if not found or there was an error in adding * a new bpf_local_storage. */ static void *(*bpf_inode_storage_get)(void *map, void *inode, void *value, __u64 flags) = (void *) 145; /* * bpf_inode_storage_delete * * Delete a bpf_local_storage from an *inode*. * * Returns * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. */ static int (*bpf_inode_storage_delete)(void *map, void *inode) = (void *) 146; /* * bpf_d_path * * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is * returned in the provided buffer *buf* of size *sz* and * is zero terminated. * * * Returns * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative * value. */ static long (*bpf_d_path)(struct path *path, char *buf, __u32 sz) = (void *) 147; /* * bpf_copy_from_user * * Read *size* bytes from user space address *user_ptr* and store * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * * Returns * 0 on success, or a negative error in case of failure. */ static long (*bpf_copy_from_user)(void *dst, __u32 size, const void *user_ptr) = (void *) 148; /* * bpf_snprintf_btf * * Use BTF to store a string representation of *ptr*->ptr in *str*, * using *ptr*->type_id. This value should specify the type * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) * can be used to look up vmlinux BTF type ids. Traversing the * data structure using BTF, the type information and values are * stored in the first *str_size* - 1 bytes of *str*. Safe copy of * the pointer data is carried out to avoid kernel crashes during * operation. Smaller types can use string space on the stack; * larger programs can use map data to store the string * representation. * * The string can be subsequently shared with userspace via * bpf_perf_event_output() or ring buffer interfaces. 
* bpf_trace_printk() is to be avoided as it places too small * a limit on string size to be useful. * * *flags* is a combination of * * **BTF_F_COMPACT** * no formatting around type information * **BTF_F_NONAME** * no struct/union member names/types * **BTF_F_PTR_RAW** * show raw (unobfuscated) pointer values; * equivalent to printk specifier %px. * **BTF_F_ZERO** * show zero-valued struct/union members; they * are not displayed by default * * * Returns * The number of bytes that were written (or would have been * written if output had to be truncated due to string size), * or a negative error in cases of failure. */ static long (*bpf_snprintf_btf)(char *str, __u32 str_size, struct btf_ptr *ptr, __u32 btf_ptr_size, __u64 flags) = (void *) 149; /* * bpf_seq_printf_btf * * Use BTF to write a string representation of *ptr*->ptr to the * seq_file *m*, using *ptr*->type_id as per bpf_snprintf_btf(). * *flags* are identical to those used for bpf_snprintf_btf(). * * Returns * 0 on success or a negative error in case of failure. */ static long (*bpf_seq_printf_btf)(struct seq_file *m, struct btf_ptr *ptr, __u32 ptr_size, __u64 flags) = (void *) 150; /* * bpf_skb_cgroup_classid * * See **bpf_get_cgroup_classid**\ () for the main description. * This helper differs from **bpf_get_cgroup_classid**\ () in that * the cgroup v1 net_cls class is retrieved only from the *skb*'s * associated socket instead of the current process. * * Returns * The id is returned or 0 in case the id could not be retrieved. */ static __u64 (*bpf_skb_cgroup_classid)(struct __sk_buff *skb) = (void *) 151; /* * bpf_redirect_neigh * * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper * relies on the neighbor lookup for the L2 address of the nexthop. 
* * The helper will perform a FIB lookup based on the skb's * networking header to get the address of the next hop, unless * this is supplied by the caller in the *params* argument. The * *plen* argument indicates the len of *params* and should be set * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled * for IPv4 and IPv6 protocols. * * Returns * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. */ static long (*bpf_redirect_neigh)(__u32 ifindex, struct bpf_redir_neigh *params, int plen, __u64 flags) = (void *) 152; /* * bpf_per_cpu_ptr * * Take a pointer to a percpu ksym, *percpu_ptr*, and return a * pointer to the percpu kernel variable on *cpu*. A ksym is an * extern variable decorated with '__ksym'. For ksym, there is a * global var (either static or global) defined of the same name * in the kernel. The ksym is percpu if the global var is percpu. * The returned pointer points to the global percpu var on *cpu*. * * bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the * kernel, except that bpf_per_cpu_ptr() may return NULL. This * happens if *cpu* is larger than nr_cpu_ids. The caller of * bpf_per_cpu_ptr() must check the returned value. * * Returns * A pointer pointing to the kernel percpu variable on *cpu*, or * NULL, if *cpu* is invalid. */ static void *(*bpf_per_cpu_ptr)(const void *percpu_ptr, __u32 cpu) = (void *) 153; /* * bpf_this_cpu_ptr * * Take a pointer to a percpu ksym, *percpu_ptr*, and return a * pointer to the percpu kernel variable on this cpu. See the * description of 'ksym' in **bpf_per_cpu_ptr**\ (). * * bpf_this_cpu_ptr() has the same semantics as this_cpu_ptr() in * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would * never return NULL. * * Returns * A pointer pointing to the kernel percpu variable on this cpu. 
*/ static void *(*bpf_this_cpu_ptr)(const void *percpu_ptr) = (void *) 154; /* * bpf_redirect_peer * * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_redirect**\ (), except * that the redirection happens to the *ifindex*' peer device and * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the ingress * hook and for veth device types. The peer device must reside in a * different network namespace. * * Returns * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. */ static long (*bpf_redirect_peer)(__u32 ifindex, __u64 flags) = (void *) 155; /* * bpf_task_storage_get * * Get a bpf_local_storage from the *task*. * * Logically, it could be thought of as getting the value from * a *map* with *task* as the **key**. From this * perspective, the usage is not much different from * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this * helper enforces the key must be a task_struct and the map must also * be a **BPF_MAP_TYPE_TASK_STORAGE**. * * Underneath, the value is stored locally at *task* instead of * the *map*. The *map* is used as the bpf-local-storage * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf_local_storage residing at *task*. * * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be * used such that a new bpf_local_storage will be * created if one does not exist. *value* can be used * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify * the initial value of a bpf_local_storage. If *value* is * **NULL**, the new bpf_local_storage will be zero initialized. * * Returns * A bpf_local_storage pointer is returned on success. * * **NULL** if not found or there was an error in adding * a new bpf_local_storage. 
*/ static void *(*bpf_task_storage_get)(void *map, struct task_struct *task, void *value, __u64 flags) = (void *) 156; /* * bpf_task_storage_delete * * Delete a bpf_local_storage from a *task*. * * Returns * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. */ static long (*bpf_task_storage_delete)(void *map, struct task_struct *task) = (void *) 157; /* * bpf_get_current_task_btf * * Return a BTF pointer to the "current" task. * This pointer can also be used in helpers that accept an * *ARG_PTR_TO_BTF_ID* of type *task_struct*. * * Returns * Pointer to the current task. */ static struct task_struct *(*bpf_get_current_task_btf)(void) = (void *) 158; /* * bpf_bprm_opts_set * * Set or clear certain options on *bprm*: * * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit * which sets the **AT_SECURE** auxv for glibc. The bit * is cleared if the flag is not specified. * * Returns * **-EINVAL** if invalid *flags* are passed, zero otherwise. */ static long (*bpf_bprm_opts_set)(struct linux_binprm *bprm, __u64 flags) = (void *) 159; /* * bpf_ktime_get_coarse_ns * * Return a coarse-grained version of the time elapsed since * system boot, in nanoseconds. Does not include time the system * was suspended. * * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) * * Returns * Current *ktime*. */ static __u64 (*bpf_ktime_get_coarse_ns)(void) = (void *) 160; /* * bpf_ima_inode_hash * * Returns the stored IMA hash of the *inode* (if it's available). * If the hash is larger than *size*, then only *size* * bytes will be copied to *dst*. * * Returns * The **hash_algo** is returned on success, * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. */ static long (*bpf_ima_inode_hash)(struct inode *inode, void *dst, __u32 size) = (void *) 161; /* * bpf_sock_from_file * * If the given file represents a socket, returns the associated * socket. * * Returns * A pointer to a struct socket on success or NULL if the file is * not a socket. 
*/ static struct socket *(*bpf_sock_from_file)(struct file *file) = (void *) 162; /* * bpf_check_mtu * * Check packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. * * The argument *len_diff* can be used for querying with a planned * size change. This allows to check MTU prior to changing packet * ctx. Providing a *len_diff* adjustment that is larger than the * actual packet size (resulting in negative packet size) will in * principle not exceed the MTU, why it is not considered a * failure. Other BPF-helpers are needed for performing the * planned size change, why the responsibility for catching a negative * packet size belongs in those helpers. * * Specifying *ifindex* zero means the MTU check is performed * against the current net device. This is practical if this isn't * used prior to redirect. * * On input *mtu_len* must be a valid pointer, else verifier will * reject BPF program. If the value *mtu_len* is initialized to * zero then the ctx packet size is used. When value *mtu_len* is * provided as input this specifies the L3 length that the MTU check * is done against. Remember XDP and TC length operate at L2, but * this value is L3 as this correlates to MTU and IP-header tot_len * values which are L3 (similar behavior as bpf_fib_lookup). * * The Linux kernel route table can configure MTUs on a more * specific per route level, which is not provided by this helper. * For route level MTU checks use the **bpf_fib_lookup**\ () * helper. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** for tc cls_act programs. * * The *flags* argument can be a combination of one or more of the * following values: * * **BPF_MTU_CHK_SEGS** * This flag only works for *ctx* **struct sk_buff**. 
* If packet context contains extra packet segment buffers * (often known as GSO skb), then MTU check is harder to * check at this point, because in transmit path it is * possible for the skb packet to get re-segmented * (depending on net device features). This could still be * a MTU violation, so this flag enables performing MTU * check against segments, with a different violation * return code to tell it apart. Check cannot use len_diff. * * On return *mtu_len* pointer contains the MTU value of the net * device. Remember the net device configured MTU is the L3 size, * which is returned here and XDP and TC length operate at L2. * The helper takes this into account for you, but remember when using * MTU value in your BPF-code. * * * Returns * * 0 on success, and populate MTU value in *mtu_len* pointer. * * * < 0 if any input argument is invalid (*mtu_len* not updated) * * MTU violations return positive values, but also populate MTU * value in *mtu_len* pointer, as this can be needed for * implementing PMTU handling: * * * **BPF_MTU_CHK_RET_FRAG_NEEDED** * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** */ static long (*bpf_check_mtu)(void *ctx, __u32 ifindex, __u32 *mtu_len, __s32 len_diff, __u64 flags) = (void *) 163; /* * bpf_for_each_map_elem * * For each element in **map**, call **callback_fn** function with * **map**, **callback_ctx** and other map-specific parameters. * The **callback_fn** should be a static function and * the **callback_ctx** should be a pointer to the stack. * The **flags** is used to control certain aspects of the helper. * Currently, the **flags** must be 0. 
* * The following are a list of supported map types and their * respective expected callback signatures: * * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY * * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); * * For per_cpu maps, the map_value is the value on the cpu where the * bpf_prog is running. * * If **callback_fn** returns 0, the helper will continue to the next * element. If return value is 1, the helper will skip the rest of * elements and return. Other return values are not used now. * * * Returns * The number of traversed map elements for success, **-EINVAL** for * invalid **flags**. */ static long (*bpf_for_each_map_elem)(void *map, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 164; /* * bpf_snprintf * * Outputs a string into the **str** buffer of size **str_size** * based on a format string stored in a read-only map pointed by * **fmt**. * * Each format specifier in **fmt** corresponds to one u64 element * in the **data** array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* * array. The *data_len* is the size of *data* in bytes - must be * a multiple of 8. * * Formats **%s** and **%p{i,I}{4,6}** require to read kernel * memory. Reading kernel memory may fail due to either invalid * address or valid address but requiring a major memory fault. If * reading kernel memory fails, the string for **%s** will be an * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. * Not returning error to bpf program is consistent with what * **bpf_trace_printk**\ () does for now. * * * Returns * The strictly positive length of the formatted string, including * the trailing zero character. If the return value is greater than * **str_size**, **str** contains a truncated string, guaranteed to * be zero-terminated except when **str_size** is 0. 
* * Or **-EBUSY** if the per-CPU memory copy buffer is busy. */ static long (*bpf_snprintf)(char *str, __u32 str_size, const char *fmt, __u64 *data, __u32 data_len) = (void *) 165; /* * bpf_sys_bpf * * Execute bpf syscall with given arguments. * * Returns * A syscall result. */ static long (*bpf_sys_bpf)(__u32 cmd, void *attr, __u32 attr_size) = (void *) 166; /* * bpf_btf_find_by_name_kind * * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. * * Returns * Returns btf_id and btf_obj_fd in lower and upper 32 bits. */ static long (*bpf_btf_find_by_name_kind)(char *name, int name_sz, __u32 kind, int flags) = (void *) 167; /* * bpf_sys_close * * Execute close syscall for given FD. * * Returns * A syscall result. */ static long (*bpf_sys_close)(__u32 fd) = (void *) 168; /* * bpf_timer_init * * Initialize the timer. * First 4 bits of *flags* specify clockid. * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. * All other bits of *flags* are reserved. * The verifier will reject the program if *timer* is not from * the same *map*. * * Returns * 0 on success. * **-EBUSY** if *timer* is already initialized. * **-EINVAL** if invalid *flags* are passed. * **-EPERM** if *timer* is in a map that doesn't have any user references. * The user space should either hold a file descriptor to a map with timers * or pin such map in bpffs. When map is unpinned or file descriptor is * closed all timers in the map will be cancelled and freed. */ static long (*bpf_timer_init)(struct bpf_timer *timer, void *map, __u64 flags) = (void *) 169; /* * bpf_timer_set_callback * * Configure the timer to call *callback_fn* static function. * * Returns * 0 on success. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. * **-EPERM** if *timer* is in a map that doesn't have any user references. * The user space should either hold a file descriptor to a map with timers * or pin such map in bpffs. 
When map is unpinned or file descriptor is * closed all timers in the map will be cancelled and freed. */ static long (*bpf_timer_set_callback)(struct bpf_timer *timer, void *callback_fn) = (void *) 170; /* * bpf_timer_start * * Set timer expiration N nanoseconds from the current time. The * configured callback will be invoked in soft irq context on some cpu * and will not repeat unless another bpf_timer_start() is made. * In such case the next invocation can migrate to a different cpu. * Since struct bpf_timer is a field inside map element the map * owns the timer. The bpf_timer_set_callback() will increment refcnt * of BPF program to make sure that callback_fn code stays valid. * When user space reference to a map reaches zero all timers * in a map are cancelled and corresponding program's refcnts are * decremented. This is done to make sure that Ctrl-C of a user * process doesn't leave any timers running. If map is pinned in * bpffs the callback_fn can re-arm itself indefinitely. * bpf_map_update/delete_elem() helpers and user space sys_bpf commands * cancel and free the timer in the given map element. * The map can contain timers that invoke callback_fn-s from different * programs. The same callback_fn can serve different timers from * different maps if key/value layout matches across maps. * Every bpf_timer_set_callback() can have different callback_fn. * * * Returns * 0 on success. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier * or invalid *flags* are passed. */ static long (*bpf_timer_start)(struct bpf_timer *timer, __u64 nsecs, __u64 flags) = (void *) 171; /* * bpf_timer_cancel * * Cancel the timer and wait for callback_fn to finish if it was running. * * Returns * 0 if the timer was not active. * 1 if the timer was active. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its * own timer which would have led to a deadlock otherwise. 
*/ static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *) 172; /* * bpf_get_func_ip * * Get address of the traced function (for tracing and kprobe programs). * * Returns * Address of the traced function. */ static __u64 (*bpf_get_func_ip)(void *ctx) = (void *) 173; /* * bpf_get_attach_cookie * * Get bpf_cookie value provided (optionally) during the program * attachment. It might be different for each individual * attachment, even if BPF program itself is the same. * Expects BPF program context *ctx* as a first argument. * * Supported for the following program types: * - kprobe/uprobe; * - tracepoint; * - perf_event. * * Returns * Value specified by user at BPF link creation/attachment time * or 0, if it was not specified. */ static __u64 (*bpf_get_attach_cookie)(void *ctx) = (void *) 174; /* * bpf_task_pt_regs * * Get the struct pt_regs associated with **task**. * * Returns * A pointer to struct pt_regs. */ static long (*bpf_task_pt_regs)(struct task_struct *task) = (void *) 175; /* * bpf_get_branch_snapshot * * Get branch trace from hardware engines like Intel LBR. The * hardware engine is stopped shortly after the helper is * called. Therefore, the user needs to filter branch entries * based on the actual use case. To capture branch trace * before the trigger point of the BPF program, the helper * should be called at the beginning of the BPF program. * * The data is stored as struct perf_branch_entry into output * buffer *entries*. *size* is the size of *entries* in bytes. * *flags* is reserved for now and must be zero. * * * Returns * On success, number of bytes written to *entries*. On error, a * negative value. * * **-EINVAL** if *flags* is not zero. * * **-ENOENT** if architecture does not support branch records.
*/ static long (*bpf_get_branch_snapshot)(void *entries, __u32 size, __u64 flags) = (void *) 176; /* * bpf_trace_vprintk * * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 * to format and can handle more format args as a result. * * Arguments are to be used as in **bpf_seq_printf**\ () helper. * * Returns * The number of bytes written to the buffer, or a negative error * in case of failure. */ static long (*bpf_trace_vprintk)(const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 177; /* * bpf_skc_to_unix_sock * * Dynamically cast a *sk* pointer to a *unix_sock* pointer. * * Returns * *sk* if casting is valid, or **NULL** otherwise. */ static struct unix_sock *(*bpf_skc_to_unix_sock)(void *sk) = (void *) 178; /* * bpf_kallsyms_lookup_name * * Get the address of a kernel symbol, returned in *res*. *res* is * set to 0 if the symbol is not found. * * Returns * On success, zero. On error, a negative value. * * **-EINVAL** if *flags* is not zero. * * **-EINVAL** if string *name* is not the same size as *name_sz*. * * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. */ static long (*bpf_kallsyms_lookup_name)(const char *name, int name_sz, int flags, __u64 *res) = (void *) 179; /* * bpf_find_vma * * Find vma of *task* that contains *addr*, call *callback_fn* * function with *task*, *vma*, and *callback_ctx*. * The *callback_fn* should be a static function and * the *callback_ctx* should be a pointer to the stack. * The *flags* is used to control certain aspects of the helper. * Currently, the *flags* must be 0. * * The expected callback signature is * * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); * * * Returns * 0 on success. * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. * **-EBUSY** if failed to try lock mmap_lock. * **-EINVAL** for invalid **flags**. 
*/ static long (*bpf_find_vma)(struct task_struct *task, __u64 addr, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 180; ================================================ FILE: examples/headers/bpf_helpers.h ================================================ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_HELPERS__ #define __BPF_HELPERS__ /* * Note that bpf programs need to include either * vmlinux.h (auto-generated from BTF) or linux/types.h * in advance since bpf_helper_defs.h uses such types * as __u64. */ #include "bpf_helper_defs.h" #define __uint(name, val) int (*name)[val] #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names * are interpreted by libbpf depending on the context (BPF programs, BPF maps, * extern variables, etc). * To allow use of SEC() with externs (e.g., for extern .maps declarations), * make sure __attribute__((unused)) doesn't trigger compilation warning. */ #define SEC(name) \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \ __attribute__((section(name), used)) \ _Pragma("GCC diagnostic pop") \ /* Avoid 'linux/stddef.h' definition of '__always_inline'. */ #undef __always_inline #define __always_inline inline __attribute__((always_inline)) #ifndef __noinline #define __noinline __attribute__((noinline)) #endif #ifndef __weak #define __weak __attribute__((weak)) #endif /* * Use __hidden attribute to mark a non-static BPF subprogram effectively * static for BPF verifier's verification algorithm purposes, allowing more * extensive and permissive BPF verification process, taking into account * subprogram's caller context. 
*/ #define __hidden __attribute__((visibility("hidden"))) /* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include * any system-level headers (such as stddef.h, linux/version.h, etc), and * commonly-used macros like NULL and KERNEL_VERSION aren't available through * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define * them on their own. So as a convenience, provide such definitions here. */ #ifndef NULL #define NULL ((void *)0) #endif #ifndef KERNEL_VERSION #define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))) #endif /* * Helper macros to manipulate data structures */ #ifndef offsetof #define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) #endif #ifndef container_of #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *)(ptr); \ ((type *)(__mptr - offsetof(type, member))); \ }) #endif /* * Helper macro to throw a compilation error if __bpf_unreachable() gets * built into the resulting code. This works given BPF back end does not * implement __builtin_trap(). This is useful to assert that certain paths * of the program code are never used and hence eliminated by the compiler. * * For example, consider a switch statement that covers known cases used by * the program. __bpf_unreachable() can then reside in the default case. If * the program gets extended such that a case is not covered in the switch * statement, then it will throw a build error due to the default case not * being compiled out. */ #ifndef __bpf_unreachable # define __bpf_unreachable() __builtin_trap() #endif /* * Helper function to perform a tail call with a constant/immediate map slot. 
*/ #if __clang_major__ >= 8 && defined(__bpf__) static __always_inline void bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) { if (!__builtin_constant_p(slot)) __bpf_unreachable(); /* * Provide a hard guarantee that LLVM won't optimize setting r2 (map * pointer) and r3 (constant map index) from _different paths_ ending * up at the _same_ call insn as otherwise we won't be able to use the * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key * tracking for prog array pokes") for details on verifier tracking. * * Note on clobber list: we need to stay in-line with BPF calling * convention, so even if we don't end up using r0, r4, r5, we need * to mark them as clobber so that LLVM doesn't end up using them * before / after the call. */ asm volatile("r1 = %[ctx]\n\t" "r2 = %[map]\n\t" "r3 = %[slot]\n\t" "call 12" :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) : "r0", "r1", "r2", "r3", "r4", "r5"); } #endif /* * Helper structure used by eBPF C program * to describe BPF map attributes to libbpf loader */ struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; unsigned int map_flags; }; enum libbpf_pin_type { LIBBPF_PIN_NONE, /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ LIBBPF_PIN_BY_NAME, }; enum libbpf_tristate { TRI_NO = 0, TRI_YES = 1, TRI_MODULE = 2, }; #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif #ifndef ___bpf_apply #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) #endif #ifndef ___bpf_nth #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N #endif #ifndef ___bpf_narg #define ___bpf_narg(...) 
\ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #endif #define ___bpf_fill0(arr, p, x) do {} while (0) #define ___bpf_fill1(arr, p, x) arr[p] = x #define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args) #define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args) #define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args) #define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args) #define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args) #define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args) #define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args) #define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args) #define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args) #define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args) #define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args) #define ___bpf_fill(arr, args...) \ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args) /* * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values * in a structure. */ #define BPF_SEQ_PRINTF(seq, fmt, args...) \ ({ \ static const char ___fmt[] = fmt; \ unsigned long long ___param[___bpf_narg(args)]; \ \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ___bpf_fill(___param, args); \ _Pragma("GCC diagnostic pop") \ \ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ ___param, sizeof(___param)); \ }) /* * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of * an array of u64. */ #define BPF_SNPRINTF(out, out_size, fmt, args...) 
\ ({ \ static const char ___fmt[] = fmt; \ unsigned long long ___param[___bpf_narg(args)]; \ \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ___bpf_fill(___param, args); \ _Pragma("GCC diagnostic pop") \ \ bpf_snprintf(out, out_size, ___fmt, \ ___param, sizeof(___param)); \ }) #ifdef BPF_NO_GLOBAL_DATA #define BPF_PRINTK_FMT_MOD #else #define BPF_PRINTK_FMT_MOD static const #endif #define __bpf_printk(fmt, ...) \ ({ \ BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ bpf_trace_printk(____fmt, sizeof(____fmt), \ ##__VA_ARGS__); \ }) /* * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments * instead of an array of u64. */ #define __bpf_vprintk(fmt, args...) \ ({ \ static const char ___fmt[] = fmt; \ unsigned long long ___param[___bpf_narg(args)]; \ \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ___bpf_fill(___param, args); \ _Pragma("GCC diagnostic pop") \ \ bpf_trace_vprintk(___fmt, sizeof(___fmt), \ ___param, sizeof(___param)); \ }) /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args * Otherwise use __bpf_vprintk */ #define ___bpf_pick_printk(...) \ ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ __bpf_printk /*1*/, __bpf_printk /*0*/) /* Helper macro to print out debug messages */ #define bpf_printk(fmt, args...) 
___bpf_pick_printk(args)(fmt, ##args) #endif ================================================ FILE: examples/headers/bpf_tracing.h ================================================ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_TRACING_H__ #define __BPF_TRACING_H__ /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 #define bpf_target_defined #elif defined(__TARGET_ARCH_s390) #define bpf_target_s390 #define bpf_target_defined #elif defined(__TARGET_ARCH_arm) #define bpf_target_arm #define bpf_target_defined #elif defined(__TARGET_ARCH_arm64) #define bpf_target_arm64 #define bpf_target_defined #elif defined(__TARGET_ARCH_mips) #define bpf_target_mips #define bpf_target_defined #elif defined(__TARGET_ARCH_powerpc) #define bpf_target_powerpc #define bpf_target_defined #elif defined(__TARGET_ARCH_sparc) #define bpf_target_sparc #define bpf_target_defined #elif defined(__TARGET_ARCH_riscv) #define bpf_target_riscv #define bpf_target_defined #else /* Fall back to what the compiler says */ #if defined(__x86_64__) #define bpf_target_x86 #define bpf_target_defined #elif defined(__s390__) #define bpf_target_s390 #define bpf_target_defined #elif defined(__arm__) #define bpf_target_arm #define bpf_target_defined #elif defined(__aarch64__) #define bpf_target_arm64 #define bpf_target_defined #elif defined(__mips__) #define bpf_target_mips #define bpf_target_defined #elif defined(__powerpc__) #define bpf_target_powerpc #define bpf_target_defined #elif defined(__sparc__) #define bpf_target_sparc #define bpf_target_defined #elif defined(__riscv) && __riscv_xlen == 64 #define bpf_target_riscv #define bpf_target_defined #endif /* no compiler target */ #endif #ifndef __BPF_TARGET_MISSING #define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\"" #endif #if defined(bpf_target_x86) #if defined(__KERNEL__) || defined(__VMLINUX_H__) #define PT_REGS_PARM1(x) 
((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) #define PT_REGS_PARM4(x) ((x)->cx) #define PT_REGS_PARM5(x) ((x)->r8) #define PT_REGS_RET(x) ((x)->sp) #define PT_REGS_FP(x) ((x)->bp) #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) #else #ifdef __i386__ /* i386 kernel is built with -mregparm=3 */ #define PT_REGS_PARM1(x) ((x)->eax) #define PT_REGS_PARM2(x) ((x)->edx) #define PT_REGS_PARM3(x) ((x)->ecx) #define PT_REGS_PARM4(x) 0 #define PT_REGS_PARM5(x) 0 #define PT_REGS_RET(x) ((x)->esp) #define PT_REGS_FP(x) ((x)->ebp) #define PT_REGS_RC(x) ((x)->eax) #define PT_REGS_SP(x) ((x)->esp) #define PT_REGS_IP(x) ((x)->eip) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) #define PT_REGS_PARM4_CORE(x) 0 #define PT_REGS_PARM5_CORE(x) 0 #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) #else #define PT_REGS_PARM1(x) ((x)->rdi) #define PT_REGS_PARM2(x) ((x)->rsi) #define PT_REGS_PARM3(x) ((x)->rdx) #define PT_REGS_PARM4(x) ((x)->rcx) #define PT_REGS_PARM5(x) ((x)->r8) #define PT_REGS_RET(x) ((x)->rsp) #define PT_REGS_FP(x) ((x)->rbp) #define PT_REGS_RC(x) ((x)->rax) #define 
PT_REGS_SP(x) ((x)->rsp) #define PT_REGS_IP(x) ((x)->rip) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) #endif #endif #elif defined(bpf_target_s390) /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ struct pt_regs; #define PT_REGS_S390 const volatile user_pt_regs #define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) #define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) #define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) #define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) #define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) #define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) /* Works only with CONFIG_FRAME_POINTER */ #define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) #define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) #define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) #define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) #define PT_REGS_SP_CORE(x) 
BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) #elif defined(bpf_target_arm) #define PT_REGS_PARM1(x) ((x)->uregs[0]) #define PT_REGS_PARM2(x) ((x)->uregs[1]) #define PT_REGS_PARM3(x) ((x)->uregs[2]) #define PT_REGS_PARM4(x) ((x)->uregs[3]) #define PT_REGS_PARM5(x) ((x)->uregs[4]) #define PT_REGS_RET(x) ((x)->uregs[14]) #define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ #define PT_REGS_RC(x) ((x)->uregs[0]) #define PT_REGS_SP(x) ((x)->uregs[13]) #define PT_REGS_IP(x) ((x)->uregs[12]) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ struct pt_regs; #define PT_REGS_ARM64 const volatile struct user_pt_regs #define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) #define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) #define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) #define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) #define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) #define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ #define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) #define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) #define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) #define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) #define PT_REGS_PARM1_CORE(x) 
BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) #elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) #define PT_REGS_PARM2(x) ((x)->regs[5]) #define PT_REGS_PARM3(x) ((x)->regs[6]) #define PT_REGS_PARM4(x) ((x)->regs[7]) #define PT_REGS_PARM5(x) ((x)->regs[8]) #define PT_REGS_RET(x) ((x)->regs[31]) #define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ #define PT_REGS_RC(x) ((x)->regs[2]) #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) #elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) #define PT_REGS_PARM2(x) ((x)->gpr[4]) #define PT_REGS_PARM3(x) ((x)->gpr[5]) #define PT_REGS_PARM4(x) ((x)->gpr[6]) #define PT_REGS_PARM5(x) ((x)->gpr[7]) #define PT_REGS_RC(x) ((x)->gpr[3]) 
#define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) #elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) #define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) #define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) #define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) #define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) /* Should this also be a bpf_target check for the sparc case? 
*/ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) #else #define PT_REGS_IP(x) ((x)->pc) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif #elif defined(bpf_target_riscv) struct pt_regs; #define PT_REGS_RV const volatile struct user_regs_struct #define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) #define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) #define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) #define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) #define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) #define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) #define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) #define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) #define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) #define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) #endif #if defined(bpf_target_powerpc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #elif defined(bpf_target_sparc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #elif defined(bpf_target_defined) #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ ({ 
bpf_probe_read_kernel(&(ip), sizeof(ip), \ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif #if !defined(bpf_target_defined) #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #endif /* !defined(bpf_target_defined) */ #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif #ifndef ___bpf_apply #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) #endif #ifndef ___bpf_nth #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N #endif #ifndef ___bpf_narg #define ___bpf_narg(...) 
\ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #endif #define ___bpf_ctx_cast0() ctx #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] #define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] #define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] #define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] #define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] #define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] #define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] #define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] #define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] #define ___bpf_ctx_cast(args...) \ ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) /* * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and * similar kinds of BPF programs, that accept input arguments as a single * pointer to untyped u64 array, where each u64 can actually be a typed * pointer or integer of different size. Instead of requiring user to write * manual casts and work with array elements by index, BPF_PROG macro * allows user to declare a list of named and typed input arguments in the * same syntax as for normal C function. All the casting is hidden and * performed transparently, while user code can just assume working with * function arguments of specified type and name. * * Original raw context argument is preserved as well as 'ctx' argument. * This is useful when using BPF helpers that expect original context * as one of the parameters (e.g., for bpf_perf_event_output()). */ #define BPF_PROG(name, args...)
\ name(unsigned long long *ctx); \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args); \ typeof(name(0)) name(unsigned long long *ctx) \ { \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ return ____##name(___bpf_ctx_cast(args)); \ _Pragma("GCC diagnostic pop") \ } \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args) struct pt_regs; #define ___bpf_kprobe_args0() ctx #define ___bpf_kprobe_args1(x) \ ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) #define ___bpf_kprobe_args2(x, args...) \ ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) #define ___bpf_kprobe_args3(x, args...) \ ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) #define ___bpf_kprobe_args4(x, args...) \ ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) #define ___bpf_kprobe_args5(x, args...) \ ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) #define ___bpf_kprobe_args(args...) \ ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) /* * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific * low-level way of getting kprobe input arguments from struct pt_regs, and * provides a familiar typed and named function arguments syntax and * semantics of accessing kprobe input parameters. * * Original struct pt_regs* context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). */ #define BPF_KPROBE(name, args...) 
\ name(struct pt_regs *ctx); \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ return ____##name(___bpf_kprobe_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx #define ___bpf_kretprobe_args1(x) \ ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) #define ___bpf_kretprobe_args(args...) \ ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional * return value (in addition to `struct pt_regs *ctx`), but no input * arguments, because they will be clobbered by the time probed function * returns. */ #define BPF_KRETPROBE(name, args...) \ name(struct pt_regs *ctx); \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ return ____##name(___bpf_kretprobe_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) #endif ================================================ FILE: examples/headers/common.h ================================================ // This is a compact version of `vmlinux.h` to be used in the examples using C code. 
#pragma once typedef unsigned char __u8; typedef short int __s16; typedef short unsigned int __u16; typedef int __s32; typedef unsigned int __u32; typedef long long int __s64; typedef long long unsigned int __u64; typedef __u8 u8; typedef __s16 s16; typedef __u16 u16; typedef __s32 s32; typedef __u32 u32; typedef __s64 s64; typedef __u64 u64; typedef __u16 __le16; typedef __u16 __be16; typedef __u32 __be32; typedef __u64 __be64; typedef __u32 __wsum; #include "bpf_helpers.h" enum bpf_map_type { BPF_MAP_TYPE_UNSPEC = 0, BPF_MAP_TYPE_HASH = 1, BPF_MAP_TYPE_ARRAY = 2, BPF_MAP_TYPE_PROG_ARRAY = 3, BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4, BPF_MAP_TYPE_PERCPU_HASH = 5, BPF_MAP_TYPE_PERCPU_ARRAY = 6, BPF_MAP_TYPE_STACK_TRACE = 7, BPF_MAP_TYPE_CGROUP_ARRAY = 8, BPF_MAP_TYPE_LRU_HASH = 9, BPF_MAP_TYPE_LRU_PERCPU_HASH = 10, BPF_MAP_TYPE_LPM_TRIE = 11, BPF_MAP_TYPE_ARRAY_OF_MAPS = 12, BPF_MAP_TYPE_HASH_OF_MAPS = 13, BPF_MAP_TYPE_DEVMAP = 14, BPF_MAP_TYPE_SOCKMAP = 15, BPF_MAP_TYPE_CPUMAP = 16, BPF_MAP_TYPE_XSKMAP = 17, BPF_MAP_TYPE_SOCKHASH = 18, BPF_MAP_TYPE_CGROUP_STORAGE = 19, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY = 20, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = 21, BPF_MAP_TYPE_QUEUE = 22, BPF_MAP_TYPE_STACK = 23, BPF_MAP_TYPE_SK_STORAGE = 24, BPF_MAP_TYPE_DEVMAP_HASH = 25, BPF_MAP_TYPE_STRUCT_OPS = 26, BPF_MAP_TYPE_RINGBUF = 27, BPF_MAP_TYPE_INODE_STORAGE = 28, }; enum xdp_action { XDP_ABORTED = 0, XDP_DROP = 1, XDP_PASS = 2, XDP_TX = 3, XDP_REDIRECT = 4, }; enum tc_action { TC_ACT_UNSPEC = -1, TC_ACT_OK = 0, TC_ACT_RECLASSIFY = 1, TC_ACT_SHOT = 2, TC_ACT_PIPE = 3, TC_ACT_STOLEN = 4, TC_ACT_QUEUED = 5, TC_ACT_REPEAT = 6, TC_ACT_REDIRECT = 7, TC_ACT_JUMP = 0x10000000 }; struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; __u32 ingress_ifindex; __u32 rx_queue_index; __u32 egress_ifindex; }; typedef __u16 __sum16; #define ETH_P_IP 0x0800 struct ethhdr { unsigned char h_dest[6]; unsigned char h_source[6]; __be16 h_proto; }; struct iphdr { __u8 ihl: 4; __u8 version: 4; __u8 tos; 
__be16 tot_len; __be16 id; __be16 frag_off; __u8 ttl; __u8 protocol; __sum16 check; __be32 saddr; __be32 daddr; }; enum { BPF_ANY = 0, BPF_NOEXIST = 1, BPF_EXIST = 2, BPF_F_LOCK = 4, }; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK #if defined(__TARGET_ARCH_x86) struct pt_regs { /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long rbp; unsigned long rbx; /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; unsigned long r8; unsigned long rax; unsigned long rcx; unsigned long rdx; unsigned long rsi; unsigned long rdi; /* * On syscall entry, this is syscall#. On CPU exception, this is error code. 
* On hw interrupt, it's IRQ number: */ unsigned long orig_rax; /* Return frame for iretq */ unsigned long rip; unsigned long cs; unsigned long eflags; unsigned long rsp; unsigned long ss; /* top of stack page */ }; #endif /* __TARGET_ARCH_x86 */ ================================================ FILE: examples/headers/update.sh ================================================ #!/usr/bin/env bash # Version of libbpf to fetch headers from LIBBPF_VERSION=0.6.1 # The headers we want prefix=libbpf-"$LIBBPF_VERSION" headers=( "$prefix"/LICENSE.BSD-2-Clause "$prefix"/src/bpf_endian.h "$prefix"/src/bpf_helper_defs.h "$prefix"/src/bpf_helpers.h "$prefix"/src/bpf_tracing.h ) # Fetch libbpf release and extract the desired headers curl -sL "https://github.com/libbpf/libbpf/archive/refs/tags/v${LIBBPF_VERSION}.tar.gz" | \ tar -xz --xform='s#.*/##' "${headers[@]}" ================================================ FILE: examples/kprobe/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { KprobeMap *ebpf.MapSpec `ebpf:"kprobe_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { KprobeMap *ebpf.Map `ebpf:"kprobe_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.KprobeMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/kprobe/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { KprobeMap *ebpf.MapSpec `ebpf:"kprobe_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { KprobeMap *ebpf.Map `ebpf:"kprobe_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.KprobeMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/kprobe/kprobe.c ================================================ //go:build ignore #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); __type(value, u64); __uint(max_entries, 1); } kprobe_map SEC(".maps"); SEC("kprobe/sys_execve") int kprobe_execve() { u32 key = 0; u64 initval = 1, *valp; valp = bpf_map_lookup_elem(&kprobe_map, &key); if (!valp) { bpf_map_update_elem(&kprobe_map, &key, &initval, BPF_ANY); return 0; } __sync_fetch_and_add(valp, 1); return 0; } ================================================ FILE: examples/kprobe/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a kernel symbol. // The eBPF program will be attached to the start of the sys_execve // kernel function and prints out the number of times it has been called // every second. package main import ( "log" "time" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf kprobe.c -- -I../headers const mapKey uint32 = 0 func main() { // Name of the kernel function to trace. fn := "sys_execve" // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() // Open a Kprobe at the entry point of the kernel function and attach the // pre-compiled program. Each time the kernel function enters, the program // will increment the execution counter by 1. The read loop below polls this // map value once per second. 
kp, err := link.Kprobe(fn, objs.KprobeExecve, nil) if err != nil { log.Fatalf("opening kprobe: %s", err) } defer kp.Close() // Read loop reporting the total amount of times the kernel // function was entered, once per second. ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() log.Println("Waiting for events..") for range ticker.C { var value uint64 if err := objs.KprobeMap.Lookup(mapKey, &value); err != nil { log.Fatalf("reading map: %v", err) } log.Printf("%s called %d times\n", fn, value) } } ================================================ FILE: examples/kprobe_percpu/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. 
// // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { KprobeMap *ebpf.MapSpec `ebpf:"kprobe_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { KprobeMap *ebpf.Map `ebpf:"kprobe_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.KprobeMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/kprobe_percpu/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. 
//go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { KprobeMap *ebpf.MapSpec `ebpf:"kprobe_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { KprobeMap *ebpf.Map `ebpf:"kprobe_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.KprobeMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/kprobe_percpu/kprobe_percpu.c ================================================ //go:build ignore #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __type(key, u32); __type(value, u64); __uint(max_entries, 1); } kprobe_map SEC(".maps"); SEC("kprobe/sys_execve") int kprobe_execve() { u32 key = 0; u64 initval = 1, *valp; valp = bpf_map_lookup_elem(&kprobe_map, &key); if (!valp) { bpf_map_update_elem(&kprobe_map, &key, &initval, BPF_ANY); return 0; } __sync_fetch_and_add(valp, 1); return 0; } ================================================ FILE: examples/kprobe_percpu/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a kernel symbol and // using percpu map to collect data. The eBPF program will be attached to the // start of the sys_execve kernel function and prints out the number of called // times on each cpu every second. package main import ( "log" "time" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf kprobe_percpu.c -- -I../headers const mapKey uint32 = 0 func main() { // Name of the kernel function to trace. fn := "sys_execve" // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() // Open a Kprobe at the entry point of the kernel function and attach the // pre-compiled program. Each time the kernel function enters, the program // will increment the execution counter by 1. The read loop below polls this // map value once per second. 
kp, err := link.Kprobe(fn, objs.KprobeExecve, nil) if err != nil { log.Fatalf("opening kprobe: %s", err) } defer kp.Close() // Read loop reporting the total amount of times the kernel // function was entered, once per second. ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() log.Println("Waiting for events..") for range ticker.C { var all_cpu_value []uint64 if err := objs.KprobeMap.Lookup(mapKey, &all_cpu_value); err != nil { log.Fatalf("reading map: %v", err) } for cpuid, cpuvalue := range all_cpu_value { log.Printf("%s called %d times on CPU%v\n", fn, cpuvalue, cpuid) } log.Printf("\n") } } ================================================ FILE: examples/kprobepin/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { KprobeMap *ebpf.MapSpec `ebpf:"kprobe_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { KprobeMap *ebpf.Map `ebpf:"kprobe_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.KprobeMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/kprobepin/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. 
//go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { KprobeMap *ebpf.MapSpec `ebpf:"kprobe_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { KprobeMap *ebpf.Map `ebpf:"kprobe_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.KprobeMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/kprobepin/kprobe_pin.c ================================================ //go:build ignore #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); __type(value, u64); __uint(max_entries, 1); __uint(pinning, LIBBPF_PIN_BY_NAME); } kprobe_map SEC(".maps"); SEC("kprobe/sys_execve") int kprobe_execve() { u32 key = 0; u64 initval = 1, *valp; valp = bpf_map_lookup_elem(&kprobe_map, &key); if (!valp) { bpf_map_update_elem(&kprobe_map, &key, &initval, BPF_ANY); return 0; } __sync_fetch_and_add(valp, 1); return 0; } ================================================ FILE: examples/kprobepin/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a kernel symbol. // The eBPF program will be attached to the start of the sys_execve // kernel function and prints out the number of times it has been called // every second. package main import ( "log" "os" "path" "time" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf kprobe_pin.c -- -I../headers const ( mapKey uint32 = 0 bpfFSPath = "/sys/fs/bpf" ) func main() { // Name of the kernel function to trace. fn := "sys_execve" // Allow the current process to lock memory for eBPF resources. 
if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } pinPath := path.Join(bpfFSPath, fn) if err := os.MkdirAll(pinPath, os.ModePerm); err != nil { log.Fatalf("failed to create bpf fs subpath: %+v", err) } var objs bpfObjects if err := loadBpfObjects(&objs, &ebpf.CollectionOptions{ Maps: ebpf.MapOptions{ // Pin the map to the BPF filesystem and configure the // library to automatically re-write it in the BPF // program so it can be re-used if it already exists or // create it if not PinPath: pinPath, }, }); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() // Open a Kprobe at the entry point of the kernel function and attach the // pre-compiled program. Each time the kernel function enters, the program // will increment the execution counter by 1. The read loop below polls this // map value once per second. kp, err := link.Kprobe(fn, objs.KprobeExecve, nil) if err != nil { log.Fatalf("opening kprobe: %s", err) } defer kp.Close() // Read loop reporting the total amount of times the kernel // function was entered, once per second. ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() log.Println("Waiting for events..") for range ticker.C { var value uint64 if err := objs.KprobeMap.Lookup(mapKey, &value); err != nil { log.Fatalf("reading map: %v", err) } log.Printf("%s called %d times\n", fn, value) } } ================================================ FILE: examples/map_in_map/main.go ================================================ //go:build linux // An example of using maps within maps. This example demonstrates a few // features. Firstly, creating eBPF map specifications in pure Go // (typically you'd see them being generated from a loaded ELF). // Additionally, creating maps and placing them in other maps (with // dynamically sized inner maps). 
package main

import (
	"log"
	"math/rand"
	"time"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/rlimit"
)

const BPF_F_INNER_MAP = 0x1000

// main builds an array-of-maps whose inner arrays have randomly chosen
// sizes, then iterates over the outer map and logs the info of every inner
// map it finds. Any failure terminates the program via log.Fatal.
func main() {
	// Allow the current process to lock memory for eBPF resources.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}

	// We're creating a map spec in pure Go here, but a map spec like
	// this can be loaded from an ELF too.
	outerMapSpec := ebpf.MapSpec{
		Name:       "outer_map",
		Type:       ebpf.ArrayOfMaps,
		KeySize:    4, // 4 bytes for u32
		ValueSize:  4,
		MaxEntries: 5, // We'll have 5 maps inside this map
		Contents:   make([]ebpf.MapKV, 5),
		InnerMap: &ebpf.MapSpec{
			Name:      "inner_map",
			Type:      ebpf.Array,
			KeySize:   4, // 4 bytes for u32
			ValueSize: 4, // 4 bytes for u32

			// This flag is required for dynamically sized inner maps.
			// Added in linux 5.10.
			Flags: BPF_F_INNER_MAP,

			// We set this to 1 now, but this inner map spec gets copied
			// and altered later.
			MaxEntries: 1,
		},
	}

	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// For each entry we want to create in the outer map...
	for i := uint32(0); i < outerMapSpec.MaxEntries; i++ {
		// Copy the inner map spec
		innerMapSpec := outerMapSpec.InnerMap.Copy()

		// Randomly generate inner map length
		innerMapSpec.MaxEntries = uint32(r.Intn(50) + 1) // Can't be zero.

		// populate the inner map contents
		innerMapSpec.Contents = make([]ebpf.MapKV, innerMapSpec.MaxEntries)

		for j := range innerMapSpec.Contents {
			innerMapSpec.Contents[uint32(j)] = ebpf.MapKV{Key: uint32(j), Value: uint32(0xCAFE)}
		}

		// Create the inner map
		innerMap, err := ebpf.NewMap(innerMapSpec)
		if err != nil {
			log.Fatalf("inner_map: %v", err)
		}
		// In this example we close all references to maps before exit.
		// But typically you may actually want to hold on to the map
		// reference so that you control the lifecycle of the map. For
		// the inner (nested) map though, it's safe to close the file
		// descriptor in userspace once the outer map holds a reference
		// in the kernel.
		defer innerMap.Close()

		// Inner map is created successfully and lives in the kernel,
		// let's add it to the contents of the outer map spec.
		outerMapSpec.Contents[i] = ebpf.MapKV{Key: i, Value: innerMap}
	}

	// All inner maps are created and inserted into the outer map spec,
	// time to create the outer map.
	outerMap, err := ebpf.NewMap(&outerMapSpec)
	if err != nil {
		log.Fatalf("outer_map: %v", err)
	}
	defer outerMap.Close()

	// The outer map is created successfully and lives happily in the
	// kernel. Let's iterate over the map in the kernel to see what's
	// been made.
	mapIter := outerMap.Iterate()
	var outerMapKey uint32
	var innerMapID ebpf.MapID
	for mapIter.Next(&outerMapKey, &innerMapID) {
		// With maps that contain maps, performing a lookup doesn't give
		// you the map directly, instead it gives you an ID, which you
		// can then use to get a full map pointer.
		innerMap, err := ebpf.NewMapFromID(innerMapID)
		if err != nil {
			log.Fatal(err)
		}

		innerMapInfo, err := innerMap.Info()
		// The map opened from the ID is a separate handle that is only
		// needed for the Info call. Release it right away so the loop does
		// not leak one file descriptor per outer map entry.
		innerMap.Close()
		if err != nil {
			log.Fatal(err)
		}

		log.Printf("outerMapKey %d, innerMap.Info: %+v", outerMapKey, innerMapInfo)
	}

	// Next also returns false when iteration fails, so the error has to be
	// checked explicitly to tell a failure from the end of the map.
	if err := mapIter.Err(); err != nil {
		log.Fatalf("iterating outer_map: %v", err)
	}
}


================================================
FILE: examples/ringbuffer/bpf_bpfeb.go
================================================
// Code generated by bpf2go; DO NOT EDIT.
//go:build (mips || mips64 || ppc64 || s390x) && linux

package main

import (
	"bytes"
	_ "embed"
	"fmt"
	"io"
	"structs"

	"github.com/cilium/ebpf"
)

type bpfEvent struct {
	_    structs.HostLayout
	Pid  uint32
	Comm [16]uint8
}

// loadBpf returns the embedded CollectionSpec for bpf.
func loadBpf() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_BpfBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load bpf: %w", err)
	}

	return spec, err
}

// loadBpfObjects loads bpf and converts it into a struct.
// // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/ringbuffer/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfEvent struct { _ structs.HostLayout Pid uint32 Comm [16]uint8 } // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfProgramSpecs struct { KprobeExecve *ebpf.ProgramSpec `ebpf:"kprobe_execve"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { KprobeExecve *ebpf.Program `ebpf:"kprobe_execve"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.KprobeExecve, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/ringbuffer/main.go ================================================ //go:build linux package main import ( "bytes" "encoding/binary" "errors" "log" "os" "os/signal" "syscall" "golang.org/x/sys/unix" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/ringbuf" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf ringbuffer.c -- -I../headers func main() { // Name of the kernel function to trace. fn := "sys_execve" // Subscribe to signals for terminating the program. stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() // Open a Kprobe at the entry point of the kernel function and attach the // pre-compiled program. Each time the kernel function enters, the program // will emit an event containing pid and command of the execved task. kp, err := link.Kprobe(fn, objs.KprobeExecve, nil) if err != nil { log.Fatalf("opening kprobe: %s", err) } defer kp.Close() // Open a ringbuf reader from userspace RINGBUF map described in the // eBPF C program. rd, err := ringbuf.NewReader(objs.Events) if err != nil { log.Fatalf("opening ringbuf reader: %s", err) } defer rd.Close() // Close the reader when the process receives a signal, which will exit // the read loop. go func() { <-stopper if err := rd.Close(); err != nil { log.Fatalf("closing ringbuf reader: %s", err) } }() log.Println("Waiting for events..") // bpfEvent is generated by bpf2go. 
var event bpfEvent for { record, err := rd.Read() if err != nil { if errors.Is(err, ringbuf.ErrClosed) { log.Println("Received signal, exiting..") return } log.Printf("reading from reader: %s", err) continue } // Parse the ringbuf event entry into a bpfEvent structure. if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, &event); err != nil { log.Printf("parsing ringbuf event: %s", err) continue } log.Printf("pid: %d\tcomm: %s\n", event.Pid, unix.ByteSliceToString(event.Comm[:])) } } ================================================ FILE: examples/ringbuffer/ringbuffer.c ================================================ //go:build ignore #include "common.h" #ifndef TASK_COMM_LEN #define TASK_COMM_LEN 16 #endif char __license[] SEC("license") = "Dual MIT/GPL"; struct event { u32 pid; u8 comm[TASK_COMM_LEN]; }; struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 1 << 24); __type(value, struct event); } events SEC(".maps"); SEC("kprobe/sys_execve") int kprobe_execve(struct pt_regs *ctx) { u64 id = bpf_get_current_pid_tgid(); u32 tgid = id >> 32; struct event *task_info; task_info = bpf_ringbuf_reserve(&events, sizeof(struct event), 0); if (!task_info) { return 0; } task_info->pid = tgid; bpf_get_current_comm(&task_info->comm, TASK_COMM_LEN); bpf_ringbuf_submit(task_info, 0); return 0; } ================================================ FILE: examples/sched_ext/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. 
// // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { MinimalSched *ebpf.MapSpec `ebpf:"minimal_sched"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { MinimalSched *ebpf.Map `ebpf:"minimal_sched"` } func (m *bpfMaps) Close() error { return _BpfClose( m.MinimalSched, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { } func (p *bpfPrograms) Close() error { return _BpfClose() } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/sched_ext/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfMapSpecs struct { MinimalSched *ebpf.MapSpec `ebpf:"minimal_sched"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { MinimalSched *ebpf.Map `ebpf:"minimal_sched"` } func (m *bpfMaps) Close() error { return _BpfClose( m.MinimalSched, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { } func (p *bpfPrograms) Close() error { return _BpfClose() } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. 
// //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/sched_ext/main.go ================================================ //go:build linux package main import ( "log" "os" "os/signal" "syscall" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -no-global-types -tags linux bpf sched_ext.c -- -I../headers/ // Load a minimal defining sched_ext_ops map // // After run this program, you can find the current status of the BPF scheduler can be determined as follows: // // # cat /sys/kernel/sched_ext/state // enabled // # cat /sys/kernel/sched_ext/root/ops // miminal func main() { if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() m := objs.MinimalSched l, err := link.AttachStructOps(link.StructOpsOptions{Map: m}) if err != nil { log.Fatalf("failed to attach sched_ext: %s", err) } defer l.Close() stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) <-stopper log.Print("quit sched_ext") } ================================================ FILE: examples/sched_ext/sched_ext.c ================================================ //go:build ignore #include "bpf_endian.h" #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct sched_ext_ops { char name[128]; }; SEC(".struct_ops.link") struct sched_ext_ops minimal_sched = { .name = "minimal", }; ================================================ FILE: examples/tcprtt/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. 
//go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfEvent struct { _ structs.HostLayout Sport uint16 Dport uint16 Saddr uint32 Daddr uint32 Srtt uint32 } // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { TcpClose *ebpf.ProgramSpec `ebpf:"tcp_close"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { TcpClose *ebpf.Program `ebpf:"tcp_close"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.TcpClose, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/tcprtt/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfEvent struct { _ structs.HostLayout Sport uint16 Dport uint16 Saddr uint32 Daddr uint32 Srtt uint32 } // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. 
// // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { TcpClose *ebpf.ProgramSpec `ebpf:"tcp_close"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { TcpClose *ebpf.Program `ebpf:"tcp_close"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.TcpClose, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/tcprtt/main.go ================================================ //go:build linux // This program demonstrates attaching a fentry eBPF program to // tcp_close and reading the RTT from the TCP socket using CO-RE helpers. // It prints the IPs/ports/RTT information // once the host closes a TCP connection. // It supports only IPv4 for this example. // // Sample output: // // examples# go run -exec sudo ./tcprtt // 2022/03/19 22:30:34 Src addr Port -> Dest addr Port RTT // 2022/03/19 22:30:36 10.0.1.205 50578 -> 117.102.109.186 5201 195 // 2022/03/19 22:30:53 10.0.1.205 0 -> 89.84.1.178 9200 30 // 2022/03/19 22:30:53 10.0.1.205 36022 -> 89.84.1.178 9200 28 package main import ( "bytes" "encoding/binary" "errors" "log" "net" "os" "os/signal" "syscall" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/ringbuf" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf tcprtt.c -- -I../headers func main() { stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. 
objs := bpfObjects{}
	if err := loadBpfObjects(&objs, nil); err != nil {
		log.Fatalf("loading objects: %v", err)
	}
	defer objs.Close()

	l, err := link.AttachTracing(link.TracingOptions{
		Program: objs.TcpClose,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer l.Close()

	rd, err := ringbuf.NewReader(objs.Events)
	if err != nil {
		log.Fatalf("opening ringbuf reader: %s", err)
	}
	defer rd.Close()

	log.Printf("%-15s %-6s -> %-15s %-6s %-6s",
		"Src addr",
		"Port",
		"Dest addr",
		"Port",
		"RTT",
	)

	go readLoop(rd)

	// Wait
	<-stopper
}

// readLoop drains rd and logs one line per sample until the reader is closed.
//
// Each raw sample is decoded into a bpfEvent with binary.Read using the host's
// native byte order. A read error other than ringbuf.ErrClosed, or a decode
// error, is logged and the loop moves on to the next record. main starts this
// function in its own goroutine.
func readLoop(rd *ringbuf.Reader) {
	// bpfEvent is generated by bpf2go.
	var event bpfEvent
	for {
		record, err := rd.Read()
		if err != nil {
			// ErrClosed is the only error that ends the loop.
			if errors.Is(err, ringbuf.ErrClosed) {
				log.Println("received signal, exiting..")
				return
			}
			log.Printf("reading from reader: %s", err)
			continue
		}

		// Parse the ringbuf event entry into a bpfEvent structure.
		if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.NativeEndian, &event); err != nil {
			log.Printf("parsing ringbuf event: %s", err)
			continue
		}

		log.Printf("%-15s %-6d -> %-15s %-6d %-6d",
			intToIP(event.Saddr),
			event.Sport,
			intToIP(event.Daddr),
			event.Dport,
			event.Srtt,
		)
	}
}

// intToIP converts an IPv4 address held in a uint32 to a 4-byte net.IP.
//
// The value is written back using the host's native byte order, which is the
// same order readLoop used to decode it, so the resulting bytes are the ones
// that were in the ring buffer sample.
func intToIP(ipNum uint32) net.IP {
	ip := make(net.IP, 4)
	binary.NativeEndian.PutUint32(ip, ipNum)
	return ip
}

================================================
FILE: examples/tcprtt/tcprtt.c
================================================
//go:build ignore

#include "common.h"

#include "bpf_endian.h"
#include "bpf_tracing.h"

#define AF_INET 2

char __license[] SEC("license") = "Dual MIT/GPL";

/**
 * For CO-RE relocatable eBPF programs, __attribute__((preserve_access_index))
 * preserves the offset of the specified fields in the original kernel struct.
 * So here we don't need to include "vmlinux.h". Instead we only need to define
 * the kernel struct and their fields the eBPF program actually requires.
 *
 * Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf,
 * lsm, etc.
declared using the BPF_PROG macro can read kernel memory without * needing to call bpf_probe_read*(). */ /** * struct sock_common is the minimal network layer representation of sockets. * This is a simplified copy of the kernel's struct sock_common. * This copy contains only the fields needed for this example to * fetch the source and destination port numbers and IP addresses. */ struct sock_common { union { struct { // skc_daddr is destination IP address __be32 skc_daddr; // skc_rcv_saddr is the source IP address __be32 skc_rcv_saddr; }; }; union { struct { // skc_dport is the destination TCP/UDP port __be16 skc_dport; // skc_num is the source TCP/UDP port __u16 skc_num; }; }; // skc_family is the network address family (2 for IPV4) short unsigned int skc_family; } __attribute__((preserve_access_index)); /** * struct sock is the network layer representation of sockets. * This is a simplified copy of the kernel's struct sock. * This copy is needed only to access struct sock_common. */ struct sock { struct sock_common __sk_common; } __attribute__((preserve_access_index)); /** * struct tcp_sock is the Linux representation of a TCP socket. * This is a simplified copy of the kernel's struct tcp_sock. * For this example we only need srtt_us to read the smoothed RTT. */ struct tcp_sock { u32 srtt_us; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 1 << 24); __type(value, struct event); } events SEC(".maps"); /** * The sample submitted to userspace over a ring buffer. * Emit struct event's type info into the ELF's BTF so bpf2go * can generate a Go type from it. 
*/
struct event {
	u16 sport;
	u16 dport;
	u32 saddr;
	u32 daddr;
	u32 srtt;
};

/**
 * tcp_close is attached via SEC("fentry/tcp_close") and reports one
 * struct event per IPv4 socket through the events ring buffer.
 *
 * Sockets whose skc_family is not AF_INET are ignored. The program also
 * returns without emitting anything when bpf_skc_to_tcp_sock() yields NULL
 * or when no ring buffer space can be reserved. It always returns 0.
 */
SEC("fentry/tcp_close")
int BPF_PROG(tcp_close, struct sock *sk) {
	if (sk->__sk_common.skc_family != AF_INET) {
		return 0;
	}

	// The input struct sock is actually a tcp_sock, so we can type-cast
	struct tcp_sock *ts = bpf_skc_to_tcp_sock(sk);
	if (!ts) {
		return 0;
	}

	struct event *tcp_info;
	tcp_info = bpf_ringbuf_reserve(&events, sizeof(struct event), 0);
	if (!tcp_info) {
		return 0;
	}

	// saddr/daddr are copied verbatim from the __be32 fields, no byte swap.
	tcp_info->saddr = sk->__sk_common.skc_rcv_saddr;
	tcp_info->daddr = sk->__sk_common.skc_daddr;
	// dport goes through bpf_ntohs(); sport (skc_num, a __u16) is copied
	// unchanged.
	tcp_info->dport = bpf_ntohs(sk->__sk_common.skc_dport);
	tcp_info->sport = sk->__sk_common.skc_num;

	// NOTE(review): presumably srtt_us is the smoothed RTT in microseconds
	// scaled by 8, so ">> 3" followed by "/ 1000" yields milliseconds.
	// Confirm against the kernel's struct tcp_sock.
	tcp_info->srtt = ts->srtt_us >> 3;
	tcp_info->srtt /= 1000;

	bpf_ringbuf_submit(tcp_info, 0);

	return 0;
}

================================================
FILE: examples/tcprtt_sockops/bpf_bpfeb.go
================================================
// Code generated by bpf2go; DO NOT EDIT.

//go:build (mips || mips64 || ppc64 || s390x) && linux

package main

import (
	"bytes"
	_ "embed"
	"fmt"
	"io"
	"structs"

	"github.com/cilium/ebpf"
)

type bpfRttEvent struct {
	_     structs.HostLayout
	Sport uint16
	Dport uint16
	Saddr uint32
	Daddr uint32
	Srtt  uint32
}

type bpfSkInfo struct {
	_      structs.HostLayout
	SkKey  bpfSkKey
	SkType uint8
	_      [3]byte
}

type bpfSkKey struct {
	_          structs.HostLayout
	LocalIp4   uint32
	RemoteIp4  uint32
	LocalPort  uint32
	RemotePort uint32
}

// loadBpf returns the embedded CollectionSpec for bpf.
func loadBpf() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_BpfBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load bpf: %w", err)
	}

	return spec, err
}

// loadBpfObjects loads bpf and converts it into a struct.
//
// The following types are suitable as obj argument:
//
//	*bpfObjects
//	*bpfPrograms
//	*bpfMaps
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { BpfSockopsCb *ebpf.ProgramSpec `ebpf:"bpf_sockops_cb"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { MapEstabSk *ebpf.MapSpec `ebpf:"map_estab_sk"` RttEvents *ebpf.MapSpec `ebpf:"rtt_events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { MapEstabSk *ebpf.Map `ebpf:"map_estab_sk"` RttEvents *ebpf.Map `ebpf:"rtt_events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.MapEstabSk, m.RttEvents, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { BpfSockopsCb *ebpf.Program `ebpf:"bpf_sockops_cb"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.BpfSockopsCb, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/tcprtt_sockops/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfRttEvent struct { _ structs.HostLayout Sport uint16 Dport uint16 Saddr uint32 Daddr uint32 Srtt uint32 } type bpfSkInfo struct { _ structs.HostLayout SkKey bpfSkKey SkType uint8 _ [3]byte } type bpfSkKey struct { _ structs.HostLayout LocalIp4 uint32 RemoteIp4 uint32 LocalPort uint32 RemotePort uint32 } // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { BpfSockopsCb *ebpf.ProgramSpec `ebpf:"bpf_sockops_cb"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { MapEstabSk *ebpf.MapSpec `ebpf:"map_estab_sk"` RttEvents *ebpf.MapSpec `ebpf:"rtt_events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { MapEstabSk *ebpf.Map `ebpf:"map_estab_sk"` RttEvents *ebpf.Map `ebpf:"rtt_events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.MapEstabSk, m.RttEvents, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { BpfSockopsCb *ebpf.Program `ebpf:"bpf_sockops_cb"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.BpfSockopsCb, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/tcprtt_sockops/bpf_sockops.h ================================================ /* * Note that this header file contains a subset of kernel * definitions needed for the tcprtt_sockops example. */ #ifndef BPF_SOCKOPS_H #define BPF_SOCKOPS_H /* * Copy of TCP states. * See: https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/bpf.h#L6347. */ enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT = 2, TCP_SYN_RECV = 3, TCP_FIN_WAIT1 = 4, TCP_FIN_WAIT2 = 5, TCP_TIME_WAIT = 6, TCP_CLOSE = 7, TCP_CLOSE_WAIT = 8, TCP_LAST_ACK = 9, TCP_LISTEN = 10, TCP_CLOSING = 11, TCP_NEW_SYN_RECV = 12, TCP_MAX_STATES = 13, }; /* * Copy of sock_ops operations. * See: https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/bpf.h#L6233. */ enum { BPF_SOCK_OPS_VOID = 0, BPF_SOCK_OPS_TIMEOUT_INIT = 1, BPF_SOCK_OPS_RWND_INIT = 2, BPF_SOCK_OPS_TCP_CONNECT_CB = 3, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB = 4, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB = 5, BPF_SOCK_OPS_NEEDS_ECN = 6, BPF_SOCK_OPS_BASE_RTT = 7, BPF_SOCK_OPS_RTO_CB = 8, BPF_SOCK_OPS_RETRANS_CB = 9, BPF_SOCK_OPS_STATE_CB = 10, BPF_SOCK_OPS_TCP_LISTEN_CB = 11, BPF_SOCK_OPS_RTT_CB = 12, BPF_SOCK_OPS_PARSE_HDR_OPT_CB = 13, BPF_SOCK_OPS_HDR_OPT_LEN_CB = 14, BPF_SOCK_OPS_WRITE_HDR_OPT_CB = 15, }; /* * Copy of definitions for bpf_sock_ops_cb_flags. * See: https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/bpf.h#L6178. 
*/ enum { BPF_SOCK_OPS_RTO_CB_FLAG = 1, BPF_SOCK_OPS_RETRANS_CB_FLAG = 2, BPF_SOCK_OPS_STATE_CB_FLAG = 4, BPF_SOCK_OPS_RTT_CB_FLAG = 8, BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = 16, BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = 32, BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = 64, BPF_SOCK_OPS_ALL_CB_FLAGS = 127, }; /* * Copy of bpf.h's bpf_sock_ops with minimal subset * of fields used by the tcprtt_sockops example. * See: https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/bpf.h#L6101. */ struct bpf_sock_ops { __u32 op; union { __u32 args[4]; __u32 reply; __u32 replylong[4]; }; __u32 family; __u32 remote_ip4; __u32 local_ip4; __u32 remote_port; __u32 local_port; __u32 srtt_us; __u32 bpf_sock_ops_cb_flags; } __attribute__((preserve_access_index)); #endif ================================================ FILE: examples/tcprtt_sockops/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to // a cgroupv2 path and using sockops to process TCP socket events. // It prints the IPs/ports/RTT information every time TCP sockets // update their internal RTT value. // It supports only IPv4 for this example. 
// // Sample output: // // examples# go run -exec sudo ./tcprtt_sockops // 2022/08/14 20:58:03 eBPF program loaded and attached on cgroup /sys/fs/cgroup/unified // 2022/08/14 20:58:03 Src addr Port -> Dest addr Port RTT (ms) // 2022/08/14 20:58:09 10.0.1.205 54844 -> 20.42.73.25 443 67 // 2022/08/14 20:58:09 10.0.1.205 54844 -> 20.42.73.25 443 67 // 2022/08/14 20:58:33 10.0.1.205 38620 -> 140.82.121.4 443 26 // 2022/08/14 20:58:33 10.0.1.205 38620 -> 140.82.121.4 443 26 // 2022/08/14 20:58:43 34.67.40.146 45380 -> 10.0.1.205 5201 106 // 2022/08/14 20:58:43 34.67.40.146 45380 -> 10.0.1.205 5201 106 package main import ( "bytes" "encoding/binary" "errors" "log" "net" "os" "os/signal" "path/filepath" "syscall" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/ringbuf" "github.com/cilium/ebpf/rlimit" "golang.org/x/sys/unix" ) //go:generate go tool bpf2go -tags linux -tags "linux" bpf tcprtt_sockops.c -- -I../headers func main() { stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Find the path to a cgroup enabled to version 2 cgroupPath, err := findCgroupPath() if err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. 
objs := bpfObjects{}
	if err := loadBpfObjects(&objs, nil); err != nil {
		log.Fatalf("loading objects: %v", err)
	}
	defer objs.Close()

	// Attach ebpf program to a cgroupv2
	l, err := link.AttachCgroup(link.CgroupOptions{
		Path:    cgroupPath,
		Program: objs.BpfSockopsCb,
		Attach:  ebpf.AttachCGroupSockOps,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer l.Close()

	log.Printf("eBPF program loaded and attached on cgroup %s\n", cgroupPath)

	rd, err := ringbuf.NewReader(objs.RttEvents)
	if err != nil {
		log.Fatalf("opening ringbuf reader: %s", err)
	}
	defer rd.Close()

	log.Printf("%-15s %-6s -> %-15s %-6s %-6s",
		"Src addr",
		"Port",
		"Dest addr",
		"Port",
		"RTT (ms)",
	)
	go readLoop(rd)

	// Wait
	<-stopper
}

// findCgroupPath returns the directory the sockops program is attached to.
//
// It calls statfs on /sys/fs/cgroup. When the reported filesystem type equals
// unix.CGROUP2_SUPER_MAGIC that path is returned as is; otherwise
// /sys/fs/cgroup/unified is returned. A statfs failure is returned unchanged
// together with an empty path.
func findCgroupPath() (string, error) {
	cgroupPath := "/sys/fs/cgroup"

	var st syscall.Statfs_t
	err := syscall.Statfs(cgroupPath, &st)
	if err != nil {
		return "", err
	}
	isCgroupV2Enabled := st.Type == unix.CGROUP2_SUPER_MAGIC
	if !isCgroupV2Enabled {
		cgroupPath = filepath.Join(cgroupPath, "unified")
	}
	return cgroupPath, nil
}

// readLoop drains rd and logs one line per sample until the reader is closed.
//
// Each raw sample is decoded into a bpfRttEvent with binary.Read using the
// host's native byte order. A read error other than ringbuf.ErrClosed, or a
// decode error, is logged and the loop moves on to the next record. main
// starts this function in its own goroutine.
func readLoop(rd *ringbuf.Reader) {
	// bpfRttEvent is generated by bpf2go.
	var event bpfRttEvent
	for {
		record, err := rd.Read()
		if err != nil {
			// ErrClosed is the only error that ends the loop.
			if errors.Is(err, ringbuf.ErrClosed) {
				log.Println("received signal, exiting..")
				return
			}
			log.Printf("reading from reader: %s", err)
			continue
		}

		// Parse the ringbuf event entry into a bpfRttEvent structure.
if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.NativeEndian, &event); err != nil {
			log.Printf("parsing ringbuf event: %s", err)
			continue
		}

		log.Printf("%-15s %-6d -> %-15s %-6d %-6d",
			intToIP(event.Saddr),
			event.Sport,
			intToIP(event.Daddr),
			event.Dport,
			event.Srtt,
		)
	}
}

// intToIP converts an IPv4 address held in a uint32 to a 4-byte net.IP,
// writing the most significant byte first.
//
// NOTE(review): init_sk_key in tcprtt_sockops.c runs the addresses through
// bpf_ntohl() before storing them, which is presumably why big-endian is the
// right order here. Confirm.
func intToIP(ipNum uint32) net.IP {
	ip := make(net.IP, 4)
	binary.BigEndian.PutUint32(ip, ipNum)
	return ip
}

================================================
FILE: examples/tcprtt_sockops/tcprtt_sockops.c
================================================
//go:build ignore

#include "common.h"

#include "bpf_endian.h"
#include "bpf_sockops.h"
#include "bpf_tracing.h"

#define AF_INET 2
#define SOCKOPS_MAP_SIZE 65535

char __license[] SEC("license") = "Dual MIT/GPL";

// Values stored in sk_info.sk_type; they record which established callback
// registered the connection.
enum {
	SOCK_TYPE_ACTIVE  = 0,
	SOCK_TYPE_PASSIVE = 1,
};

// map_estab_sk holds one struct sk_info per tracked connection, keyed by the
// connection's struct sk_key. Entries are inserted by
// bpf_sock_ops_establish_cb and deleted by bpf_sock_ops_state_cb.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, SOCKOPS_MAP_SIZE);
	__type(key, struct sk_key);
	__type(value, struct sk_info);
} map_estab_sk SEC(".maps");

// sk_key is the 4-tuple identifying a connection, as filled in by init_sk_key.
struct sk_key {
	u32 local_ip4;
	u32 remote_ip4;
	u32 local_port;
	u32 remote_port;
};

// sk_info is the value stored in map_estab_sk: the key itself plus the
// SOCK_TYPE_* value passed to bpf_sock_ops_establish_cb.
struct sk_info {
	struct sk_key sk_key;
	u8 sk_type;
};

// rtt_events is the ring buffer through which struct rtt_event samples are
// submitted by bpf_sock_ops_rtt_cb.
struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 1 << 24);
	__type(value, struct rtt_event);
} rtt_events SEC(".maps");

// rtt_event is the sample layout written to rtt_events.
struct rtt_event {
	u16 sport;
	u16 dport;
	u32 saddr;
	u32 daddr;
	u32 srtt;
};

// init_sk_key fills sk_key from the addresses and ports in skops.
// Both IPv4 addresses and remote_port are passed through bpf_ntohl();
// local_port is copied unchanged.
// NOTE(review): presumably local_port is already in host byte order in
// struct bpf_sock_ops. Confirm against the uapi header.
static inline void init_sk_key(struct bpf_sock_ops *skops, struct sk_key *sk_key) {
	sk_key->local_ip4   = bpf_ntohl(skops->local_ip4);
	sk_key->remote_ip4  = bpf_ntohl(skops->remote_ip4);
	sk_key->local_port  = skops->local_port;
	sk_key->remote_port = bpf_ntohl(skops->remote_port);
}

// bpf_sock_ops_establish_cb records a newly established IPv4 connection in
// map_estab_sk, tagged with sock_type, and then enables the RTT and state
// callbacks for that socket. Nothing is enabled when skops is NULL, the
// family is not AF_INET, or the map insert fails.
static inline void bpf_sock_ops_establish_cb(struct bpf_sock_ops *skops, u8 sock_type) {
	int err;
	struct sk_info sk_info = {};
	// Only process IPv4 sockets
	if (skops == NULL || skops->family != AF_INET)
		return;

	// Initialize the 4-tuple key
	init_sk_key(skops, &sk_info.sk_key);
	sk_info.sk_type = sock_type;

	// Store the socket info in map using the 4-tuple as key
	// We keep track of TCP connections in 'established' state
	err = bpf_map_update_elem(&map_estab_sk, &sk_info.sk_key, &sk_info, BPF_NOEXIST);
	if (err != 0) {
		// Storing the 4-tuple in map has failed, return early.
		// This can happen in case the 4-tuple already exists in the map (i.e. BPF_NOEXIST flag)
		return;
	}

	// Enable sockops callbacks for RTT and TCP state change
	bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTT_CB_FLAG | BPF_SOCK_OPS_STATE_CB_FLAG);
}

// bpf_sock_ops_rtt_cb emits one struct rtt_event for a connection that is
// present in map_estab_sk. Connections that are not in the map, and events
// for which no ring buffer space can be reserved, are silently skipped.
static inline void bpf_sock_ops_rtt_cb(struct bpf_sock_ops *skops) {
	struct sk_key sk_key = {};
	struct sk_info *sk_info;
	struct rtt_event *rtt_event;

	// Initialize the 4-tuple key
	init_sk_key(skops, &sk_key);

	// Retrieve the socket info from map of established connections
	sk_info = bpf_map_lookup_elem(&map_estab_sk, &sk_key);
	if (!sk_info)
		return;

	rtt_event = bpf_ringbuf_reserve(&rtt_events, sizeof(struct rtt_event), 0);
	if (!rtt_event) {
		return;
	}

	// There is no default case: sk_type is only ever written by
	// bpf_sock_ops_establish_cb from its sock_type argument. For any other
	// value the address and port fields would be submitted without being
	// written here.
	switch (sk_info->sk_type) {
	case SOCK_TYPE_ACTIVE:
		// If socket is 'active', 'local' means 'source'
		// and 'remote' means 'destination'
		rtt_event->saddr = sk_info->sk_key.local_ip4;
		rtt_event->daddr = sk_info->sk_key.remote_ip4;
		rtt_event->sport = sk_info->sk_key.local_port;
		rtt_event->dport = sk_info->sk_key.remote_port;
		break;
	case SOCK_TYPE_PASSIVE:
		// If socket is 'passive', 'local' means 'destination'
		// and 'remote' means 'source'
		rtt_event->saddr = sk_info->sk_key.remote_ip4;
		rtt_event->daddr = sk_info->sk_key.local_ip4;
		rtt_event->sport = sk_info->sk_key.remote_port;
		rtt_event->dport = sk_info->sk_key.local_port;
		break;
	}

	// Extract smoothed RTT
	// NOTE(review): presumably srtt_us is in microseconds scaled by 8, so
	// ">> 3" followed by "/ 1000" yields milliseconds. Confirm against the
	// kernel's struct bpf_sock_ops documentation.
	rtt_event->srtt = skops->srtt_us >> 3;
	rtt_event->srtt /= 1000;

	// Send RTT event data to userspace app via ring buffer
	bpf_ringbuf_submit(rtt_event, 0);
}

// bpf_sock_ops_state_cb deletes the connection's entry from map_estab_sk when
// the socket's previous state (args[0]) was TCP_ESTABLISHED.
static inline void bpf_sock_ops_state_cb(struct bpf_sock_ops *skops) {
	struct sk_key sk_key = {};

	// Socket changed state. args[0] stores the previous state.
// Perform cleanup of map entry if socket is exiting // the 'established' state, if (skops->args[0] == TCP_ESTABLISHED) { init_sk_key(skops, &sk_key); bpf_map_delete_elem(&map_estab_sk, &sk_key); } } SEC("sockops") int bpf_sockops_cb(struct bpf_sock_ops *skops) { u32 op; op = skops->op; switch (op) { case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: bpf_sock_ops_establish_cb(skops, SOCK_TYPE_ACTIVE); break; case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: bpf_sock_ops_establish_cb(skops, SOCK_TYPE_PASSIVE); break; case BPF_SOCK_OPS_RTT_CB: bpf_sock_ops_rtt_cb(skops); break; case BPF_SOCK_OPS_STATE_CB: bpf_sock_ops_state_cb(skops); break; } return 0; } ================================================ FILE: examples/tcx/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfProgramSpecs struct { EgressProgFunc *ebpf.ProgramSpec `ebpf:"egress_prog_func"` IngressProgFunc *ebpf.ProgramSpec `ebpf:"ingress_prog_func"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { EgressPktCount *ebpf.VariableSpec `ebpf:"egress_pkt_count"` IngressPktCount *ebpf.VariableSpec `ebpf:"ingress_pkt_count"` } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { } func (m *bpfMaps) Close() error { return _BpfClose() } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { EgressPktCount *ebpf.Variable `ebpf:"egress_pkt_count"` IngressPktCount *ebpf.Variable `ebpf:"ingress_pkt_count"` } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { EgressProgFunc *ebpf.Program `ebpf:"egress_prog_func"` IngressProgFunc *ebpf.Program `ebpf:"ingress_prog_func"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.EgressProgFunc, p.IngressProgFunc, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/tcx/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfProgramSpecs struct { EgressProgFunc *ebpf.ProgramSpec `ebpf:"egress_prog_func"` IngressProgFunc *ebpf.ProgramSpec `ebpf:"ingress_prog_func"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { EgressPktCount *ebpf.VariableSpec `ebpf:"egress_pkt_count"` IngressPktCount *ebpf.VariableSpec `ebpf:"ingress_pkt_count"` } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { } func (m *bpfMaps) Close() error { return _BpfClose() } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { EgressPktCount *ebpf.Variable `ebpf:"egress_pkt_count"` IngressPktCount *ebpf.Variable `ebpf:"ingress_pkt_count"` } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { EgressProgFunc *ebpf.Program `ebpf:"egress_prog_func"` IngressProgFunc *ebpf.Program `ebpf:"ingress_prog_func"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.EgressProgFunc, p.IngressProgFunc, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/tcx/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a network interface // with Linux TCX (Traffic Control with eBPF). The program counts ingress and egress // packets using two variables. The userspace program (Go code in this file) // prints the contents of the two variables to stdout every second. // This example depends on tcx bpf_link, available in Linux kernel version 6.6 or newer. package main import ( "fmt" "log" "net" "os" "time" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" ) //go:generate go tool bpf2go -tags linux bpf tcx.c -- -I../headers func main() { if len(os.Args) < 2 { log.Fatalf("Please specify a network interface") } // Look up the network interface by name. ifaceName := os.Args[1] iface, err := net.InterfaceByName(ifaceName) if err != nil { log.Fatalf("lookup network iface %q: %s", ifaceName, err) } // Load pre-compiled programs into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %s", err) } defer objs.Close() // Attach the program to Ingress TC. 
l, err := link.AttachTCX(link.TCXOptions{
		Interface: iface.Index,
		Program:   objs.IngressProgFunc,
		Attach:    ebpf.AttachTCXIngress,
	})
	if err != nil {
		log.Fatalf("could not attach TCx program: %s", err)
	}
	defer l.Close()

	log.Printf("Attached TCx program to INGRESS iface %q (index %d)", iface.Name, iface.Index)

	// Attach the program to Egress TC.
	l2, err := link.AttachTCX(link.TCXOptions{
		Interface: iface.Index,
		Program:   objs.EgressProgFunc,
		Attach:    ebpf.AttachTCXEgress,
	})
	if err != nil {
		log.Fatalf("could not attach TCx program: %s", err)
	}
	defer l2.Close()

	log.Printf("Attached TCx program to EGRESS iface %q (index %d)", iface.Name, iface.Index)
	log.Printf("Press Ctrl-C to exit and remove the program")

	// Print the contents of the two counter variables once per second.
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()
	for range ticker.C {
		s, err := formatCounters(objs.IngressPktCount, objs.EgressPktCount)
		if err != nil {
			// A failed read is reported and retried on the next tick.
			log.Printf("Error reading map: %s", err)
			continue
		}

		log.Printf("Packet Count: %s\n", s)
	}
}

// formatCounters reads the current values of ingressVar and egressVar as
// uint64 and renders them as "<n> Ingress, <n> Egress", each number padded to
// a width of 10. The first read error is returned together with an empty
// string.
func formatCounters(ingressVar, egressVar *ebpf.Variable) (string, error) {
	var (
		ingressPacketCount uint64
		egressPacketCount  uint64
	)

	// retrieve value from the ingress counter variable
	if err := ingressVar.Get(&ingressPacketCount); err != nil {
		return "", err
	}

	// retrieve value from the egress counter variable
	if err := egressVar.Get(&egressPacketCount); err != nil {
		return "", err
	}

	return fmt.Sprintf("%10v Ingress, %10v Egress", ingressPacketCount, egressPacketCount), nil
}

================================================
FILE: examples/tcx/tcx.c
================================================
//go:build ignore

#include "common.h"

char __license[] SEC("license") = "Dual MIT/GPL";

// Packet counters, one per direction. Each program below adds 1 to its own
// counter with __sync_fetch_and_add(); the Go side reads them through
// objs.IngressPktCount and objs.EgressPktCount.
__u64 ingress_pkt_count = 0;
__u64 egress_pkt_count  = 0;

// ingress_prog_func increments ingress_pkt_count and returns TC_ACT_OK.
SEC("tc")
int ingress_prog_func(struct __sk_buff *skb) {
	__sync_fetch_and_add(&ingress_pkt_count, 1);
	return TC_ACT_OK;
}

// egress_prog_func increments egress_pkt_count and returns TC_ACT_OK.
SEC("tc")
int egress_prog_func(struct __sk_buff *skb) {
	__sync_fetch_and_add(&egress_pkt_count, 1);
	return TC_ACT_OK;
}
================================================ FILE: examples/tracepoint_in_c/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { MmPageAlloc *ebpf.ProgramSpec `ebpf:"mm_page_alloc"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { CountingMap *ebpf.MapSpec `ebpf:"counting_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { CountingMap *ebpf.Map `ebpf:"counting_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.CountingMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { MmPageAlloc *ebpf.Program `ebpf:"mm_page_alloc"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.MmPageAlloc, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/tracepoint_in_c/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. 
// // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { MmPageAlloc *ebpf.ProgramSpec `ebpf:"mm_page_alloc"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { CountingMap *ebpf.MapSpec `ebpf:"counting_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { CountingMap *ebpf.Map `ebpf:"counting_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.CountingMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { MmPageAlloc *ebpf.Program `ebpf:"mm_page_alloc"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.MmPageAlloc, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/tracepoint_in_c/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a kernel tracepoint. // The eBPF program will be attached to the page allocation tracepoint and // prints out the number of times it has been reached. The tracepoint fields // are printed into /sys/kernel/tracing/trace_pipe. package main import ( "log" "time" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux bpf tracepoint.c -- -I../headers const mapKey uint32 = 0 func main() { // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %v", err) } defer objs.Close() // Open a tracepoint and attach the pre-compiled program. Each time // the kernel function enters, the program will increment the execution // counter by 1. The read loop below polls this map value once per // second. 
// The first two arguments are taken from the following pathname: // /sys/kernel/tracing/events/kmem/mm_page_alloc kp, err := link.Tracepoint("kmem", "mm_page_alloc", objs.MmPageAlloc, nil) if err != nil { log.Fatalf("opening tracepoint: %s", err) } defer kp.Close() // Read loop reporting the total amount of times the kernel // function was entered, once per second. ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() log.Println("Waiting for events..") for range ticker.C { var value uint64 if err := objs.CountingMap.Lookup(mapKey, &value); err != nil { log.Fatalf("reading map: %v", err) } log.Printf("%v times", value) } } ================================================ FILE: examples/tracepoint_in_c/tracepoint.c ================================================ //go:build ignore #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); __type(value, u64); __uint(max_entries, 1); } counting_map SEC(".maps"); // This struct is defined according to the following format file: // /sys/kernel/tracing/events/kmem/mm_page_alloc/format struct alloc_info { /* The first 8 bytes is not allowed to read */ unsigned long pad; unsigned long pfn; unsigned int order; unsigned int gfp_flags; int migratetype; }; // This tracepoint is defined in mm/page_alloc.c:__alloc_pages_nodemask() // Userspace pathname: /sys/kernel/tracing/events/kmem/mm_page_alloc SEC("tracepoint/kmem/mm_page_alloc") int mm_page_alloc(struct alloc_info *info) { u32 key = 0; u64 initval = 1, *valp; valp = bpf_map_lookup_elem(&counting_map, &key); if (!valp) { bpf_map_update_elem(&counting_map, &key, &initval, BPF_ANY); return 0; } __sync_fetch_and_add(valp, 1); return 0; } ================================================ FILE: examples/tracepoint_in_go/main.go ================================================ //go:build linux // This program demonstrates how to attach an eBPF program to a tracepoint. 
// The program is attached to the syscall/sys_enter_openat tracepoint and // prints out the integer 123 every time the syscall is entered. package main import ( "errors" "log" "os" "os/signal" "syscall" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/perf" "github.com/cilium/ebpf/rlimit" ) // Metadata for the eBPF program used in this example. var progSpec = &ebpf.ProgramSpec{ Name: "my_trace_prog", // non-unique name, will appear in `bpftool prog list` while attached Type: ebpf.TracePoint, // only TracePoint programs can be attached to trace events created by link.Tracepoint() License: "GPL", // license must be GPL for calling kernel helpers like perf_event_output } func main() { // Subscribe to signals for terminating the program. stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Create a perf event array for the kernel to write perf records to. // These records will be read by userspace below. events, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.PerfEventArray, Name: "my_perf_array", }) if err != nil { log.Fatalf("creating perf event array: %s", err) } defer events.Close() // Open a perf reader from userspace into the perf event array // created earlier. rd, err := perf.NewReader(events, os.Getpagesize()) if err != nil { log.Fatalf("creating event reader: %s", err) } defer rd.Close() // Close the reader when the process receives a signal, which will exit // the read loop. go func() { <-stopper rd.Close() }() // Minimal program that writes the static value '123' to the perf ring on // each event. Note that this program refers to the file descriptor of // the perf event array created above, which needs to be created prior to the // program being verified by and inserted into the kernel. 
progSpec.Instructions = asm.Instructions{ // store the integer 123 at FP[-8] asm.Mov.Imm(asm.R2, 123), asm.StoreMem(asm.RFP, -8, asm.R2, asm.Word), // load registers with arguments for call of FnPerfEventOutput asm.LoadMapPtr(asm.R2, events.FD()), // file descriptor of the perf event array asm.LoadImm(asm.R3, 0xffffffff, asm.DWord), asm.Mov.Reg(asm.R4, asm.RFP), asm.Add.Imm(asm.R4, -8), asm.Mov.Imm(asm.R5, 4), // call FnPerfEventOutput, an eBPF kernel helper asm.FnPerfEventOutput.Call(), // set exit code to 0 asm.Mov.Imm(asm.R0, 0), asm.Return(), } // Instantiate and insert the program into the kernel. prog, err := ebpf.NewProgram(progSpec) if err != nil { log.Fatalf("creating ebpf program: %s", err) } defer prog.Close() // Open a trace event based on a pre-existing kernel hook (tracepoint). // Each time a userspace program uses the 'openat()' syscall, the eBPF // program specified above will be executed and a '123' value will appear // in the perf ring. tp, err := link.Tracepoint("syscalls", "sys_enter_openat", prog, nil) if err != nil { log.Fatalf("opening tracepoint: %s", err) } defer tp.Close() log.Println("Waiting for events..") for { record, err := rd.Read() if err != nil { if errors.Is(err, perf.ErrClosed) { log.Println("Received signal, exiting..") return } log.Printf("reading from reader: %s", err) continue } log.Println("Record:", record) } } ================================================ FILE: examples/uretprobe/bpf_x86_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64) && linux package main import ( "bytes" _ "embed" "fmt" "io" "structs" "github.com/cilium/ebpf" ) type bpfEvent struct { _ structs.HostLayout Pid uint32 Line [80]uint8 } // loadBpf returns the embedded CollectionSpec for bpf. 
func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { UretprobeBashReadline *ebpf.ProgramSpec `ebpf:"uretprobe_bash_readline"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { Events *ebpf.MapSpec `ebpf:"events"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfMaps struct { Events *ebpf.Map `ebpf:"events"` } func (m *bpfMaps) Close() error { return _BpfClose( m.Events, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { UretprobeBashReadline *ebpf.Program `ebpf:"uretprobe_bash_readline"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.UretprobeBashReadline, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_x86_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/uretprobe/main.go ================================================ // This program demonstrates how to attach an eBPF program to a uretprobe. // The program will be attached to the 'readline' symbol in the binary '/bin/bash' and print out // the line which 'readline' functions returns to the caller. //go:build amd64 && linux package main import ( "bytes" "encoding/binary" "errors" "log" "os" "os/signal" "syscall" "golang.org/x/sys/unix" "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/perf" "github.com/cilium/ebpf/rlimit" ) //go:generate go tool bpf2go -tags linux -target amd64 bpf uretprobe.c -- -I../headers const ( // The path to the ELF binary containing the function to trace. // On some distributions, the 'readline' function is provided by a // dynamically-linked library, so the path of the library will need // to be specified instead, e.g. /usr/lib/libreadline.so.8. // Use `ldd /bin/bash` to find these paths. 
binPath = "/bin/bash" symbol = "readline" ) func main() { stopper := make(chan os.Signal, 1) signal.Notify(stopper, os.Interrupt, syscall.SIGTERM) // Allow the current process to lock memory for eBPF resources. if err := rlimit.RemoveMemlock(); err != nil { log.Fatal(err) } // Load pre-compiled programs and maps into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %s", err) } defer objs.Close() // Open an ELF binary and read its symbols. ex, err := link.OpenExecutable(binPath) if err != nil { log.Fatalf("opening executable: %s", err) } // Open a Uretprobe at the exit point of the symbol and attach // the pre-compiled eBPF program to it. up, err := ex.Uretprobe(symbol, objs.UretprobeBashReadline, nil) if err != nil { log.Fatalf("creating uretprobe: %s", err) } defer up.Close() // Open a perf event reader from userspace on the PERF_EVENT_ARRAY map // described in the eBPF C program. rd, err := perf.NewReader(objs.Events, os.Getpagesize()) if err != nil { log.Fatalf("creating perf event reader: %s", err) } defer rd.Close() go func() { // Wait for a signal and close the perf reader, // which will interrupt rd.Read() and make the program exit. <-stopper log.Println("Received signal, exiting program..") if err := rd.Close(); err != nil { log.Fatalf("closing perf event reader: %s", err) } }() log.Printf("Listening for events..") // bpfEvent is generated by bpf2go. var event bpfEvent for { record, err := rd.Read() if err != nil { if errors.Is(err, perf.ErrClosed) { return } log.Printf("reading from perf event reader: %s", err) continue } if record.LostSamples != 0 { log.Printf("perf event ring buffer full, dropped %d samples", record.LostSamples) continue } // Parse the perf event entry into a bpfEvent structure. 
if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, &event); err != nil { log.Printf("parsing perf event: %s", err) continue } log.Printf("%s:%s return value: %s", binPath, symbol, unix.ByteSliceToString(event.Line[:])) } } ================================================ FILE: examples/uretprobe/uretprobe.c ================================================ //go:build ignore #include "common.h" #include "bpf_tracing.h" char __license[] SEC("license") = "Dual MIT/GPL"; struct event { u32 pid; u8 line[80]; }; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __type(value, struct event); } events SEC(".maps"); SEC("uretprobe/bash_readline") int uretprobe_bash_readline(struct pt_regs *ctx) { struct event event; event.pid = bpf_get_current_pid_tgid(); bpf_probe_read(&event.line, sizeof(event.line), (void *)PT_REGS_RC(ctx)); bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); return 0; } ================================================ FILE: examples/xdp/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. 
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { XdpStatsMap *ebpf.MapSpec `ebpf:"xdp_stats_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { XdpStatsMap *ebpf.Map `ebpf:"xdp_stats_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.XdpStatsMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
type bpfPrograms struct { XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.XdpProgFunc, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/xdp/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. 
type bpfMapSpecs struct { XdpStatsMap *ebpf.MapSpec `ebpf:"xdp_stats_map"` } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { XdpStatsMap *ebpf.Map `ebpf:"xdp_stats_map"` } func (m *bpfMaps) Close() error { return _BpfClose( m.XdpStatsMap, ) } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.XdpProgFunc, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/xdp/main.go ================================================ //go:build linux // This program demonstrates attaching an eBPF program to a network interface // with XDP (eXpress Data Path). The program parses the IPv4 source address // from packets and writes the packet count by IP to an LRU hash map. 
// The userspace program (Go code in this file) prints the contents // of the map to stdout every second. // It is possible to modify the XDP program to drop or redirect packets // as well -- give it a try! // This example depends on bpf_link, available in Linux kernel version 5.7 or newer. package main import ( "fmt" "log" "net" "net/netip" "os" "strings" "time" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" ) //go:generate go tool bpf2go -tags linux bpf xdp.c -- -I../headers func main() { if len(os.Args) < 2 { log.Fatalf("Please specify a network interface") } // Look up the network interface by name. ifaceName := os.Args[1] iface, err := net.InterfaceByName(ifaceName) if err != nil { log.Fatalf("lookup network iface %q: %s", ifaceName, err) } // Load pre-compiled programs into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %s", err) } defer objs.Close() // Attach the program. l, err := link.AttachXDP(link.XDPOptions{ Program: objs.XdpProgFunc, Interface: iface.Index, }) if err != nil { log.Fatalf("could not attach XDP program: %s", err) } defer l.Close() log.Printf("Attached XDP program to iface %q (index %d)", iface.Name, iface.Index) log.Printf("Press Ctrl-C to exit and remove the program") // Print the contents of the BPF hash map (source IP address -> packet count). ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() for range ticker.C { s, err := formatMapContents(objs.XdpStatsMap) if err != nil { log.Printf("Error reading map: %s", err) continue } log.Printf("Map contents:\n%s", s) } } func formatMapContents(m *ebpf.Map) (string, error) { var ( sb strings.Builder key netip.Addr val uint32 ) iter := m.Iterate() for iter.Next(&key, &val) { sourceIP := key // IPv4 source address in network byte order. 
packetCount := val sb.WriteString(fmt.Sprintf("\t%s => %d\n", sourceIP, packetCount)) } return sb.String(), iter.Err() } ================================================ FILE: examples/xdp/xdp.c ================================================ //go:build ignore #include "bpf_endian.h" #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; #define MAX_MAP_ENTRIES 16 /* Define an LRU hash map for storing packet count by source IPv4 address */ struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, MAX_MAP_ENTRIES); __type(key, __u32); // source IPv4 address __type(value, __u32); // packet count } xdp_stats_map SEC(".maps"); /* Attempt to parse the IPv4 source address from the packet. Returns 0 if there is no IPv4 header field; otherwise returns non-zero. */ static __always_inline int parse_ip_src_addr(struct xdp_md *ctx, __u32 *ip_src_addr) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; // First, parse the ethernet header. struct ethhdr *eth = data; if ((void *)(eth + 1) > data_end) { return 0; } if (eth->h_proto != bpf_htons(ETH_P_IP)) { // The protocol is not IPv4, so we can't parse an IPv4 source address. return 0; } // Then parse the IP header. struct iphdr *ip = (void *)(eth + 1); if ((void *)(ip + 1) > data_end) { return 0; } // Return the source IP address in network byte order. *ip_src_addr = (__u32)(ip->saddr); return 1; } SEC("xdp") int xdp_prog_func(struct xdp_md *ctx) { __u32 ip; if (!parse_ip_src_addr(ctx, &ip)) { // Not an IPv4 packet, so don't count it. goto done; } __u32 *pkt_count = bpf_map_lookup_elem(&xdp_stats_map, &ip); if (!pkt_count) { // No entry in the map for this IP address yet, so set the initial value to 1. __u32 init_pkt_count = 1; bpf_map_update_elem(&xdp_stats_map, &ip, &init_pkt_count, BPF_ANY); } else { // Entry already exists for this IP address, // so increment it atomically using an LLVM built-in. 
__sync_fetch_and_add(pkt_count, 1); } done: // Try changing this to XDP_DROP and see what happens! return XDP_PASS; } ================================================ FILE: examples/xdp_live_frame/bpf_bpfeb.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (mips || mips64 || ppc64 || s390x) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. // // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { XdpProgPass *ebpf.ProgramSpec `ebpf:"xdp_prog_pass"` XdpProgTx *ebpf.ProgramSpec `ebpf:"xdp_prog_tx"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { } func (m *bpfMaps) Close() error { return _BpfClose() } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { XdpProgPass *ebpf.Program `ebpf:"xdp_prog_pass"` XdpProgTx *ebpf.Program `ebpf:"xdp_prog_tx"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.XdpProgPass, p.XdpProgTx, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfeb.o var _BpfBytes []byte ================================================ FILE: examples/xdp_live_frame/bpf_bpfel.go ================================================ // Code generated by bpf2go; DO NOT EDIT. //go:build (386 || amd64 || arm || arm64 || loong64 || mips64le || mipsle || ppc64le || riscv64 || wasm) && linux package main import ( "bytes" _ "embed" "fmt" "io" "github.com/cilium/ebpf" ) // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) spec, err := ebpf.LoadCollectionSpecFromReader(reader) if err != nil { return nil, fmt.Errorf("can't load bpf: %w", err) } return spec, err } // loadBpfObjects loads bpf and converts it into a struct. 
// // The following types are suitable as obj argument: // // *bpfObjects // *bpfPrograms // *bpfMaps // // See ebpf.CollectionSpec.LoadAndAssign documentation for details. func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { spec, err := loadBpf() if err != nil { return err } return spec.LoadAndAssign(obj, opts) } // bpfSpecs contains maps and programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfSpecs struct { bpfProgramSpecs bpfMapSpecs bpfVariableSpecs } // bpfProgramSpecs contains programs before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { XdpProgPass *ebpf.ProgramSpec `ebpf:"xdp_prog_pass"` XdpProgTx *ebpf.ProgramSpec `ebpf:"xdp_prog_tx"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { } // bpfVariableSpecs contains global variables before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfVariableSpecs struct { } // bpfObjects contains all objects after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfObjects struct { bpfPrograms bpfMaps bpfVariables } func (o *bpfObjects) Close() error { return _BpfClose( &o.bpfPrograms, &o.bpfMaps, ) } // bpfMaps contains all maps after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { } func (m *bpfMaps) Close() error { return _BpfClose() } // bpfVariables contains all global variables after they have been loaded into the kernel. // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfVariables struct { } // bpfPrograms contains all programs after they have been loaded into the kernel. 
// // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { XdpProgPass *ebpf.Program `ebpf:"xdp_prog_pass"` XdpProgTx *ebpf.Program `ebpf:"xdp_prog_tx"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.XdpProgPass, p.XdpProgTx, ) } func _BpfClose(closers ...io.Closer) error { for _, closer := range closers { if err := closer.Close(); err != nil { return err } } return nil } // Do not access this directly. // //go:embed bpf_bpfel.o var _BpfBytes []byte ================================================ FILE: examples/xdp_live_frame/main.go ================================================ //go:build linux // This program demonstrates using BPF_F_TEST_XDP_LIVE_FRAMES to run an XDP // program in "live frame mode". In this mode, the kernel sends packets directly // to the network interface using the XDP program's return value (e.g., XDP_TX). // This is useful for high-performance packet generation and testing. // // Usage: go run . // // This example requires Linux kernel version 5.18 or newer. package main import ( "encoding/binary" "log" "net" "os" "strconv" "golang.org/x/sys/unix" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/link" ) //go:generate go tool bpf2go -tags linux bpf xdp.c -- -I../headers func main() { if len(os.Args) < 7 { log.Fatalf("Usage: %s ", os.Args[0]) } // Look up the network interface by name. 
ifaceName := os.Args[1] iface, err := net.InterfaceByName(ifaceName) if err != nil { log.Fatalf("lookup network iface %q: %s", ifaceName, err) } repeat, err := strconv.ParseUint(os.Args[2], 10, 32) if err != nil { log.Fatalf("parsing repeat count %q: %s", os.Args[2], err) } batchSize, err := strconv.ParseUint(os.Args[3], 10, 32) if err != nil { log.Fatalf("parsing batch size %q: %s", os.Args[3], err) } srcIP := net.ParseIP(os.Args[4]).To4() if srcIP == nil { log.Fatalf("invalid source IP address: %s", os.Args[4]) } dstIP := net.ParseIP(os.Args[5]).To4() if dstIP == nil { log.Fatalf("invalid destination IP address: %s", os.Args[5]) } dstMAC, err := net.ParseMAC(os.Args[6]) if err != nil { log.Fatalf("invalid destination MAC address %q: %s", os.Args[6], err) } // Load pre-compiled programs into the kernel. objs := bpfObjects{} if err := loadBpfObjects(&objs, nil); err != nil { log.Fatalf("loading objects: %s", err) } defer objs.Close() // Attach an XDP program to the interface first. // This is required for XDP_TX to work in live frame mode. 
l, err := link.AttachXDP(link.XDPOptions{ Program: objs.XdpProgPass, Interface: iface.Index, }) if err != nil { log.Fatalf("could not attach XDP program: %s", err) } defer l.Close() log.Printf("Attached XDP program to iface %q (index %d)", iface.Name, iface.Index) log.Printf("Running XDP program in live frame mode with Repeat: %d, BatchSize: %d", repeat, batchSize) log.Printf("Src MAC: %s, Dst MAC: %s", iface.HardwareAddr, dstMAC) log.Printf("Src IP: %s, Dst IP: %s", srcIP, dstIP) // Build a UDP packet with Ethernet header pkt := buildUDPPacket(iface.HardwareAddr, dstMAC, srcIP, dstIP, 12345, 9999, []byte("Hello, XDP!")) xdpmd := &sys.XdpMd{ DataEnd: uint32(len(pkt)), IngressIfindex: uint32(iface.Index), } ret, err := objs.XdpProgTx.Run(&ebpf.RunOptions{ Data: pkt, Repeat: uint32(repeat), Flags: unix.BPF_F_TEST_XDP_LIVE_FRAMES, Context: xdpmd, BatchSize: uint32(batchSize), }) if err != nil { log.Fatalf("running XDP program with BPF_F_TEST_XDP_LIVE_FRAMES: %s", err) } log.Printf("XDP program completed with return value: %d", ret) } // buildUDPPacket creates an Ethernet + IPv4 + UDP packet. 
func buildUDPPacket(srcMAC, dstMAC net.HardwareAddr, srcIP, dstIP net.IP, srcPort, dstPort uint16, payload []byte) []byte {
	// The returned frame consists of a 14 byte Ethernet header, a 20 byte
	// IPv4 header without options, an 8 byte UDP header and the payload.
	//
	// srcIP and dstIP may be given in either the 4 byte or the 16 byte
	// representation of an IPv4 address. Addresses that are not IPv4 leave
	// the corresponding header field zeroed.
	//
	// The IPv4 total length and the UDP length are 16 bit fields, so lengths
	// are truncated for payloads that do not fit.
	const (
		ethHeaderLen = 14
		ipHeaderLen  = 20
		udpHeaderLen = 8
	)

	udpLen := udpHeaderLen + len(payload)
	totalLen := ipHeaderLen + udpLen

	// Ethernet header: destination MAC, source MAC, EtherType.
	eth := make([]byte, ethHeaderLen)
	copy(eth[0:6], dstMAC)
	copy(eth[6:12], srcMAC)
	binary.BigEndian.PutUint16(eth[12:14], 0x0800) // IPv4

	// IPv4 header.
	ip := make([]byte, ipHeaderLen)
	ip[0] = 0x45                                         // Version (4) + IHL (5)
	ip[1] = 0x00                                         // DSCP + ECN
	binary.BigEndian.PutUint16(ip[2:4], uint16(totalLen)) // Total length
	binary.BigEndian.PutUint16(ip[4:6], 0x0000)           // Identification
	binary.BigEndian.PutUint16(ip[6:8], 0x4000)           // Flags (Don't Fragment) + Fragment Offset
	ip[8] = 64                                           // TTL
	ip[9] = 17                                           // Protocol (UDP)

	// net.ParseIP returns the 16 byte form even for IPv4 addresses. Copying
	// that form directly would place its leading zero bytes into the header,
	// so always normalise to the 4 byte form first.
	copy(ip[12:16], srcIP.To4())
	copy(ip[16:20], dstIP.To4())

	// The checksum is computed over the header with a zeroed checksum field,
	// which is the state of ip[10:12] at this point.
	binary.BigEndian.PutUint16(ip[10:12], ipChecksum(ip))

	// UDP header. The checksum in udp[6:8] is optional for IPv4 and left at 0.
	udp := make([]byte, udpHeaderLen)
	binary.BigEndian.PutUint16(udp[0:2], srcPort)
	binary.BigEndian.PutUint16(udp[2:4], dstPort)
	binary.BigEndian.PutUint16(udp[4:6], uint16(udpLen))

	// Combine all parts.
	pkt := make([]byte, 0, ethHeaderLen+totalLen)
	pkt = append(pkt, eth...)
	pkt = append(pkt, ip...)
	pkt = append(pkt, udp...)
	pkt = append(pkt, payload...)

	return pkt
}

// ipChecksum calculates the IP header checksum.
//
// The input is summed as big-endian 16 bit words, the carries are folded back
// into the low 16 bits and the one's complement of the result is returned.
// An odd trailing byte is treated as the high byte of a final word that is
// padded with zero.
func ipChecksum(header []byte) uint16 {
	var sum uint32

	n := len(header)
	for i := 0; i+1 < n; i += 2 {
		sum += uint32(binary.BigEndian.Uint16(header[i:]))
	}
	if n%2 == 1 {
		sum += uint32(header[n-1]) << 8
	}

	// Fold the carries until the sum fits into 16 bits.
	for sum > 0xffff {
		sum = (sum & 0xffff) + (sum >> 16)
	}

	return ^uint16(sum)
}
#include "bpf_endian.h" #include "common.h" char __license[] SEC("license") = "Dual MIT/GPL"; // xdp_prog_tx returns XDP_TX to transmit the packet back out the same interface. // This is used with BPF_F_TEST_XDP_LIVE_FRAMES for packet generation. SEC("xdp") int xdp_prog_tx(struct xdp_md *ctx) { return XDP_TX; } // xdp_prog_pass is attached to the interface to enable XDP_TX. // XDP_TX requires an XDP program to be attached to the target interface. SEC("xdp") int xdp_prog_pass(struct xdp_md *ctx) { return XDP_PASS; } ================================================ FILE: features/doc.go ================================================ // Package features allows probing for BPF features available to the calling process. // // In general, the error return values from feature probes in this package // all have the following semantics unless otherwise specified: // // err == nil: The feature is available. // errors.Is(err, ebpf.ErrNotSupported): The feature is not available. // err != nil: Any errors encountered during probe execution, wrapped. // // Note that the latter case may include false negatives, and that resource // creation may succeed despite an error being returned. For example, some // map and program types cannot reliably be probed and will return an // inconclusive error. // // As a rule, only `nil` and `ebpf.ErrNotSupported` are conclusive. // // Probe results are cached by the library and persist throughout any changes // to the process' environment, like capability changes. package features ================================================ FILE: features/link.go ================================================ package features import ( "errors" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // HaveBPFLinkUprobeMulti probes the running kernel if uprobe_multi link is supported. 
// // See the package documentation for the meaning of the error return value. func HaveBPFLinkUprobeMulti() error { return haveBPFLinkUprobeMulti() } var haveBPFLinkUprobeMulti = internal.NewFeatureTest("bpf_link_uprobe_multi", func() error { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Name: "probe_upm_link", Type: ebpf.Kprobe, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, AttachType: ebpf.AttachTraceUprobeMulti, License: "MIT", }) if errors.Is(err, unix.E2BIG) { // Kernel doesn't support AttachType field. return ebpf.ErrNotSupported } if err != nil { return err } defer prog.Close() // We try to create uprobe multi link on '/' path which results in // error with -EBADF in case uprobe multi link is supported. fd, err := sys.LinkCreateUprobeMulti(&sys.LinkCreateUprobeMultiAttr{ ProgFd: uint32(prog.FD()), AttachType: sys.BPF_TRACE_UPROBE_MULTI, Path: sys.NewStringPointer("/"), Offsets: sys.SlicePointer([]uint64{0}), Count: 1, }) switch { case errors.Is(err, unix.EBADF): return nil case errors.Is(err, unix.EINVAL): return ebpf.ErrNotSupported case err != nil: return err } // should not happen fd.Close() return errors.New("successfully attached uprobe_multi to /, kernel bug?") }, "6.6") // HaveBPFLinkKprobeMulti probes the running kernel if kprobe_multi link is supported. // // See the package documentation for the meaning of the error return value. func HaveBPFLinkKprobeMulti() error { return haveBPFLinkKprobeMulti() } var haveBPFLinkKprobeMulti = internal.NewFeatureTest("bpf_link_kprobe_multi", func() error { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Name: "probe_kpm_link", Type: ebpf.Kprobe, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, AttachType: ebpf.AttachTraceKprobeMulti, License: "MIT", }) if errors.Is(err, unix.E2BIG) { // Kernel doesn't support AttachType field. 
return ebpf.ErrNotSupported } if err != nil { return err } defer prog.Close() fd, err := sys.LinkCreateKprobeMulti(&sys.LinkCreateKprobeMultiAttr{ ProgFd: uint32(prog.FD()), AttachType: sys.BPF_TRACE_KPROBE_MULTI, Count: 1, Syms: sys.NewStringSlicePointer([]string{"vprintk"}), }) switch { case errors.Is(err, unix.EINVAL): return ebpf.ErrNotSupported // If CONFIG_FPROBE isn't set. case errors.Is(err, unix.EOPNOTSUPP): return ebpf.ErrNotSupported case err != nil: return err } fd.Close() return nil }, "5.18") // HaveBPFLinkKprobeSession probes the running kernel if kprobe_session link is supported. // // See the package documentation for the meaning of the error return value. func HaveBPFLinkKprobeSession() error { return haveBPFLinkKprobeSession() } var haveBPFLinkKprobeSession = internal.NewFeatureTest("bpf_link_kprobe_session", func() error { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Name: "probe_kps_link", Type: ebpf.Kprobe, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, AttachType: ebpf.AttachTraceKprobeSession, License: "MIT", }) if errors.Is(err, unix.E2BIG) { // Kernel doesn't support AttachType field. return ebpf.ErrNotSupported } if err != nil { return err } defer prog.Close() fd, err := sys.LinkCreateKprobeMulti(&sys.LinkCreateKprobeMultiAttr{ ProgFd: uint32(prog.FD()), AttachType: sys.BPF_TRACE_KPROBE_SESSION, Count: 1, Syms: sys.NewStringSlicePointer([]string{"vprintk"}), }) switch { case errors.Is(err, unix.EINVAL): return ebpf.ErrNotSupported // If CONFIG_FPROBE isn't set. 
case errors.Is(err, unix.EOPNOTSUPP): return ebpf.ErrNotSupported case err != nil: return err } fd.Close() return nil }, "6.10") ================================================ FILE: features/link_test.go ================================================ package features import ( "testing" "github.com/cilium/ebpf/internal/testutils" ) func TestHaveBPFLinkUprobeMulti(t *testing.T) { testutils.CheckFeatureTest(t, HaveBPFLinkUprobeMulti) } func TestHaveBPFLinkKprobeMulti(t *testing.T) { testutils.CheckFeatureTest(t, HaveBPFLinkKprobeMulti) } func TestHaveBPFLinkKprobeSession(t *testing.T) { testutils.CheckFeatureTest(t, HaveBPFLinkKprobeSession) } ================================================ FILE: features/map.go ================================================ package features import ( "errors" "fmt" "os" "unsafe" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // HaveMapType probes the running kernel for the availability of the specified map type. // // See the package documentation for the meaning of the error return value. func HaveMapType(mt ebpf.MapType) error { return haveMapTypeMatrix.Result(mt) } func probeCgroupStorageMap(mt sys.MapType) error { // keySize needs to be sizeof(struct{u32 + u64}) = 12 (+ padding = 16) // by using unsafe.Sizeof(int) we are making sure that this works on 32bit and 64bit archs return createMap(&sys.MapCreateAttr{ MapType: mt, ValueSize: 4, KeySize: uint32(8 + unsafe.Sizeof(int(0))), MaxEntries: 0, }) } func probeStorageMap(mt sys.MapType) error { // maxEntries needs to be 0 // BPF_F_NO_PREALLOC needs to be set // btf* fields need to be set // see alloc_check for local_storage map types err := createMap(&sys.MapCreateAttr{ MapType: mt, KeySize: 4, ValueSize: 4, MaxEntries: 0, MapFlags: sys.BPF_F_NO_PREALLOC, BtfKeyTypeId: 1, BtfValueTypeId: 1, BtfFd: ^uint32(0), }) if errors.Is(err, unix.EBADF) { // Triggered by BtfFd. 
return nil } return err } func probeNestedMap(mt sys.MapType) error { // assign invalid innerMapFd to pass validation check // will return EBADF err := probeMap(&sys.MapCreateAttr{ MapType: mt, InnerMapFd: ^uint32(0), }) if errors.Is(err, unix.EBADF) { return nil } return err } func probeMap(attr *sys.MapCreateAttr) error { if attr.KeySize == 0 { attr.KeySize = 4 } if attr.ValueSize == 0 { attr.ValueSize = 4 } attr.MaxEntries = 1 return createMap(attr) } func createMap(attr *sys.MapCreateAttr) error { fd, err := sys.MapCreate(attr) if err == nil { fd.Close() return nil } switch { // EINVAL occurs when attempting to create a map with an unknown type. // E2BIG occurs when MapCreateAttr contains non-zero bytes past the end // of the struct known by the running kernel, meaning the kernel is too old // to support the given map type. case errors.Is(err, unix.EINVAL), errors.Is(err, unix.E2BIG): return ebpf.ErrNotSupported } return err } var haveMapTypeMatrix = internal.FeatureMatrix[ebpf.MapType]{ ebpf.Hash: {Version: "3.19"}, ebpf.Array: {Version: "3.19"}, ebpf.ProgramArray: {Version: "4.2"}, ebpf.PerfEventArray: {Version: "4.3"}, ebpf.PerCPUHash: {Version: "4.6"}, ebpf.PerCPUArray: {Version: "4.6"}, ebpf.StackTrace: { Version: "4.6", Fn: func() error { return probeMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_STACK_TRACE, ValueSize: 8, // sizeof(uint64) }) }, }, ebpf.CGroupArray: {Version: "4.8"}, ebpf.LRUHash: {Version: "4.10"}, ebpf.LRUCPUHash: {Version: "4.10"}, ebpf.LPMTrie: { Version: "4.11", Fn: func() error { // keySize and valueSize need to be sizeof(struct{u32 + u8}) + 1 + padding = 8 // BPF_F_NO_PREALLOC needs to be set return probeMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_LPM_TRIE, KeySize: 8, ValueSize: 8, MapFlags: sys.BPF_F_NO_PREALLOC, }) }, }, ebpf.ArrayOfMaps: { Version: "4.12", Fn: func() error { return probeNestedMap(sys.BPF_MAP_TYPE_ARRAY_OF_MAPS) }, }, ebpf.HashOfMaps: { Version: "4.12", Fn: func() error { return 
probeNestedMap(sys.BPF_MAP_TYPE_HASH_OF_MAPS) }, }, ebpf.DevMap: {Version: "4.14"}, ebpf.SockMap: {Version: "4.14"}, ebpf.CPUMap: {Version: "4.15"}, ebpf.XSKMap: {Version: "4.18"}, ebpf.SockHash: {Version: "4.18"}, ebpf.CGroupStorage: { Version: "4.19", Fn: func() error { return probeCgroupStorageMap(sys.BPF_MAP_TYPE_CGROUP_STORAGE) }, }, ebpf.ReusePortSockArray: {Version: "4.19"}, ebpf.PerCPUCGroupStorage: { Version: "4.20", Fn: func() error { return probeCgroupStorageMap(sys.BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) }, }, ebpf.Queue: { Version: "4.20", Fn: func() error { return createMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_QUEUE, KeySize: 0, ValueSize: 4, MaxEntries: 1, }) }, }, ebpf.Stack: { Version: "4.20", Fn: func() error { return createMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_STACK, KeySize: 0, ValueSize: 4, MaxEntries: 1, }) }, }, ebpf.SkStorage: { Version: "5.2", Fn: func() error { return probeStorageMap(sys.BPF_MAP_TYPE_SK_STORAGE) }, }, ebpf.DevMapHash: {Version: "5.4"}, ebpf.StructOpsMap: { Version: "5.6", Fn: func() error { // StructOps requires setting a vmlinux type id, but id 1 will always // resolve to some type of integer. This will cause ENOTSUPP. err := probeMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_STRUCT_OPS, BtfVmlinuxValueTypeId: 1, }) if errors.Is(err, sys.ENOTSUPP) { // ENOTSUPP means the map type is at least known to the kernel. 
return nil } return err }, }, ebpf.RingBuf: { Version: "5.8", Fn: func() error { // keySize and valueSize need to be 0 // maxEntries needs to be power of 2 and PAGE_ALIGNED return createMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_RINGBUF, KeySize: 0, ValueSize: 0, MaxEntries: uint32(os.Getpagesize()), }) }, }, ebpf.InodeStorage: { Version: "5.10", Fn: func() error { return probeStorageMap(sys.BPF_MAP_TYPE_INODE_STORAGE) }, }, ebpf.TaskStorage: { Version: "5.11", Fn: func() error { return probeStorageMap(sys.BPF_MAP_TYPE_TASK_STORAGE) }, }, ebpf.BloomFilter: { Version: "5.16", Fn: func() error { return createMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_BLOOM_FILTER, KeySize: 0, ValueSize: 4, MaxEntries: 1, }) }, }, ebpf.UserRingbuf: { Version: "6.1", Fn: func() error { // keySize and valueSize need to be 0 // maxEntries needs to be power of 2 and PAGE_ALIGNED return createMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_USER_RINGBUF, KeySize: 0, ValueSize: 0, MaxEntries: uint32(os.Getpagesize()), }) }, }, ebpf.CgroupStorage: { Version: "6.2", Fn: func() error { return probeStorageMap(sys.BPF_MAP_TYPE_CGRP_STORAGE) }, }, ebpf.Arena: { Version: "6.9", Fn: func() error { return createMap(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_ARENA, KeySize: 0, ValueSize: 0, MaxEntries: 1, // one page MapExtra: 0, // can mmap() at any address MapFlags: sys.BPF_F_MMAPABLE, }) }, }, } func init() { for mt, ft := range haveMapTypeMatrix { ft.Name = mt.String() if ft.Fn == nil { // Avoid referring to the loop variable in the closure. mt := sys.MapType(mt) ft.Fn = func() error { return probeMap(&sys.MapCreateAttr{MapType: mt}) } } } } // MapFlags document which flags may be feature probed. type MapFlags uint32 // Flags which may be feature probed. 
const ( BPF_F_NO_PREALLOC = sys.BPF_F_NO_PREALLOC BPF_F_RDONLY_PROG = sys.BPF_F_RDONLY_PROG BPF_F_WRONLY_PROG = sys.BPF_F_WRONLY_PROG BPF_F_MMAPABLE = sys.BPF_F_MMAPABLE BPF_F_INNER_MAP = sys.BPF_F_INNER_MAP ) // HaveMapFlag probes the running kernel for the availability of the specified map flag. // // Returns an error if flag is not one of the flags declared in this package. // See the package documentation for the meaning of the error return value. func HaveMapFlag(flag MapFlags) (err error) { return haveMapFlagsMatrix.Result(flag) } func probeMapFlag(attr *sys.MapCreateAttr) error { // For now, we do not check if the map type is supported because we only support // probing for flags defined on arrays and hashes that are always supported. // In the future, if we allow probing on flags defined on newer types, checking for map type // support will be required. if attr.MapType == sys.BPF_MAP_TYPE_UNSPEC { attr.MapType = sys.BPF_MAP_TYPE_ARRAY } attr.KeySize = 4 attr.ValueSize = 4 attr.MaxEntries = 1 fd, err := sys.MapCreate(attr) if err == nil { fd.Close() } else if errors.Is(err, unix.EINVAL) { // EINVAL occurs when attempting to create a map with an unknown type or an unknown flag. 
err = ebpf.ErrNotSupported } return err } var haveMapFlagsMatrix = internal.FeatureMatrix[MapFlags]{ BPF_F_NO_PREALLOC: { Version: "4.6", Fn: func() error { return probeMapFlag(&sys.MapCreateAttr{ MapType: sys.BPF_MAP_TYPE_HASH, MapFlags: BPF_F_NO_PREALLOC, }) }, }, BPF_F_RDONLY_PROG: { Version: "5.2", Fn: func() error { return probeMapFlag(&sys.MapCreateAttr{ MapFlags: BPF_F_RDONLY_PROG, }) }, }, BPF_F_WRONLY_PROG: { Version: "5.2", Fn: func() error { return probeMapFlag(&sys.MapCreateAttr{ MapFlags: BPF_F_WRONLY_PROG, }) }, }, BPF_F_MMAPABLE: { Version: "5.5", Fn: func() error { return probeMapFlag(&sys.MapCreateAttr{ MapFlags: BPF_F_MMAPABLE, }) }, }, BPF_F_INNER_MAP: { Version: "5.10", Fn: func() error { return probeMapFlag(&sys.MapCreateAttr{ MapFlags: BPF_F_INNER_MAP, }) }, }, } func init() { for mf, ft := range haveMapFlagsMatrix { ft.Name = fmt.Sprint(mf) } } ================================================ FILE: features/map_test.go ================================================ package features import ( "errors" "math" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" ) func TestHaveMapType(t *testing.T) { testutils.CheckFeatureMatrix(t, haveMapTypeMatrix) } func TestHaveMapFlag(t *testing.T) { testutils.CheckFeatureMatrix(t, haveMapFlagsMatrix) } func TestHaveMapTypeInvalid(t *testing.T) { if err := HaveMapType(ebpf.MapType(math.MaxUint32)); err == nil { t.Fatal("Expected an error") } else if errors.Is(err, internal.ErrNotSupported) { t.Fatal("Got ErrNotSupported:", err) } } ================================================ FILE: features/misc.go ================================================ package features import ( "errors" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" ) // HaveLargeInstructions probes the running kernel if more than 4096 instructions // per program are supported. 
// // Upstream commit c04c0d2b968a ("bpf: increase complexity limit and maximum program size"). // // See the package documentation for the meaning of the error return value. func HaveLargeInstructions() error { return haveLargeInstructions() } var haveLargeInstructions = internal.NewFeatureTest(">4096 instructions", func() error { const maxInsns = 4096 insns := make(asm.Instructions, maxInsns, maxInsns+1) for i := range insns { insns[i] = asm.Mov.Imm(asm.R0, 1) } insns = append(insns, asm.Return()) return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.SocketFilter, Instructions: insns, }) }, "5.2") // HaveBoundedLoops probes the running kernel if bounded loops are supported. // // Upstream commit 2589726d12a1 ("bpf: introduce bounded loops"). // // See the package documentation for the meaning of the error return value. func HaveBoundedLoops() error { return haveBoundedLoops() } var haveBoundedLoops = internal.NewFeatureTest("bounded loops", func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.SocketFilter, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 10), asm.Sub.Imm(asm.R0, 1).WithSymbol("loop"), asm.JNE.Imm(asm.R0, 0, "loop"), asm.Return(), }, }) }, "5.3") // HaveV2ISA probes the running kernel if instructions of the v2 ISA are supported. // // Upstream commit 92b31a9af73b ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). // // See the package documentation for the meaning of the error return value. func HaveV2ISA() error { return haveV2ISA() } var haveV2ISA = internal.NewFeatureTest("v2 ISA", func() error { err := probeProgram(&ebpf.ProgramSpec{ Type: ebpf.SocketFilter, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.JLT.Imm(asm.R0, 0, "exit"), asm.Mov.Imm(asm.R0, 1), asm.Return().WithSymbol("exit"), }, }) // This sometimes bubbles up from the JIT on aarch64. if errors.Is(err, sys.ENOTSUPP) { return ebpf.ErrNotSupported } return err }, "4.14") // HaveV3ISA probes the running kernel if instructions of the v3 ISA are supported. 
// // Upstream commit 092ed0968bb6 ("bpf: verifier support JMP32"). // // See the package documentation for the meaning of the error return value. func HaveV3ISA() error { return haveV3ISA() } var haveV3ISA = internal.NewFeatureTest("v3 ISA", func() error { err := probeProgram(&ebpf.ProgramSpec{ Type: ebpf.SocketFilter, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.JLT.Imm32(asm.R0, 0, "exit"), asm.Mov.Imm(asm.R0, 1), asm.Return().WithSymbol("exit"), }, }) // This sometimes bubbles up from the JIT on aarch64. if errors.Is(err, sys.ENOTSUPP) { return ebpf.ErrNotSupported } return err }, "5.1") // HaveV4ISA probes the running kernel if instructions of the v4 ISA are supported. // // Upstream commit 1f9a1ea821ff ("bpf: Support new sign-extension load insns"). // // See the package documentation for the meaning of the error return value. func HaveV4ISA() error { return haveV4ISA() } var haveV4ISA = internal.NewFeatureTest("v4 ISA", func() error { err := probeProgram(&ebpf.ProgramSpec{ Type: ebpf.SocketFilter, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.JEq.Imm(asm.R0, 1, "error"), asm.LongJump("exit"), asm.Mov.Imm(asm.R0, 1).WithSymbol("error"), asm.Return().WithSymbol("exit"), }, }) // This sometimes bubbles up from the JIT on aarch64. 
if errors.Is(err, sys.ENOTSUPP) { return ebpf.ErrNotSupported } return err }, "6.6") ================================================ FILE: features/misc_test.go ================================================ package features import ( "testing" "github.com/cilium/ebpf/internal/testutils" ) func TestHaveLargeInstructions(t *testing.T) { testutils.CheckFeatureTest(t, HaveLargeInstructions) } func TestHaveBoundedLoops(t *testing.T) { testutils.CheckFeatureTest(t, HaveBoundedLoops) } func TestHaveV2ISA(t *testing.T) { testutils.CheckFeatureTest(t, HaveV2ISA) } func TestHaveV3ISA(t *testing.T) { testutils.CheckFeatureTest(t, HaveV3ISA) } func TestHaveV4ISA(t *testing.T) { testutils.CheckFeatureTest(t, HaveV4ISA) } ================================================ FILE: features/prog.go ================================================ package features import ( "errors" "fmt" "slices" "strings" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // HaveProgramType probes the running kernel for the availability of the specified program type. // // See the package documentation for the meaning of the error return value. func HaveProgramType(pt ebpf.ProgramType) (err error) { return haveProgramTypeMatrix.Result(pt) } func probeProgram(spec *ebpf.ProgramSpec) error { if spec.Instructions == nil { spec.Instructions = asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), } } prog, err := ebpf.NewProgramWithOptions(spec, ebpf.ProgramOptions{ LogDisabled: true, }) if err == nil { prog.Close() } switch { // EINVAL occurs when attempting to create a program with an unknown type. // E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end // of the struct known by the running kernel, meaning the kernel is too old // to support the given prog type. 
case errors.Is(err, unix.EINVAL), errors.Is(err, unix.E2BIG): err = ebpf.ErrNotSupported } return err } var haveProgramTypeMatrix = internal.FeatureMatrix[ebpf.ProgramType]{ ebpf.SocketFilter: {Version: "3.19"}, ebpf.Kprobe: {Version: "4.1"}, ebpf.SchedCLS: {Version: "4.1"}, ebpf.SchedACT: {Version: "4.1"}, ebpf.TracePoint: {Version: "4.7"}, ebpf.XDP: {Version: "4.8"}, ebpf.PerfEvent: {Version: "4.9"}, ebpf.CGroupSKB: {Version: "4.10"}, ebpf.CGroupSock: {Version: "4.10"}, ebpf.LWTIn: {Version: "4.10"}, ebpf.LWTOut: {Version: "4.10"}, ebpf.LWTXmit: {Version: "4.10"}, ebpf.SockOps: {Version: "4.13"}, ebpf.SkSKB: {Version: "4.14"}, ebpf.CGroupDevice: {Version: "4.15"}, ebpf.SkMsg: {Version: "4.17"}, ebpf.RawTracepoint: {Version: "4.17"}, ebpf.CGroupSockAddr: { Version: "4.17", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.CGroupSockAddr, AttachType: ebpf.AttachCGroupInet4Connect, }) }, }, ebpf.LWTSeg6Local: {Version: "4.18"}, ebpf.LircMode2: {Version: "4.18"}, ebpf.SkReuseport: {Version: "4.19"}, ebpf.FlowDissector: {Version: "4.20"}, ebpf.CGroupSysctl: {Version: "5.2"}, ebpf.RawTracepointWritable: {Version: "5.2"}, ebpf.CGroupSockopt: { Version: "5.3", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.CGroupSockopt, AttachType: ebpf.AttachCGroupGetsockopt, }) }, }, ebpf.Tracing: { Version: "5.5", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.Tracing, AttachType: ebpf.AttachTraceFEntry, AttachTo: "bpf_init", }) }, }, ebpf.StructOps: { Version: "5.6", Fn: func() error { err := probeProgram(&ebpf.ProgramSpec{ Type: ebpf.StructOps, License: "GPL", }) if errors.Is(err, sys.ENOTSUPP) { // ENOTSUPP means the program type is at least known to the kernel. 
return nil } return err }, }, ebpf.Extension: { Version: "5.6", Fn: func() error { // create btf.Func to add to first ins of target and extension so both progs are btf powered btfFn := btf.Func{ Name: "a", Type: &btf.FuncProto{ Return: &btf.Int{}, Params: []btf.FuncParam{ {Name: "ctx", Type: &btf.Pointer{Target: &btf.Struct{Name: "xdp_md"}}}, }, }, Linkage: btf.GlobalFunc, } insns := asm.Instructions{ btf.WithFuncMetadata(asm.Mov.Imm(asm.R0, 0), &btfFn), asm.Return(), } // create target prog prog, err := ebpf.NewProgramWithOptions( &ebpf.ProgramSpec{ Type: ebpf.XDP, Instructions: insns, }, ebpf.ProgramOptions{ LogDisabled: true, }, ) if err != nil { return err } defer prog.Close() // probe for Extension prog with target return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.Extension, Instructions: insns, AttachTarget: prog, AttachTo: btfFn.Name, }) }, }, ebpf.LSM: { Version: "5.7", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.LSM, AttachType: ebpf.AttachLSMMac, AttachTo: "file_mprotect", License: "GPL", }) }, }, ebpf.SkLookup: { Version: "5.9", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.SkLookup, AttachType: ebpf.AttachSkLookup, }) }, }, ebpf.Syscall: { Version: "5.14", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.Syscall, Flags: sys.BPF_F_SLEEPABLE, }) }, }, ebpf.Netfilter: { Version: "6.4", Fn: func() error { return probeProgram(&ebpf.ProgramSpec{ Type: ebpf.Netfilter, AttachType: ebpf.AttachNetfilter, }) }, }, } func init() { for key, ft := range haveProgramTypeMatrix { ft.Name = key.String() if ft.Fn == nil { key := key // avoid the dreaded loop variable problem ft.Fn = func() error { return probeProgram(&ebpf.ProgramSpec{Type: key}) } } } } type helperKey struct { typ ebpf.ProgramType helper asm.BuiltinFunc } var helperCache = internal.NewFeatureCache(func(key helperKey) *internal.FeatureTest { return &internal.FeatureTest{ Name: fmt.Sprintf("%s for program type %s", key.helper, key.typ), 
Fn: func() error { return haveProgramHelper(key.typ, key.helper) }, } }) // HaveProgramHelper probes the running kernel for the availability of the specified helper // function to a specified program type. // Return values have the following semantics: // // err == nil: The feature is available. // errors.Is(err, ebpf.ErrNotSupported): The feature is not available. // err != nil: Any errors encountered during probe execution, wrapped. // // Note that the latter case may include false negatives, and that program creation may // succeed despite an error being returned. // Only `nil` and `ebpf.ErrNotSupported` are conclusive. // // Probe results are cached and persist throughout any process capability changes. func HaveProgramHelper(pt ebpf.ProgramType, helper asm.BuiltinFunc) error { return helperCache.Result(helperKey{pt, helper}) } func haveProgramHelper(pt ebpf.ProgramType, helper asm.BuiltinFunc) error { if ok := helperProbeNotImplemented(pt); ok { return fmt.Errorf("no feature probe for %v/%v", pt, helper) } if err := HaveProgramType(pt); err != nil { return err } spec := &ebpf.ProgramSpec{ Type: pt, Instructions: asm.Instructions{ helper.Call(), asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "GPL", } switch pt { case ebpf.CGroupSockAddr: spec.AttachType = ebpf.AttachCGroupInet4Connect case ebpf.CGroupSockopt: spec.AttachType = ebpf.AttachCGroupGetsockopt case ebpf.SkLookup: spec.AttachType = ebpf.AttachSkLookup case ebpf.Syscall: spec.Flags = sys.BPF_F_SLEEPABLE case ebpf.Netfilter: spec.AttachType = ebpf.AttachNetfilter } prog, err := ebpf.NewProgramWithOptions(spec, ebpf.ProgramOptions{ LogLevel: 1, }) if err == nil { prog.Close() } var verr *ebpf.VerifierError if !errors.As(err, &verr) { return err } helperTag := fmt.Sprintf("#%d", helper) switch { // EACCES occurs when attempting to create a program probe with a helper // while the register args when calling this helper aren't set up properly. 
// We interpret this as the helper being available, because the verifier // returns EINVAL if the helper is not supported by the running kernel. case errors.Is(err, unix.EACCES): err = nil // EINVAL occurs when attempting to create a program with an unknown helper. case errors.Is(err, unix.EINVAL): // https://github.com/torvalds/linux/blob/09a0fa92e5b45e99cf435b2fbf5ebcf889cf8780/kernel/bpf/verifier.c#L10663 if logContainsAll(verr.Log, "invalid func", helperTag) { return ebpf.ErrNotSupported } // https://github.com/torvalds/linux/blob/09a0fa92e5b45e99cf435b2fbf5ebcf889cf8780/kernel/bpf/verifier.c#L10668 wrongProgramType := logContainsAll(verr.Log, "program of this type cannot use helper", helperTag) // https://github.com/torvalds/linux/blob/59b418c7063d30e0a3e1f592d47df096db83185c/kernel/bpf/verifier.c#L10204 // 4.9 doesn't include # in verifier output. wrongProgramType = wrongProgramType || logContainsAll(verr.Log, "unknown func") if wrongProgramType { return fmt.Errorf("program of this type cannot use helper: %w", ebpf.ErrNotSupported) } } return err } func logContainsAll(log []string, needles ...string) bool { first := max(len(log)-5, 0) // Check last 5 lines. 
return slices.ContainsFunc(log[first:], func(line string) bool { for _, needle := range needles { if !strings.Contains(line, needle) { return false } } return true }) } func helperProbeNotImplemented(pt ebpf.ProgramType) bool { switch pt { case ebpf.Extension, ebpf.LSM, ebpf.StructOps, ebpf.Tracing: return true } return false } ================================================ FILE: features/prog_test.go ================================================ package features import ( "errors" "fmt" "math" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/testutils/testmain" ) func TestMain(m *testing.M) { testmain.Run(m) } func TestHaveProgramType(t *testing.T) { testutils.CheckFeatureMatrix(t, haveProgramTypeMatrix) } func TestHaveProgramTypeInvalid(t *testing.T) { if err := HaveProgramType(ebpf.ProgramType(math.MaxUint32)); err == nil { t.Fatal("Expected an error") } else if errors.Is(err, internal.ErrNotSupported) { t.Fatal("Got ErrNotSupported:", err) } } func TestHaveProgramHelper(t *testing.T) { type testCase struct { prog ebpf.ProgramType helper asm.BuiltinFunc expected error version string } // Referencing linux kernel commits to track the kernel version required to pass these test cases. // These cases are derived from libbpf's selftests and helper/prog combinations that are // probed for in cilium/cilium. 
testCases := []testCase{ {ebpf.Kprobe, asm.FnMapLookupElem, nil, "3.19"}, // d0003ec01c66 {ebpf.SocketFilter, asm.FnKtimeGetCoarseNs, nil, "5.11"}, // d05512618056 {ebpf.SchedCLS, asm.FnSkbVlanPush, nil, "4.3"}, // 4e10df9a60d9 {ebpf.Kprobe, asm.FnSkbVlanPush, ebpf.ErrNotSupported, "4.3"}, // 4e10df9a60d9 {ebpf.Kprobe, asm.FnSysBpf, ebpf.ErrNotSupported, "5.14"}, // 79a7f8bdb159 {ebpf.Syscall, asm.FnSysBpf, nil, "5.14"}, // 79a7f8bdb159 {ebpf.XDP, asm.FnJiffies64, nil, "5.5"}, // 5576b991e9c1 {ebpf.XDP, asm.FnKtimeGetBootNs, nil, "5.7"}, // 71d19214776e {ebpf.SchedCLS, asm.FnSkbChangeHead, nil, "5.8"}, // 6f3f65d80dac {ebpf.SchedCLS, asm.FnRedirectNeigh, nil, "5.10"}, // b4ab31414970 {ebpf.SchedCLS, asm.FnSkbEcnSetCe, nil, "5.1"}, // f7c917ba11a6 {ebpf.SchedACT, asm.FnSkAssign, nil, "5.6"}, // cf7fbe660f2d {ebpf.SchedACT, asm.FnFibLookup, nil, "4.18"}, // 87f5fc7e48dd {ebpf.Kprobe, asm.FnFibLookup, ebpf.ErrNotSupported, "4.18"}, // 87f5fc7e48dd {ebpf.CGroupSockAddr, asm.FnGetsockopt, nil, "5.8"}, // beecf11bc218 {ebpf.CGroupSockAddr, asm.FnSkLookupTcp, nil, "4.20"}, // 6acc9b432e67 {ebpf.CGroupSockAddr, asm.FnGetNetnsCookie, nil, "5.7"}, // f318903c0bf4 {ebpf.CGroupSock, asm.FnGetNetnsCookie, nil, "5.7"}, // f318903c0bf4 {ebpf.Kprobe, asm.FnKtimeGetCoarseNs, ebpf.ErrNotSupported, "5.16"}, // 5e0bc3082e2e {ebpf.CGroupSockAddr, asm.FnGetCgroupClassid, nil, "5.7"}, // 5a52ae4e32a6 {ebpf.Kprobe, asm.FnGetBranchSnapshot, nil, "5.16"}, // 856c02dbce4f {ebpf.SchedCLS, asm.FnSkbSetTstamp, nil, "5.18"}, // 9bb984f28d5b {ebpf.CGroupSockopt, asm.FnSkStorageDelete, nil, "5.3"}, // 6ac99e8f23d4 {ebpf.SkLookup, asm.FnSkcToUdp6Sock, nil, "5.9"}, // 0d4fad3e57df {ebpf.Syscall, asm.FnSysClose, nil, "5.14"}, // 3abea089246f {ebpf.Netfilter, asm.FnCgrpStorageDelete, nil, "6.4"}, // c4bcfb38a95e } for _, tc := range testCases { t.Run(fmt.Sprintf("%s/%s", tc.prog.String(), tc.helper.String()), func(t *testing.T) { feature := fmt.Sprintf("helper %s for program type %s", 
tc.helper.String(), tc.prog.String()) testutils.SkipOnOldKernel(t, tc.version, feature) err := HaveProgramHelper(tc.prog, tc.helper) testutils.SkipIfNotSupportedOnOS(t, err) if !errors.Is(err, tc.expected) { t.Fatalf("%s/%s: %v", tc.prog.String(), tc.helper.String(), err) } }) } } func TestHelperProbeNotImplemented(t *testing.T) { // Currently we don't support probing helpers for Tracing, Extension, LSM and StructOps programs. // For each of those test the availability of the FnMapLookupElem helper and expect it to fail. for _, pt := range []ebpf.ProgramType{ebpf.Tracing, ebpf.Extension, ebpf.LSM, ebpf.StructOps} { t.Run(pt.String(), func(t *testing.T) { if err := HaveProgramHelper(pt, asm.FnMapLookupElem); err == nil { t.Fatal("Expected an error") } }) } } ================================================ FILE: features/version.go ================================================ package features import "github.com/cilium/ebpf/internal/linux" // LinuxVersionCode returns the version of the currently running kernel // as defined in the LINUX_VERSION_CODE compile-time macro. It is represented // in the format described by the KERNEL_VERSION macro from linux/version.h. // // Do not use the version to make assumptions about the presence of certain // kernel features, always prefer feature probes in this package. Some // distributions backport or disable eBPF features. 
func LinuxVersionCode() (uint32, error) { v, err := linux.KernelVersion() if err != nil { return 0, err } return v.Kernel(), nil } ================================================ FILE: fuzz_test.go ================================================ package ebpf import ( "bytes" "debug/elf" "testing" ) func FuzzLoadCollectionSpec(f *testing.F) { f.Add([]byte(elf.ELFMAG)) f.Fuzz(func(t *testing.T, data []byte) { if len(data) < len(elf.ELFMAG) { t.Skip("input can't be valid ELF") } spec, err := LoadCollectionSpecFromReader(bytes.NewReader(data)) if err != nil { if spec != nil { t.Fatal("spec is not nil") } } else if spec == nil { t.Fatal("spec is nil") } }) } ================================================ FILE: go.mod ================================================ module github.com/cilium/ebpf go 1.24.0 require ( github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6 github.com/google/go-cmp v0.7.0 github.com/jsimonetti/rtnetlink/v2 v2.0.1 golang.org/x/sync v0.17.0 golang.org/x/sys v0.37.0 ) require ( github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/docker/cli v29.2.0+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect github.com/google/go-containerregistry v0.20.6 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/native v1.1.0 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mdlayher/netlink v1.7.2 // indirect github.com/mdlayher/socket v0.5.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 
github.com/rogpeppe/go-internal v1.12.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/cobra v1.9.1 // indirect github.com/spf13/pflag v1.0.7 // indirect github.com/stretchr/testify v1.10.0 // indirect github.com/vbatts/tar-split v0.12.1 // indirect golang.org/x/mod v0.29.0 // indirect golang.org/x/net v0.46.0 // indirect golang.org/x/tools v0.38.0 // indirect gotest.tools/v3 v3.5.0 // indirect ) tool ( github.com/cilium/ebpf/cmd/bpf2go github.com/cilium/ebpf/internal/cmd/gentypes github.com/google/go-containerregistry/cmd/crane golang.org/x/tools/cmd/stringer ) ================================================ FILE: go.sum ================================================ github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8= github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/cli v29.2.0+incompatible h1:9oBd9+YM7rxjZLfyMGxjraKBKE4/nVyvVfN4qNl9XRM= github.com/docker/cli v29.2.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker-credential-helpers v0.9.3 
h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6 h1:teYtXy9B7y5lHTp8V9KPxpYRAVA7dozigQcMiBust1s= github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6/go.mod h1:p4lGIVX+8Wa6ZPNDvqcxq36XpUDLh42FLetFU7odllI= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-containerregistry v0.20.6 h1:cvWX87UxxLgaH76b4hIvya6Dzz9qHB31qAwjAohdSTU= github.com/google/go-containerregistry v0.20.6/go.mod h1:T0x8MuoAoKX/873bkeSfLD2FAkwCDf9/HZgsFJ02E2Y= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM= github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g= github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw= github.com/mdlayher/socket 
v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos= github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.7 
h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnnbo= github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.0 h1:Ljk6PdHdOhAb5aDMWXjDLMMhph+BpztA4v1QdqEW2eY= gotest.tools/v3 
v3.5.0/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU= ================================================ FILE: helpers_test.go ================================================ package ebpf import ( "errors" "sync" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/testutils" ) var haveTestmod = sync.OnceValues(func() (bool, error) { if platform.IsWindows { return false, nil } // See https://github.com/torvalds/linux/commit/290248a5b7d829871b3ea3c62578613a580a1744 testmod, err := btf.FindHandle(func(info *btf.HandleInfo) bool { return info.IsModule() && info.Name == "bpf_testmod" }) if err != nil && !errors.Is(err, btf.ErrNotFound) { return false, err } testmod.Close() return testmod != nil, nil }) var haveTestmodOps = sync.OnceValues(func() (bool, error) { haveTestMod, err := haveTestmod() if err != nil { return false, err } if !haveTestMod { return false, nil } target := btf.Type((*btf.Struct)(nil)) _, module, err := findTargetInKernel("bpf_struct_ops_bpf_testmod_ops", &target, btf.NewCache()) if err != nil && !errors.Is(err, btf.ErrNotFound) { return false, err } if errors.Is(err, btf.ErrNotFound) { return false, nil } defer module.Close() return true, nil }) func requireTestmod(tb testing.TB) { tb.Helper() testutils.SkipOnOldKernel(tb, "5.11", "bpf_testmod") testmod, err := haveTestmod() if err != nil { tb.Fatal(err) } if !testmod { tb.Skip("bpf_testmod not loaded") } } func requireTestmodOps(tb testing.TB) { tb.Helper() testutils.SkipOnOldKernel(tb, "5.11", "bpf_testmod") testmodOps, err := haveTestmodOps() if err != nil { tb.Fatal(err) } if !testmodOps { tb.Skip("bpf_testmod_ops not loaded") } } func newMap(tb testing.TB, spec *MapSpec, opts *MapOptions) (*Map, error) { tb.Helper() spec = fixupMapSpec(spec) if opts == nil { opts = new(MapOptions) } m, err := NewMapWithOptions(spec, *opts) 
testutils.SkipIfNotSupportedOnOS(tb, err) if err != nil { return nil, err } tb.Cleanup(func() { m.Close() }) return m, nil } func mustNewMap(tb testing.TB, spec *MapSpec, opts *MapOptions) *Map { tb.Helper() m, err := newMap(tb, spec, opts) qt.Assert(tb, qt.IsNil(err)) return m } func createMap(tb testing.TB, typ MapType, maxEntries uint32) *Map { tb.Helper() return mustNewMap(tb, &MapSpec{ Name: "test", Type: typ, KeySize: 4, ValueSize: 4, MaxEntries: maxEntries, }, nil) } func createMapInMap(tb testing.TB, outer, inner MapType) *Map { tb.Helper() return mustNewMap(tb, &MapSpec{ Type: outer, KeySize: 4, MaxEntries: 2, InnerMap: &MapSpec{ Type: inner, KeySize: 4, ValueSize: 4, MaxEntries: 2, }, }, nil) } func newProgram(tb testing.TB, spec *ProgramSpec, opts *ProgramOptions) (*Program, error) { tb.Helper() if opts == nil { opts = new(ProgramOptions) } spec = fixupProgramSpec(spec) prog, err := NewProgramWithOptions(spec, *opts) testutils.SkipIfNotSupportedOnOS(tb, err) if err != nil { return nil, err } tb.Cleanup(func() { prog.Close() }) return prog, nil } func mustNewProgram(tb testing.TB, spec *ProgramSpec, opts *ProgramOptions) *Program { tb.Helper() prog, err := newProgram(tb, spec, opts) qt.Assert(tb, qt.IsNil(err)) return prog } func createProgram(tb testing.TB, typ ProgramType, retval int64) *Program { tb.Helper() return mustNewProgram(tb, &ProgramSpec{ Name: "test", Type: typ, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, retval, asm.DWord), asm.Return(), }, License: "MIT", }, nil) } var basicProgramSpec = &ProgramSpec{ Name: "test", Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 2, asm.DWord), asm.Return(), }, License: "MIT", } // createBasicProgram returns a program of an unspecified type which returns // a non-zero value when executed. 
func createBasicProgram(tb testing.TB) *Program { return mustNewProgram(tb, basicProgramSpec, nil) } func newCollection(tb testing.TB, spec *CollectionSpec, opts *CollectionOptions) (*Collection, error) { tb.Helper() testutils.SkipNonNativeEndian(tb, spec.ByteOrder) spec = fixupCollectionSpec(spec) if opts == nil { opts = new(CollectionOptions) } c, err := NewCollectionWithOptions(spec, *opts) testutils.SkipIfNotSupportedOnOS(tb, err) if err != nil { return nil, err } tb.Cleanup(func() { c.Close() }) return c, nil } func mustNewCollection(tb testing.TB, spec *CollectionSpec, opts *CollectionOptions) *Collection { tb.Helper() c, err := newCollection(tb, spec, opts) qt.Assert(tb, qt.IsNil(err)) return c } func loadAndAssign(tb testing.TB, spec *CollectionSpec, to any, opts *CollectionOptions) error { tb.Helper() spec = fixupCollectionSpec(spec) err := spec.LoadAndAssign(to, opts) testutils.SkipIfNotSupported(tb, err) return err } func mustLoadAndAssign(tb testing.TB, spec *CollectionSpec, to any, opts *CollectionOptions) { qt.Assert(tb, qt.IsNil(loadAndAssign(tb, spec, to, opts))) } func mustRun(tb testing.TB, prog *Program, opts *RunOptions) (retval uint32) { tb.Helper() if opts == nil { opts = &RunOptions{} } if platform.IsLinux && opts.Data == nil { opts.Data = internal.EmptyBPFContext } if platform.IsWindows { switch prog.Type() { case WindowsSample: const minSampleContextLen = 32 if opts.Context == nil { opts.Context = make([]byte, minSampleContextLen) } } } ret, err := prog.Run(opts) testutils.SkipIfNotSupported(tb, err) qt.Assert(tb, qt.IsNil(err)) return ret } // The functions below translate Linux types to their Windows equivalents, if // possible. This allows running most tests on Windows without modification. 
func fixupMapType(typ MapType) MapType { if !platform.IsWindows { return typ } switch typ { case Array: return WindowsArray case Hash: return WindowsHash case ProgramArray: return WindowsProgramArray case PerCPUHash: return WindowsPerCPUHash case PerCPUArray: return WindowsPerCPUArray case LRUHash: return WindowsLRUHash case LRUCPUHash: return WindowsLRUCPUHash case ArrayOfMaps: return WindowsArrayOfMaps case HashOfMaps: return WindowsHashOfMaps case LPMTrie: return WindowsLPMTrie case Queue: return WindowsQueue case Stack: return WindowsStack case RingBuf: return WindowsRingBuf default: return typ } } func fixupMapSpec(spec *MapSpec) *MapSpec { if !platform.IsWindows { return spec } spec = spec.Copy() spec.Type = fixupMapType(spec.Type) if spec.InnerMap != nil { spec.InnerMap.Type = fixupMapType(spec.InnerMap.Type) } return spec } func fixupProgramType(typ ProgramType) ProgramType { if !platform.IsWindows { return typ } switch typ { case SocketFilter: return WindowsSample case XDP: return WindowsSample default: return typ } } func fixupProgramSpec(spec *ProgramSpec) *ProgramSpec { if !platform.IsWindows { return spec } spec = spec.Copy() spec.Type = fixupProgramType(spec.Type) for i, ins := range spec.Instructions { if ins.IsBuiltinCall() { switch asm.BuiltinFunc(ins.Constant) { case asm.FnMapUpdateElem: spec.Instructions[i].Constant = int64(asm.WindowsFnMapUpdateElem) case asm.FnMapLookupElem: spec.Instructions[i].Constant = int64(asm.WindowsFnMapLookupElem) case asm.FnTailCall: spec.Instructions[i].Constant = int64(asm.WindowsFnTailCall) } } } return spec } func fixupCollectionSpec(spec *CollectionSpec) *CollectionSpec { if !platform.IsWindows { return spec } spec = spec.Copy() for name := range spec.Maps { spec.Maps[name] = fixupMapSpec(spec.Maps[name]) } for name := range spec.Programs { spec.Programs[name] = fixupProgramSpec(spec.Programs[name]) } return spec } ================================================ FILE: info.go 
================================================
package ebpf

import (
	"bufio"
	"bytes"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"os"
	"reflect"
	"time"

	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/platform"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/unix"
)

// The *Info structs expose metadata about a program or map. Most
// fields are exposed via a getter:
//
//	func (*MapInfo) ID() (MapID, bool)
//
// This is because the metadata available changes based on kernel version.
// The second boolean return value indicates whether a particular field is
// available on the current kernel.
//
// Always add new metadata as such a getter, unless you can somehow get the
// value of the field on all supported kernels. Also document which version
// a particular field first appeared in.
//
// Some metadata is a buffer which needs additional parsing. In this case,
// store the undecoded data in the Info struct and provide a getter which
// decodes it when necessary. See ProgramInfo.Instructions for an example.

// MapInfo describes a map.
type MapInfo struct {
	// Type of the map.
	Type MapType
	// KeySize is the size of the map key in bytes.
	KeySize uint32
	// ValueSize is the size of the map value in bytes.
	ValueSize uint32
	// MaxEntries is the maximum number of entries the map can hold. Its meaning
	// is map-specific.
	MaxEntries uint32
	// Flags used during map creation.
	Flags uint32
	// Name as supplied by user space at load time. Available from 4.15.
	Name string

	// id is the map ID, exposed via ID(). Zero means not available.
	id MapID
	// btf is the ID of the BTF associated with the map, exposed via BTFID().
	// Zero means not available or not populated.
	btf btf.ID
	// mapExtra is a map-specific opaque value, exposed via MapExtra().
	mapExtra uint64
	// memlock is the value of the "memlock" fdinfo field, exposed via
	// Memlock().
	memlock uint64
	// frozen is the value of the "frozen" fdinfo field, exposed via Frozen().
	frozen bool
}

// minimalMapInfoFromFd queries the minimum information needed to create a Map
// based on a file descriptor. This requires the map type, key/value sizes,
// maxentries and flags.
//
// Does not fall back to fdinfo since the version gap between fdinfo (4.10) and
// [sys.ObjInfo] (4.13) is small and both kernels are EOL since at least Nov
// 2017.
// // Requires at least Linux 4.13. func minimalMapInfoFromFd(fd *sys.FD) (*MapInfo, error) { var info sys.MapInfo if err := sys.ObjInfo(fd, &info); err != nil { return nil, fmt.Errorf("getting object info: %w", err) } typ, err := MapTypeForPlatform(platform.Native, info.Type) if err != nil { return nil, fmt.Errorf("map type: %w", err) } return &MapInfo{ Type: typ, KeySize: info.KeySize, ValueSize: info.ValueSize, MaxEntries: info.MaxEntries, Flags: uint32(info.MapFlags), Name: unix.ByteSliceToString(info.Name[:]), }, nil } // newMapInfoFromFd queries map information about the given fd. [sys.ObjInfo] is // attempted first, supplementing any missing values with information from // /proc/self/fdinfo. Ignores EINVAL from ObjInfo as well as ErrNotSupported // from reading fdinfo (indicating the file exists, but no fields of interest // were found). If both fail, an error is always returned. func newMapInfoFromFd(fd *sys.FD) (*MapInfo, error) { var info sys.MapInfo err1 := sys.ObjInfo(fd, &info) // EINVAL means the kernel doesn't support BPF_OBJ_GET_INFO_BY_FD. Continue // with fdinfo if that's the case. if err1 != nil && !errors.Is(err1, unix.EINVAL) { return nil, fmt.Errorf("getting object info: %w", err1) } typ, err := MapTypeForPlatform(platform.Native, info.Type) if err != nil { return nil, fmt.Errorf("map type: %w", err) } mi := &MapInfo{ typ, info.KeySize, info.ValueSize, info.MaxEntries, uint32(info.MapFlags), unix.ByteSliceToString(info.Name[:]), MapID(info.Id), btf.ID(info.BtfId), info.MapExtra, 0, false, } // Supplement OBJ_INFO with data from /proc/self/fdinfo. It contains fields // like memlock and frozen that are not present in OBJ_INFO. 
err2 := readMapInfoFromProc(fd, mi) if err2 != nil && !errors.Is(err2, ErrNotSupported) { return nil, fmt.Errorf("getting map info from fdinfo: %w", err2) } if err1 != nil && err2 != nil { return nil, fmt.Errorf("ObjInfo and fdinfo both failed: objinfo: %w, fdinfo: %w", err1, err2) } return mi, nil } // readMapInfoFromProc queries map information about the given fd from // /proc/self/fdinfo. It only writes data into fields that have a zero value. func readMapInfoFromProc(fd *sys.FD, mi *MapInfo) error { var mapType uint32 err := scanFdInfo(fd, map[string]interface{}{ "map_type": &mapType, "map_id": &mi.id, "key_size": &mi.KeySize, "value_size": &mi.ValueSize, "max_entries": &mi.MaxEntries, "map_flags": &mi.Flags, "map_extra": &mi.mapExtra, "memlock": &mi.memlock, "frozen": &mi.frozen, }) if err != nil { return err } if mi.Type == 0 { mi.Type, err = MapTypeForPlatform(platform.Linux, mapType) if err != nil { return fmt.Errorf("map type: %w", err) } } return nil } // ID returns the map ID. // // Available from 4.13. // // The bool return value indicates whether this optional field is available. func (mi *MapInfo) ID() (MapID, bool) { return mi.id, mi.id > 0 } // BTFID returns the BTF ID associated with the Map. // // The ID is only valid as long as the associated Map is kept alive. // Available from 4.18. // // The bool return value indicates whether this optional field is available and // populated. (The field may be available but not populated if the kernel // supports the field but the Map was loaded without BTF information.) func (mi *MapInfo) BTFID() (btf.ID, bool) { return mi.btf, mi.btf > 0 } // MapExtra returns an opaque field whose meaning is map-specific. // // Available from 5.16. // // The bool return value indicates whether this optional field is available and // populated, if it was specified during Map creation. 
func (mi *MapInfo) MapExtra() (uint64, bool) { return mi.mapExtra, mi.mapExtra > 0 } // Memlock returns an approximate number of bytes allocated to this map. // // Available from 4.10. // // The bool return value indicates whether this optional field is available. func (mi *MapInfo) Memlock() (uint64, bool) { return mi.memlock, mi.memlock > 0 } // Frozen indicates whether [Map.Freeze] was called on this map. If true, // modifications from user space are not allowed. // // Available from 5.2. Requires access to procfs. // // If the kernel doesn't support map freezing, this field will always be false. func (mi *MapInfo) Frozen() bool { return mi.frozen } // ProgramStats contains runtime statistics for a single [Program], returned by // [Program.Stats]. // // Will contain mostly zero values if the collection of statistics is not // enabled, see [EnableStats]. type ProgramStats struct { // Total accumulated runtime of the Program. // // Requires at least Linux 5.8. Runtime time.Duration // Total number of times the Program has executed. // // Requires at least Linux 5.8. RunCount uint64 // Total number of times the program was not executed due to recursion. This // can happen when another bpf program is already running on the cpu, when bpf // program execution is interrupted, for example. // // Requires at least Linux 5.12. RecursionMisses uint64 } func newProgramStatsFromFd(fd *sys.FD) (*ProgramStats, error) { var info sys.ProgInfo if err := sys.ObjInfo(fd, &info); err != nil { return nil, fmt.Errorf("getting program info: %w", err) } return &ProgramStats{ Runtime: time.Duration(info.RunTimeNs), RunCount: info.RunCnt, RecursionMisses: info.RecursionMisses, }, nil } // programJitedInfo holds information about JITed info of a program. type programJitedInfo struct { // ksyms holds the ksym addresses of the BPF program, including those of its // subprograms. // // Available from 4.18. 
ksyms []uint64 numKsyms uint32 // insns holds the JITed machine native instructions of the program, // including those of its subprograms. // // Available from 4.13. insns []byte numInsns uint32 // lineInfos holds the JITed line infos, which are kernel addresses. // // Available from 5.0. lineInfos []uint64 numLineInfos uint32 // lineInfoRecSize is the size of a single line info record. // // Available from 5.0. lineInfoRecSize uint32 // funcLens holds the insns length of each function. // // Available from 4.18. funcLens []uint32 numFuncLens uint32 } // ProgramInfo describes a Program's immutable metadata. For runtime statistics, // see [ProgramStats]. type ProgramInfo struct { Type ProgramType id ProgramID // Truncated hash of the BPF bytecode. Available from 4.13. Tag string // Name as supplied by user space at load time. Available from 4.15. Name string createdByUID uint32 haveCreatedByUID bool btf btf.ID loadTime time.Duration restricted bool maps []MapID insns []byte numInsns uint32 jitedSize uint32 verifiedInstructions uint32 jitedInfo programJitedInfo lineInfos []byte numLineInfos uint32 funcInfos []byte numFuncInfos uint32 memlock uint64 } // minimalProgramFromFd queries the minimum information needed to create a // Program based on a file descriptor, requiring at least the program type. // // Does not fall back to fdinfo since the version gap between fdinfo (4.10) and // [sys.ObjInfo] (4.13) is small and both kernels are EOL since at least Nov // 2017. // // Requires at least Linux 4.13. 
func minimalProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) { var info sys.ProgInfo if err := sys.ObjInfo(fd, &info); err != nil { return nil, fmt.Errorf("getting object info: %w", err) } typ, err := ProgramTypeForPlatform(platform.Native, info.Type) if err != nil { return nil, fmt.Errorf("program type: %w", err) } return &ProgramInfo{ Type: typ, Name: unix.ByteSliceToString(info.Name[:]), }, nil } // newProgramInfoFromFd queries program information about the given fd. // // [sys.ObjInfo] is attempted first, supplementing any missing values with // information from /proc/self/fdinfo. Ignores EINVAL from ObjInfo as well as // ErrNotSupported from reading fdinfo (indicating the file exists, but no // fields of interest were found). If both fail, an error is always returned. func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) { var info sys.ProgInfo err1 := sys.ObjInfo(fd, &info) // EINVAL means the kernel doesn't support BPF_OBJ_GET_INFO_BY_FD. Continue // with fdinfo if that's the case. if err1 != nil && !errors.Is(err1, unix.EINVAL) { return nil, fmt.Errorf("getting object info: %w", err1) } typ, err := ProgramTypeForPlatform(platform.Native, info.Type) if err != nil { return nil, fmt.Errorf("program type: %w", err) } pi := ProgramInfo{ Type: typ, id: ProgramID(info.Id), Tag: hex.EncodeToString(info.Tag[:]), Name: unix.ByteSliceToString(info.Name[:]), btf: btf.ID(info.BtfId), jitedSize: info.JitedProgLen, loadTime: time.Duration(info.LoadTime), verifiedInstructions: info.VerifiedInsns, numInsns: info.XlatedProgLen, } // Supplement OBJ_INFO with data from /proc/self/fdinfo. It contains fields // like memlock that is not present in OBJ_INFO. 
err2 := readProgramInfoFromProc(fd, &pi) if err2 != nil && !errors.Is(err2, ErrNotSupported) { return nil, fmt.Errorf("getting map info from fdinfo: %w", err2) } if err1 != nil && err2 != nil { return nil, fmt.Errorf("ObjInfo and fdinfo both failed: objinfo: %w, fdinfo: %w", err1, err2) } if platform.IsWindows && info.Tag == [8]uint8{} { // Windows doesn't support the tag field, clear it for now. pi.Tag = "" } // Start with a clean struct for the second call, otherwise we may get EFAULT. var info2 sys.ProgInfo makeSecondCall := false if info.NrMapIds > 0 { pi.maps = make([]MapID, info.NrMapIds) info2.NrMapIds = info.NrMapIds info2.MapIds = sys.SlicePointer(pi.maps) makeSecondCall = true } else if haveProgramInfoMapIDs() == nil { // This program really has no associated maps. pi.maps = make([]MapID, 0) } else { // The kernel doesn't report associated maps. pi.maps = nil } // createdByUID and NrMapIds were introduced in the same kernel version. if pi.maps != nil && platform.IsLinux { pi.createdByUID = info.CreatedByUid pi.haveCreatedByUID = true } if info.XlatedProgLen > 0 { pi.insns = make([]byte, info.XlatedProgLen) var info3 sys.ProgInfo info3.XlatedProgLen = info.XlatedProgLen info3.XlatedProgInsns = sys.SlicePointer(pi.insns) // When kernel.kptr_restrict and net.core.bpf_jit_harden are both set, it causes the // syscall to abort when trying to readback xlated instructions, skipping other info // as well. So request xlated instructions separately. 
if err := sys.ObjInfo(fd, &info3); err != nil { return nil, err } if info3.XlatedProgInsns.IsNil() { pi.restricted = true pi.insns = nil } } if info.NrLineInfo > 0 { pi.lineInfos = make([]byte, btf.LineInfoSize*info.NrLineInfo) info2.LineInfo = sys.SlicePointer(pi.lineInfos) info2.LineInfoRecSize = btf.LineInfoSize info2.NrLineInfo = info.NrLineInfo pi.numLineInfos = info.NrLineInfo makeSecondCall = true } if info.NrFuncInfo > 0 { pi.funcInfos = make([]byte, btf.FuncInfoSize*info.NrFuncInfo) info2.FuncInfo = sys.SlicePointer(pi.funcInfos) info2.FuncInfoRecSize = btf.FuncInfoSize info2.NrFuncInfo = info.NrFuncInfo pi.numFuncInfos = info.NrFuncInfo makeSecondCall = true } pi.jitedInfo.lineInfoRecSize = info.JitedLineInfoRecSize if info.JitedProgLen > 0 { pi.jitedInfo.numInsns = info.JitedProgLen pi.jitedInfo.insns = make([]byte, info.JitedProgLen) info2.JitedProgLen = info.JitedProgLen info2.JitedProgInsns = sys.SlicePointer(pi.jitedInfo.insns) makeSecondCall = true } if info.NrJitedFuncLens > 0 { pi.jitedInfo.numFuncLens = info.NrJitedFuncLens pi.jitedInfo.funcLens = make([]uint32, info.NrJitedFuncLens) info2.NrJitedFuncLens = info.NrJitedFuncLens info2.JitedFuncLens = sys.SlicePointer(pi.jitedInfo.funcLens) makeSecondCall = true } if info.NrJitedLineInfo > 0 { pi.jitedInfo.numLineInfos = info.NrJitedLineInfo pi.jitedInfo.lineInfos = make([]uint64, info.NrJitedLineInfo) info2.NrJitedLineInfo = info.NrJitedLineInfo info2.JitedLineInfo = sys.SlicePointer(pi.jitedInfo.lineInfos) info2.JitedLineInfoRecSize = info.JitedLineInfoRecSize makeSecondCall = true } if info.NrJitedKsyms > 0 { pi.jitedInfo.numKsyms = info.NrJitedKsyms pi.jitedInfo.ksyms = make([]uint64, info.NrJitedKsyms) info2.JitedKsyms = sys.SlicePointer(pi.jitedInfo.ksyms) info2.NrJitedKsyms = info.NrJitedKsyms makeSecondCall = true } if makeSecondCall { if err := sys.ObjInfo(fd, &info2); err != nil { return nil, err } if info.JitedProgLen > 0 && info2.JitedProgInsns.IsNil() { // JIT information is not 
available due to kernel.kptr_restrict pi.jitedInfo.lineInfos = nil pi.jitedInfo.ksyms = nil pi.jitedInfo.insns = nil pi.jitedInfo.funcLens = nil } } if len(pi.Name) == len(info.Name)-1 { // Possibly truncated, check BTF info for full name name, err := readNameFromFunc(&pi) if err == nil { pi.Name = name } // If an error occurs, keep the truncated name, which is better than none } return &pi, nil } func readNameFromFunc(pi *ProgramInfo) (string, error) { if pi.numFuncInfos == 0 { return "", errors.New("no function info") } spec, err := pi.btfSpec() if err != nil { return "", err } funcInfos, err := btf.LoadFuncInfos( bytes.NewReader(pi.funcInfos), internal.NativeEndian, pi.numFuncInfos, spec, ) if err != nil { return "", err } for _, funcInfo := range funcInfos { if funcInfo.Offset == 0 { // Information about the whole program return funcInfo.Func.Name, nil } } return "", errors.New("no function info about program") } func readProgramInfoFromProc(fd *sys.FD, pi *ProgramInfo) error { var progType uint32 err := scanFdInfo(fd, map[string]interface{}{ "prog_type": &progType, "prog_tag": &pi.Tag, "memlock": &pi.memlock, }) if errors.Is(err, ErrNotSupported) && !errors.Is(err, internal.ErrNotSupportedOnOS) { return &internal.UnsupportedFeatureError{ Name: "reading program info from /proc/self/fdinfo", MinimumVersion: internal.Version{4, 10, 0}, } } if err != nil { return err } pi.Type, err = ProgramTypeForPlatform(platform.Linux, progType) if err != nil { return fmt.Errorf("program type: %w", err) } return nil } // ID returns the program ID. // // Available from 4.13. // // The bool return value indicates whether this optional field is available. func (pi *ProgramInfo) ID() (ProgramID, bool) { return pi.id, pi.id > 0 } // CreatedByUID returns the Uid that created the program. // // Available from 4.15. // // The bool return value indicates whether this optional field is available. 
func (pi *ProgramInfo) CreatedByUID() (uid uint32, ok bool) {
	uid, ok = pi.createdByUID, pi.haveCreatedByUID
	return uid, ok
}

// BTFID returns the ID of the BTF object associated with the program.
//
// The ID is only valid as long as the associated program is kept alive.
// Available from 5.0.
//
// The bool return value is false when the ID is zero, which is the case if
// the kernel doesn't support the field or if the program was loaded without
// BTF information.
func (pi *ProgramInfo) BTFID() (btf.ID, bool) {
	id := pi.btf
	return id, id > 0
}

// btfSpec loads the BTF spec of the program via a temporary BTF handle.
//
// Returns an error wrapping ErrNotSupported if the program has no BTF ID.
func (pi *ProgramInfo) btfSpec() (*btf.Spec, error) {
	btfID, haveBTF := pi.BTFID()
	if !haveBTF {
		return nil, fmt.Errorf("program created without BTF or unsupported kernel: %w", ErrNotSupported)
	}

	handle, err := btf.NewHandleFromID(btfID)
	if err != nil {
		return nil, fmt.Errorf("get BTF handle: %w", err)
	}
	// The handle is only needed for the duration of this call.
	defer handle.Close()

	loaded, err := handle.Spec(nil)
	if err != nil {
		return nil, fmt.Errorf("get BTF spec: %w", err)
	}
	return loaded, nil
}

// ErrRestrictedKernel is returned when kernel address information is restricted
// by kernel.kptr_restrict and/or net.core.bpf_jit_harden sysctls.
var ErrRestrictedKernel = internal.ErrRestrictedKernel

// LineInfos returns the BTF line information of the program.
//
// Available from 5.0.
//
// Returns an error wrapping [ErrRestrictedKernel] if line infos are restricted
// by sysctls.
//
// Requires CAP_SYS_ADMIN or equivalent for reading BTF information. Returns
// ErrNotSupported if the program was created without BTF or if the kernel
// doesn't support the field.
func (pi *ProgramInfo) LineInfos() (btf.LineOffsets, error) { if len(pi.lineInfos) == 0 { return nil, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported) } spec, err := pi.btfSpec() if err != nil { return nil, err } return btf.LoadLineInfos( bytes.NewReader(pi.lineInfos), internal.NativeEndian, pi.numLineInfos, spec, ) } // Instructions returns the 'xlated' instruction stream of the program // after it has been verified and rewritten by the kernel. These instructions // cannot be loaded back into the kernel as-is, this is mainly used for // inspecting loaded programs for troubleshooting, dumping, etc. // // For example, map accesses are made to reference their kernel map IDs, // not the FDs they had when the program was inserted. Note that before // the introduction of bpf_insn_prepare_dump in kernel 4.16, xlated // instructions were not sanitized, making the output even less reusable // and less likely to round-trip or evaluate to the same program Tag. // // The first instruction is marked as a symbol using the Program's name. // // If available, the instructions will be annotated with metadata from the // BTF. This includes line information and function information. Reading // this metadata requires CAP_SYS_ADMIN or equivalent. If capability is // unavailable, the instructions will be returned without metadata. // // Returns an error wrapping [ErrRestrictedKernel] if instructions are // restricted by sysctls. // // Available from 4.13. Requires CAP_BPF or equivalent for plain instructions. // Requires CAP_SYS_ADMIN for instructions with metadata. 
func (pi *ProgramInfo) Instructions() (asm.Instructions, error) { if platform.IsWindows && len(pi.insns) == 0 { return nil, fmt.Errorf("read instructions: %w", internal.ErrNotSupportedOnOS) } if pi.restricted { return nil, fmt.Errorf("instructions: %w", ErrRestrictedKernel) } // If the calling process is not BPF-capable or if the kernel doesn't // support getting xlated instructions, the field will be zero. if len(pi.insns) == 0 { return nil, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported) } r := bytes.NewReader(pi.insns) insns, err := asm.AppendInstructions(nil, r, internal.NativeEndian, platform.Native) if err != nil { return nil, fmt.Errorf("unmarshaling instructions: %w", err) } if pi.btf != 0 { btfh, err := btf.NewHandleFromID(pi.btf) if err != nil { // Getting a BTF handle requires CAP_SYS_ADMIN, if not available we get an -EPERM. // Ignore it and fall back to instructions without metadata. if !errors.Is(err, unix.EPERM) { return nil, fmt.Errorf("unable to get BTF handle: %w", err) } } // If we have a BTF handle, we can use it to assign metadata to the instructions. 
if btfh != nil { defer btfh.Close() spec, err := btfh.Spec(nil) if err != nil { return nil, fmt.Errorf("unable to get BTF spec: %w", err) } lineInfos, err := btf.LoadLineInfos(bytes.NewReader(pi.lineInfos), internal.NativeEndian, pi.numLineInfos, spec) if err != nil { return nil, fmt.Errorf("parse line info: %w", err) } funcInfos, err := btf.LoadFuncInfos(bytes.NewReader(pi.funcInfos), internal.NativeEndian, pi.numFuncInfos, spec) if err != nil { return nil, fmt.Errorf("parse func info: %w", err) } iter := insns.Iterate() for iter.Next() { assignMetadata(iter.Ins, iter.Offset, &funcInfos, &lineInfos, nil) } } } fn := btf.FuncMetadata(&insns[0]) name := pi.Name if fn != nil { name = fn.Name } insns[0] = insns[0].WithSymbol(name) return insns, nil } // JitedSize returns the size of the program's JIT-compiled machine code in // bytes, which is the actual code executed on the host's CPU. This field // requires the BPF JIT compiler to be enabled. // // Returns an error wrapping [ErrRestrictedKernel] if jited program size is // restricted by sysctls. // // Available from 4.13. Reading this metadata requires CAP_BPF or equivalent. func (pi *ProgramInfo) JitedSize() (uint32, error) { if pi.jitedSize == 0 { return 0, fmt.Errorf("insufficient permissions, unsupported kernel, or JIT compiler disabled: %w", ErrNotSupported) } return pi.jitedSize, nil } // TranslatedSize returns the size of the program's translated instructions in // bytes, after it has been verified and rewritten by the kernel. // // Available from 4.13. Reading this metadata requires CAP_BPF or equivalent. func (pi *ProgramInfo) TranslatedSize() (int, error) { if pi.numInsns == 0 { return 0, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported) } return int(pi.numInsns), nil } // MapIDs returns the maps related to the program. // // Available from 4.15. // // The bool return value indicates whether this optional field is available. 
func (pi *ProgramInfo) MapIDs() ([]MapID, bool) {
	// A nil slice means the field is unavailable; an empty one means the
	// program has no maps.
	ids := pi.maps
	return ids, ids != nil
}

// LoadTime returns when the program was loaded since boot time.
//
// Available from 4.15.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) LoadTime() (time.Duration, bool) {
	// loadTime and NrMapIds were introduced in the same kernel version.
	sinceBoot := pi.loadTime
	return sinceBoot, sinceBoot > 0
}

// VerifiedInstructions returns the number of verified instructions in the
// program.
//
// Available from 5.16.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) VerifiedInstructions() (uint32, bool) {
	count := pi.verifiedInstructions
	return count, count > 0
}

// JitedKsymAddrs returns the ksym addresses of the BPF program, including its
// subprograms. The addresses correspond to their symbols in /proc/kallsyms.
//
// Available from 4.18. Note that before 5.x, this field can be empty for
// programs without subprograms (bpf2bpf calls).
//
// The bool return value indicates whether this optional field is available.
// If no addresses are available, an empty slice and false are returned.
//
// When a kernel address can't fit into uintptr (which is usually the case when
// running 32 bit program on a 64 bit kernel), this returns a nil slice and
// false.
func (pi *ProgramInfo) JitedKsymAddrs() ([]uintptr, bool) {
	raw := pi.jitedInfo.ksyms
	if len(raw) == 0 {
		return []uintptr{}, false
	}

	// Check if a kernel address fits into uintptr (it might not when
	// using a 32 bit binary on a 64 bit kernel). This check should work
	// with any kernel address, since they have 1s at the highest bits.
	if first := raw[0]; uint64(uintptr(first)) != first {
		return nil, false
	}

	addrs := make([]uintptr, len(raw))
	for i, addr := range raw {
		addrs[i] = uintptr(addr)
	}
	return addrs, true
}

// JitedInsns returns the JITed machine native instructions of the program.
//
// Available from 4.13.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) JitedInsns() ([]byte, bool) {
	code := pi.jitedInfo.insns
	return code, len(code) > 0
}

// JitedLineInfos returns the JITed line infos of the program.
//
// Available from 5.0.
//
// The bool return value is false if no JITed line infos are available.
func (pi *ProgramInfo) JitedLineInfos() ([]uint64, bool) {
	infos := pi.jitedInfo.lineInfos
	return infos, len(infos) > 0
}

// JitedFuncLens returns the insns length of each function in the JITed program.
//
// Available from 4.18.
//
// The bool return value is false if no function lengths are available.
func (pi *ProgramInfo) JitedFuncLens() ([]uint32, bool) {
	lens := pi.jitedInfo.funcLens
	return lens, len(lens) > 0
}

// FuncInfos returns the offset and function information of all (sub)programs in
// a BPF program.
//
// Available from 5.0.
//
// Requires CAP_SYS_ADMIN or equivalent for reading BTF information. Returns
// an error wrapping ErrNotSupported if no function information is available,
// for example because the program was created without BTF or because the
// kernel doesn't support the field.
func (pi *ProgramInfo) FuncInfos() (btf.FuncOffsets, error) {
	// NOTE(review): earlier documentation mentioned an error wrapping
	// ErrRestrictedKernel for sysctl restrictions; this method itself never
	// returns it. Confirm which contract is intended.
	if raw := pi.funcInfos; len(raw) == 0 {
		return nil, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported)
	}

	// Decoding the records requires the program's BTF.
	spec, err := pi.btfSpec()
	if err != nil {
		return nil, err
	}

	records := bytes.NewReader(pi.funcInfos)
	return btf.LoadFuncInfos(records, internal.NativeEndian, pi.numFuncInfos, spec)
}

// Memlock returns an approximate number of bytes allocated to this program.
//
// Available from 4.10.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) Memlock() (uint64, bool) { return pi.memlock, pi.memlock > 0 } func scanFdInfo(fd *sys.FD, fields map[string]interface{}) error { if platform.IsWindows { return fmt.Errorf("read fdinfo: %w", internal.ErrNotSupportedOnOS) } fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", fd.Int())) if err != nil { return err } defer fh.Close() if err := scanFdInfoReader(fh, fields); err != nil { return fmt.Errorf("%s: %w", fh.Name(), err) } return nil } func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error { var ( scanner = bufio.NewScanner(r) scanned int reader bytes.Reader ) for scanner.Scan() { key, rest, found := bytes.Cut(scanner.Bytes(), []byte(":")) if !found { // Line doesn't contain a colon, skip. continue } field, ok := fields[string(key)] if !ok { continue } // If field already contains a non-zero value, don't overwrite it with fdinfo. if !zero(field) { scanned++ continue } // Cut the \t following the : as well as any potential trailing whitespace. rest = bytes.TrimSpace(rest) reader.Reset(rest) if n, err := fmt.Fscan(&reader, field); err != nil || n != 1 { return fmt.Errorf("can't parse field %s: %v", key, err) } scanned++ } if err := scanner.Err(); err != nil { return fmt.Errorf("scanning fdinfo: %w", err) } if len(fields) > 0 && scanned == 0 { return ErrNotSupported } return nil } func zero(arg any) bool { v := reflect.ValueOf(arg) // Unwrap pointers and interfaces. for v.Kind() == reflect.Pointer || v.Kind() == reflect.Interface { v = v.Elem() } return v.IsZero() } // EnableStats starts collecting runtime statistics of eBPF programs, like the // amount of program executions and the cumulative runtime. // // Specify a BPF_STATS_* constant to select which statistics to collect, like // [unix.BPF_STATS_RUN_TIME]. Closing the returned [io.Closer] will stop // collecting statistics. // // Collecting statistics may have a performance impact. // // Requires at least Linux 5.8. 
func EnableStats(which uint32) (io.Closer, error) {
	fd, err := sys.EnableStats(&sys.EnableStatsAttr{
		Type: which,
	})
	if err != nil {
		return nil, err
	}
	// The returned fd is handed out as the io.Closer.
	return fd, nil
}

// haveProgramInfoMapIDs probes whether the kernel reports map IDs in program
// info. It loads a trivial socket filter and queries its info with NrMapIds
// set; EINVAL and E2BIG are mapped to internal.ErrNotSupported.
var haveProgramInfoMapIDs = internal.NewFeatureTest("map IDs in program info", func() error {
	if platform.IsWindows {
		// We only support efW versions which have this feature, no need to probe.
		return nil
	}

	prog, err := progLoad(asm.Instructions{
		asm.LoadImm(asm.R0, 0, asm.DWord),
		asm.Return(),
	}, SocketFilter, "MIT")
	if err != nil {
		return err
	}
	defer prog.Close()

	err = sys.ObjInfo(prog, &sys.ProgInfo{
		// NB: Don't need to allocate MapIds since the program isn't using
		// any maps.
		NrMapIds: 1,
	})
	if errors.Is(err, unix.EINVAL) {
		// Most likely the syscall doesn't exist.
		return internal.ErrNotSupported
	}
	if errors.Is(err, unix.E2BIG) {
		// We've hit check_uarg_tail_zero on older kernels.
		return internal.ErrNotSupported
	}

	return err
}, "4.15", "windows:0.21.0")

================================================
FILE: info_test.go
================================================
package ebpf

import (
	"fmt"
	"os"
	"reflect"
	"runtime"
	"strings"
	"testing"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/testutils"
)

// btfFn is the BTF function attached to both functions of multiprogSpec.
var btfFn = &btf.Func{
	Name: "_",
	Type: &btf.FuncProto{
		Return: &btf.Int{Size: 16},
		Params: []btf.FuncParam{},
	},
	Linkage: btf.StaticFunc,
}

// hashMapSpec is the map spec used by the map info tests and benchmarks.
var hashMapSpec = &MapSpec{
	Type:       Hash,
	KeySize:    4,
	ValueSize:  5,
	MaxEntries: 2,
	Flags:      sys.BPF_F_NO_PREALLOC,
}

// multiprogSpec is a socket filter consisting of a main function which calls
// a subprogram labelled "fn". Both carry func metadata and line info.
var multiprogSpec = &ProgramSpec{
	Name: "test",
	Type: SocketFilter,
	Instructions: asm.Instructions{
		btf.WithFuncMetadata(asm.LoadImm(asm.R0, 0, asm.DWord), btfFn).
			WithSource(asm.Comment("line info")),
		asm.Call.Label("fn"),
		asm.Return(),
		btf.WithFuncMetadata(asm.LoadImm(asm.R0, 0, asm.DWord), btfFn).
			WithSource(asm.Comment("line info")).WithSymbol("fn"),
		asm.Return(),
	},
	License: "MIT",
}

// validateMapInfo asserts that the exported fields of info match spec and
// that the reported memlock is not zero.
func validateMapInfo(t *testing.T, info *MapInfo, spec *MapSpec) {
	t.Helper()

	qt.Assert(t, qt.Equals(info.Type, spec.Type))
	qt.Assert(t, qt.Equals(info.KeySize, spec.KeySize))
	qt.Assert(t, qt.Equals(info.ValueSize, spec.ValueSize))
	qt.Assert(t, qt.Equals(info.MaxEntries, spec.MaxEntries))
	qt.Assert(t, qt.Equals(info.Flags, spec.Flags))
	memlock, _ := info.Memlock()
	qt.Assert(t, qt.Not(qt.Equals(memlock, 0)))
}

// TestMapInfo checks the info returned by Map.Info against hashMapSpec.
func TestMapInfo(t *testing.T) {
	m := mustNewMap(t, hashMapSpec, nil)

	info, err := m.Info()
	qt.Assert(t, qt.IsNil(err))

	validateMapInfo(t, info, hashMapSpec)
}

// TestMapInfoFromProc checks map info obtained only via readMapInfoFromProc.
// Skipped if that isn't supported.
func TestMapInfoFromProc(t *testing.T) {
	hash := mustNewMap(t, hashMapSpec, nil)

	var info MapInfo
	err := readMapInfoFromProc(hash.fd, &info)
	testutils.SkipIfNotSupported(t, err)

	qt.Assert(t, qt.IsNil(err))

	validateMapInfo(t, &info, hashMapSpec)
}

// TestMapInfoFromProcOuterMap is like TestMapInfoFromProc, but for an
// ArrayOfMaps with an inner Array map.
func TestMapInfoFromProcOuterMap(t *testing.T) {
	outer := &MapSpec{
		Type:       ArrayOfMaps,
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 2,
		InnerMap: &MapSpec{
			Type:       Array,
			KeySize:    4,
			ValueSize:  4,
			MaxEntries: 2,
		},
	}
	m := mustNewMap(t, outer, nil)

	var info MapInfo
	err := readMapInfoFromProc(m.fd, &info)
	testutils.SkipIfNotSupported(t, err)

	qt.Assert(t, qt.IsNil(err))

	validateMapInfo(t, &info, outer)
}

// BenchmarkNewMapFromFD measures newMapFromFD on an existing map fd.
func BenchmarkNewMapFromFD(b *testing.B) {
	b.ReportAllocs()

	m := mustNewMap(b, hashMapSpec, nil)

	for b.Loop() {
		if _, err := newMapFromFD(m.fd); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkMapInfo measures newMapInfoFromFd on an existing map fd.
func BenchmarkMapInfo(b *testing.B) {
	b.ReportAllocs()

	m := mustNewMap(b, hashMapSpec, nil)

	for b.Loop() {
		if _, err := newMapInfoFromFd(m.fd); err != nil {
			b.Fatal(err)
		}
	}
}

// validateProgInfo asserts that info has the type of spec. If a tag is
// present it must be one of the two known values, and if memlock is
// available it must be 4096.
func validateProgInfo(t *testing.T, spec *ProgramSpec, info *ProgramInfo) {
	t.Helper()

	qt.Assert(t, qt.Equals(info.Type, spec.Type))
	if info.Tag != "" {
		qt.Assert(t, qt.SliceAny(
			[]string{
				"d7edec644f05498d", // SHA1, pre-6.18
				"01e57aadad14352b", // SHA256
			},
			qt.F2(qt.Equals, info.Tag),
		))
	}
	memlock, ok := info.Memlock()
	if ok {
		qt.Assert(t, qt.Equals(memlock, 4096))
	}
}

// TestProgramInfo loads the basic program and checks that each optional
// ProgramInfo field is available exactly on the versions which support it.
func TestProgramInfo(t *testing.T) {
	spec := fixupProgramSpec(basicProgramSpec)
	prog := mustNewProgram(t, spec, nil)

	info, err := newProgramInfoFromFd(prog.fd)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	validateProgInfo(t, spec, info)

	id, ok := info.ID()
	qt.Assert(t, qt.IsTrue(ok))
	qt.Assert(t, qt.Not(qt.Equals(id, 0)))

	if testutils.IsVersionLessThan(t, "4.15", "windows:0.20") {
		qt.Assert(t, qt.Equals(info.Name, ""))
	} else {
		qt.Assert(t, qt.Equals(info.Name, "test"))
	}

	if jitedSize, err := info.JitedSize(); testutils.IsVersionLessThan(t, "4.13") {
		qt.Assert(t, qt.IsNotNil(err))
	} else {
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.IsTrue(jitedSize > 0))
	}

	if xlatedSize, err := info.TranslatedSize(); testutils.IsVersionLessThan(t, "4.13") {
		qt.Assert(t, qt.IsNotNil(err))
	} else {
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.IsTrue(xlatedSize > 0))
	}

	if uid, ok := info.CreatedByUID(); testutils.IsVersionLessThan(t, "4.15") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.Equals(uid, uint32(os.Getuid())))
	}

	if loadTime, ok := info.LoadTime(); testutils.IsVersionLessThan(t, "4.15") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.IsTrue(loadTime > 0))
	}

	if verifiedInsns, ok := info.VerifiedInstructions(); testutils.IsVersionLessThan(t, "5.16") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.IsTrue(verifiedInsns > 0))
	}

	if insns, ok := info.JitedInsns(); testutils.IsVersionLessThan(t, "4.13") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.IsTrue(len(insns) > 0))
	}
}

// BenchmarkNewProgramFromFD measures newProgramFromFD on an existing program fd.
func BenchmarkNewProgramFromFD(b *testing.B) {
	b.ReportAllocs()

	spec := fixupProgramSpec(basicProgramSpec)
	prog := mustNewProgram(b, spec, nil)

	for b.Loop() {
		if _, err := newProgramFromFD(prog.fd); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkProgramInfo measures newProgramInfoFromFd on an existing program fd.
func BenchmarkProgramInfo(b *testing.B) {
	b.ReportAllocs()

	spec := fixupProgramSpec(basicProgramSpec)
	prog := mustNewProgram(b, spec, nil)

	for b.Loop() {
		if _, err := newProgramInfoFromFd(prog.fd); err != nil {
			b.Fatal(err)
		}
	}
}

// TestProgramInfoProc checks program info obtained only via
// readProgramInfoFromProc. Skipped if that isn't supported.
func TestProgramInfoProc(t *testing.T) {
	spec := fixupProgramSpec(basicProgramSpec)
	prog := mustNewProgram(t, spec, nil)

	var info ProgramInfo
	err := readProgramInfoFromProc(prog.fd, &info)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	validateProgInfo(t, spec, &info)
}

// TestProgramInfoBTF loads multiprogSpec and checks the JITed and BTF related
// accessors of its ProgramInfo.
func TestProgramInfoBTF(t *testing.T) {
	prog, err := newProgram(t, multiprogSpec, nil)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	info, err := prog.Info()
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	// On kernels before 5.x, nr_jited_ksyms is not set for programs without subprogs.
	// It's included here since this test uses a bpf program with subprogs.
	if addrs, ok := info.JitedKsymAddrs(); testutils.IsVersionLessThan(t, "4.18") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.IsTrue(len(addrs) > 0))
	}

	if lens, ok := info.JitedFuncLens(); testutils.IsVersionLessThan(t, "4.18") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.IsTrue(len(lens) > 0))
	}

	if infos, ok := info.JitedLineInfos(); testutils.IsVersionLessThan(t, "5.0") {
		qt.Assert(t, qt.IsFalse(ok))
	} else {
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.IsTrue(len(infos) > 0))
	}

	if funcs, err := info.FuncInfos(); testutils.IsVersionLessThan(t, "5.0") {
		qt.Assert(t, qt.IsNotNil(err))
	} else {
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.HasLen(funcs, 2))
		qt.Assert(t, qt.ContentEquals(funcs[0].Func, btfFn))
		qt.Assert(t, qt.ContentEquals(funcs[1].Func, btfFn))
	}

	if lines, err := info.LineInfos(); testutils.IsVersionLessThan(t, "5.0") {
		qt.Assert(t, qt.IsNotNil(err))
	} else {
		qt.Assert(t, qt.IsNil(err))
		qt.Assert(t, qt.HasLen(lines, 2))
		qt.Assert(t, qt.Equals(lines[0].Line.Line(), "line info"))
		qt.Assert(t, qt.Equals(lines[1].Line.Line(), "line info"))
	}
}

// TestProgramInfoMapIDs checks that MapIDs reports exactly the ID of the one
// map referenced by the program, on versions which support the field.
func TestProgramInfoMapIDs(t *testing.T) {
	arr := createMap(t, Array, 1)

	prog := mustNewProgram(t, &ProgramSpec{
		Type: SocketFilter,
		Instructions: asm.Instructions{
			asm.LoadMapPtr(asm.R0, arr.FD()),
			asm.LoadImm(asm.R0, 2, asm.DWord),
			asm.Return(),
		},
		License: "MIT",
	}, nil)

	info, err := prog.Info()
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	ids, ok := info.MapIDs()
	switch {
	case testutils.IsVersionLessThan(t, "4.15", "windows:0.20"):
		qt.Assert(t, qt.IsFalse(ok))
		qt.Assert(t, qt.HasLen(ids, 0))

	default:
		qt.Assert(t, qt.IsTrue(ok))

		mapInfo, err := arr.Info()
		qt.Assert(t, qt.IsNil(err))

		mapID, ok := mapInfo.ID()
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.ContentEquals(ids, []MapID{mapID}))
	}
}

// TestProgramInfoMapIDsNoMaps checks that MapIDs returns an empty slice for a
// program without maps, and that the bool reflects whether the field is
// supported.
func TestProgramInfoMapIDsNoMaps(t *testing.T) {
	prog := createBasicProgram(t)

	info, err := prog.Info()
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	ids, ok := info.MapIDs()
	switch {
	case testutils.IsVersionLessThan(t, "4.15", "windows:0.20"):
		qt.Assert(t, qt.IsFalse(ok))
		qt.Assert(t, qt.HasLen(ids, 0))

	default:
		qt.Assert(t, qt.IsTrue(ok))
		qt.Assert(t, qt.HasLen(ids, 0))
	}
}

// TestScanFdInfoReader feeds the single line "foo:\tbar\n" to scanFdInfoReader
// with various field maps and checks whether an error is returned.
func TestScanFdInfoReader(t *testing.T) {
	tests := []struct {
		fields map[string]interface{}
		valid  bool
	}{
		// No fields requested.
		{nil, true},
		// Key present, value fits the target type.
		{map[string]interface{}{"foo": new(string)}, true},
		// Key not present in the input.
		{map[string]interface{}{"zap": new(string)}, false},
		// Key present, but "bar" can't be scanned into an int.
		{map[string]interface{}{"foo": new(int)}, false},
	}

	for _, test := range tests {
		err := scanFdInfoReader(strings.NewReader("foo:\tbar\n"), test.fields)
		if test.valid {
			if err != nil {
				t.Errorf("fields %v returns an error: %s", test.fields, err)
			}
		} else {
			if err == nil {
				t.Errorf("fields %v doesn't return an error", test.fields)
			}
		}
	}
}

// BenchmarkScanFdInfoReader measures scanFdInfoReader on a ten line input
// where only the last line is of interest.
func BenchmarkScanFdInfoReader(b *testing.B) {
	b.ReportAllocs()

	// Pathological case with 9 fields we're not interested in, and one
	// field we are, all the way at the very end.
	input := strings.Repeat("ignore:\tthis\n", 9)
	input += "foo:\tbar\n"

	r := strings.NewReader(input)

	var val string
	fields := map[string]any{"foo": &val}
	for b.Loop() {
		// Clear the target, since non-zero targets are not overwritten.
		val = ""
		r.Reset(input)

		if err := scanFdInfoReader(r, fields); err != nil {
			b.Fatal(err)
		}
		if val != "bar" {
			b.Fatal("unexpected value:", val)
		}
	}
}

// TestProgramStats loads a BPF program once and executes back-to-back test runs
// of the program. See testStats for details.
func TestProgramStats(t *testing.T) {
	testutils.SkipOnOldKernel(t, "5.8", "BPF_ENABLE_STATS")

	prog := createBasicProgram(t)

	// A freshly loaded program must not have accumulated any statistics.
	s, err := prog.Stats()
	qt.Assert(t, qt.IsNil(err))

	qt.Assert(t, qt.Equals(s.RunCount, 0))
	qt.Assert(t, qt.Equals(s.RecursionMisses, 0))

	if runtime.GOARCH != "arm64" {
		// Runtime is flaky on arm64.
		qt.Assert(t, qt.Equals(s.Runtime, 0))
	}

	if err := testStats(t, prog); err != nil {
		testutils.SkipIfNotSupportedOnOS(t, err)
		t.Error(err)
	}
}

// BenchmarkStats is a benchmark of TestProgramStats. See testStats for details.
func BenchmarkStats(b *testing.B) {
	b.ReportAllocs()

	testutils.SkipOnOldKernel(b, "5.8", "BPF_ENABLE_STATS")

	prog := createBasicProgram(b)

	for b.Loop() {
		if err := testStats(b, prog); err != nil {
			testutils.SkipIfNotSupportedOnOS(b, err)
			b.Fatal(err)
		}
	}
}

// testStats implements the behaviour under test for TestProgramStats
// and BenchmarkStats. First, a test run is executed with runtime statistics
// enabled, followed by another with runtime stats disabled. Counters are only
// expected to increase on the runs where runtime stats are enabled.
//
// Due to runtime behaviour on Go 1.14 and higher, the syscall backing
// (*Program).Test() could be invoked multiple times for each call to Test(),
// resulting in RunCount incrementing by more than one. Expecting RunCount to
// be of a specific value after a call to Test() is therefore not possible.
// See https://golang.org/doc/go1.14#runtime for more details.
// testStats implements the behaviour under test for TestProgramStats
// and BenchmarkStats. First, a test run is executed with runtime statistics
// enabled, followed by another with runtime stats disabled. Counters are only
// expected to increase on the runs where runtime stats are enabled.
//
// Only a failure to enable statistics is reported through the returned error,
// so that callers can decide whether to skip. All other expectations are
// asserted directly on tb.
//
// Due to runtime behaviour on Go 1.14 and higher, the syscall backing
// (*Program).Test() could be invoked multiple times for each call to Test(),
// resulting in RunCount incrementing by more than one. Expecting RunCount to
// be of a specific value after a call to Test() is therefore not possible.
// See https://golang.org/doc/go1.14#runtime for more details.
func testStats(tb testing.TB, prog *Program) error {
	tb.Helper()

	in := internal.EmptyBPFContext

	stats, err := EnableStats(uint32(sys.BPF_STATS_RUN_TIME))
	if err != nil {
		return fmt.Errorf("failed to enable stats: %w", err)
	}
	// Safety net for early exits; the handle is also closed explicitly below.
	defer stats.Close()

	// Program execution with runtime statistics enabled.
	// Should increase both runtime and run counter.
	mustRun(tb, prog, &RunOptions{Data: in})

	s1, err := prog.Stats()
	qt.Assert(tb, qt.IsNil(err))

	qt.Assert(tb, qt.Not(qt.Equals(s1.RunCount, 0)), qt.Commentf("expected run count to be at least 1 after first invocation"))
	qt.Assert(tb, qt.Not(qt.Equals(s1.Runtime, 0)), qt.Commentf("expected runtime to be at least 1ns after first invocation"))

	// Close the handle returned by EnableStats before the second run.
	qt.Assert(tb, qt.IsNil(stats.Close()))

	// Second program execution, with runtime statistics gathering disabled.
	// Total runtime and run counters are not expected to increase.
	mustRun(tb, prog, &RunOptions{Data: in})

	s2, err := prog.Stats()
	qt.Assert(tb, qt.IsNil(err))

	qt.Assert(tb, qt.Equals(s2.RunCount, s1.RunCount), qt.Commentf("run count (%d) increased after first invocation (%d)", s2.RunCount, s1.RunCount))
	qt.Assert(tb, qt.Equals(s2.Runtime, s1.Runtime), qt.Commentf("runtime (%d) increased after first invocation (%d)", s2.Runtime, s1.Runtime))

	return nil
}
"global_fn2": false, "global_fn3": false, } lineInfoCount := 0 for _, ins := range inst { if ins.Source() != nil { lineInfoCount++ } fn := btf.FuncMetadata(&ins) if fn != nil { expectedFuncInfo[fn.Name] = true } } if lineInfoCount != expectedLineInfoCount { t.Errorf("expected %d line info entries, got %d", expectedLineInfoCount, lineInfoCount) } for fn, found := range expectedFuncInfo { if !found { t.Errorf("func %q not found", fn) } } } func TestInfoExportedFields(t *testing.T) { // It is highly unlikely that you should be adjusting the asserts below. // See the comment at the top of info.go for more information. var names []string for _, field := range reflect.VisibleFields(reflect.TypeOf(MapInfo{})) { if field.IsExported() { names = append(names, field.Name) } } qt.Assert(t, qt.ContentEquals(names, []string{ "Type", "KeySize", "ValueSize", "MaxEntries", "Flags", "Name", })) names = nil for _, field := range reflect.VisibleFields(reflect.TypeOf(ProgramInfo{})) { if field.IsExported() { names = append(names, field.Name) } } qt.Assert(t, qt.ContentEquals(names, []string{ "Type", "Tag", "Name", })) } func TestZero(t *testing.T) { var ( empty = "" nul uint32 = 0 one uint32 = 1 iempty any = "" inul any = uint32(0) ione any = uint32(1) ) qt.Assert(t, qt.IsTrue(zero(empty))) qt.Assert(t, qt.IsTrue(zero(nul))) qt.Assert(t, qt.IsFalse(zero(one))) qt.Assert(t, qt.IsTrue(zero(&empty))) qt.Assert(t, qt.IsTrue(zero(&nul))) qt.Assert(t, qt.IsFalse(zero(&one))) qt.Assert(t, qt.IsTrue(zero(iempty))) qt.Assert(t, qt.IsTrue(zero(inul))) qt.Assert(t, qt.IsFalse(zero(ione))) qt.Assert(t, qt.IsTrue(zero(&iempty))) qt.Assert(t, qt.IsTrue(zero(&inul))) qt.Assert(t, qt.IsFalse(zero(&ione))) } ================================================ FILE: internal/cmd/genfunctions.awk ================================================ #!/usr/bin/gawk -f # Generate constants from Linux headers. # # This script expects include/uapi/bpf.h as input. 
# Emit the file header and open the constant block.
BEGIN {
	print "// Code generated by internal/cmd/genfunctions.awk; DO NOT EDIT."
	print ""
	print "package asm"
	print ""
	print "// Code in this file is derived from Linux, available under the GPL-2.0 WITH Linux-syscall-note."
	print ""
	print "import \"github.com/cilium/ebpf/internal/platform\""
	print ""
	print "// Built-in functions (Linux)."
	print "const ("
}

# Turn each FN(name, id, ...) entry into a Go constant, converting the
# snake_case helper name into a CamelCase identifier prefixed with "Fn".
/FN\([[:alnum:]_]+, [[:digit:]]+,.*\)/ {
	name = gensub(/.*FN\(([[:alnum:]_]+), [[:digit:]]+,.*\).*/, "\\1", 1)
	id = gensub(/.*FN\([[:alnum:]_]+, ([[:digit:]]+),.*\).*/, "\\1", 1)

	# split() numbers the pieces 1..n and returns n. Walk the indices
	# explicitly: `for (i in parts)` visits elements in an unspecified
	# order, which could scramble the words of the generated name.
	n = split(tolower(name), parts, "_")
	result = "Fn"
	for (i = 1; i <= n; i++) {
		part = parts[i]
		result = result toupper(substr(part, 1, 1)) substr(part, 2)
	}

	print "\t" result " = BuiltinFunc(platform.LinuxTag | " id ")"
}

# Close the constant block.
END {
	print ")"
	print ""
}
// Deque implements a double ended queue.
//
// The zero value is an empty deque which is ready to use.
type Deque[T any] struct {
	// elems is the ring buffer backing the deque. Once allocated, its length
	// is a power of two.
	elems []T
	// read is the position of the first element, write the position one past
	// the last element. Both are turned into buffer indices using mask.
	read, write uint64
	// mask is len(elems)-1 once a buffer has been allocated.
	mask uint64
}

// Reset clears the contents of the deque while retaining the backing buffer.
func (dq *Deque[T]) Reset() {
	var zero T

	// Zero the occupied slots so the buffer doesn't keep old values alive.
	for i := dq.read; i < dq.write; i++ {
		dq.elems[i&dq.mask] = zero
	}

	dq.read, dq.write = 0, 0
}

// Empty returns true if the deque contains no elements.
func (dq *Deque[T]) Empty() bool {
	return dq.read == dq.write
}

// Push adds an element to the end.
func (dq *Deque[T]) Push(e T) {
	dq.Grow(1)
	dq.elems[dq.write&dq.mask] = e
	dq.write++
}

// Shift returns the first element or the zero value.
func (dq *Deque[T]) Shift() T {
	var zero T

	if dq.Empty() {
		return zero
	}

	index := dq.read & dq.mask
	t := dq.elems[index]
	dq.elems[index] = zero
	dq.read++
	return t
}

// Pop returns the last element or the zero value.
func (dq *Deque[T]) Pop() T {
	var zero T

	if dq.Empty() {
		return zero
	}

	dq.write--
	index := dq.write & dq.mask
	t := dq.elems[index]
	dq.elems[index] = zero
	return t
}

// Grow the deque's capacity, if necessary, to guarantee space for another n
// elements.
//
// Grow panics if n is negative or if the required capacity can not be
// represented, instead of silently allocating less space than requested.
func (dq *Deque[T]) Grow(n int) {
	if n < 0 {
		panic("negative count")
	}

	have := dq.write - dq.read
	need := have + uint64(n)
	if need < have {
		panic("overflow")
	}
	if uint64(len(dq.elems)) >= need {
		return
	}

	// Round up to the next power of two which is at least 8.
	// See https://jameshfisher.com/2018/03/30/round-up-power-2/
	//
	// need is at least 1 here, since a request for zero elements has been
	// satisfied by the length check above.
	shift := 64 - bits.LeadingZeros64(need-1)
	if shift >= bits.UintSize-1 {
		// 1<<shift would wrap to zero or a negative number, which would
		// make the clamp below pick a capacity smaller than need.
		panic("overflow")
	}
	capacity := max(1<<shift, 8)

	// Copy the elements to the start of the new buffer, oldest first: first
	// the part from the read index to the end of the old buffer, then the
	// part which wrapped around to its start.
	elems := make([]T, have, capacity)
	pivot := dq.read & dq.mask
	copied := copy(elems, dq.elems[pivot:])
	copy(elems[copied:], dq.elems[:pivot])

	dq.elems = elems[:capacity]
	dq.mask = uint64(capacity) - 1
	dq.read, dq.write = 0, have
}
!= 0 { t.Fatalf("Capacity %d is not a power of two", cap(td.elems)) } if td.Shift() != 2 || td.Shift() != 3 { t.Fatal("Elements don't match after grow") } }) } ================================================ FILE: internal/efw/enums.go ================================================ //go:build windows package efw import ( "syscall" "unsafe" "golang.org/x/sys/windows" ) /* Converts an attach type enum into a GUID. ebpf_result_t ebpf_get_ebpf_attach_type( bpf_attach_type_t bpf_attach_type, _Out_ ebpf_attach_type_t* ebpf_attach_type_t *ebpf_attach_type) */ var ebpfGetEbpfAttachTypeProc = newProc("ebpf_get_ebpf_attach_type") func EbpfGetEbpfAttachType(attachType uint32) (windows.GUID, error) { addr, err := ebpfGetEbpfAttachTypeProc.Find() if err != nil { return windows.GUID{}, err } var attachTypeGUID windows.GUID err = errorResult(syscall.SyscallN(addr, uintptr(attachType), uintptr(unsafe.Pointer(&attachTypeGUID)), )) return attachTypeGUID, err } /* Retrieve a program type given a GUID. bpf_prog_type_t ebpf_get_bpf_program_type(_In_ const ebpf_program_type_t* program_type) */ var ebpfGetBpfProgramTypeProc = newProc("ebpf_get_bpf_program_type") func EbpfGetBpfProgramType(programType windows.GUID) (uint32, error) { addr, err := ebpfGetBpfProgramTypeProc.Find() if err != nil { return 0, err } return uint32Result(syscall.SyscallN(addr, uintptr(unsafe.Pointer(&programType)))), nil } /* Retrieve an attach type given a GUID. 
bpf_attach_type_t ebpf_get_bpf_attach_type(_In_ const ebpf_attach_type_t* ebpf_attach_type) */ var ebpfGetBpfAttachTypeProc = newProc("ebpf_get_bpf_attach_type") func EbpfGetBpfAttachType(attachType windows.GUID) (uint32, error) { addr, err := ebpfGetBpfAttachTypeProc.Find() if err != nil { return 0, err } return uint32Result(syscall.SyscallN(addr, uintptr(unsafe.Pointer(&attachType)))), nil } ================================================ FILE: internal/efw/error_reporting.go ================================================ //go:build windows package efw import ( "errors" "fmt" "os" "syscall" "testing" "golang.org/x/sys/windows" ) func init() { if !testing.Testing() { return } if isDebuggerPresent() { return } if err := configureCRTErrorReporting(); err != nil { fmt.Fprintln(os.Stderr, "WARNING: Could not configure CRT error reporting, tests may hang:", err) } } var errErrorReportingAlreadyConfigured = errors.New("error reporting already configured") // Configure built-in error reporting of the C runtime library. // // The C runtime emits assertion failures into a graphical message box by default. // This causes a hang in CI environments. This function configures the CRT to // log to stderr instead. func configureCRTErrorReporting() error { const ucrtDebug = "ucrtbased.dll" // Constants from crtdbg.h // // See https://doxygen.reactos.org/da/d40/crt_2crtdbg_8h_source.html const ( _CRT_ERROR = 1 _CRT_ASSERT = 2 _CRTDBG_MODE_FILE = 0x1 _CRTDBG_MODE_WNDW = 0x4 _CRTDBG_HFILE_ERROR = -2 _CRTDBG_FILE_STDERR = -4 ) // Load the efW API to trigger loading the CRT. This may fail, in which case // we can't figure out which CRT is being used. // In that case we rely on the error bubbling up via some other path. 
_ = module.Load() ucrtHandle, err := syscall.UTF16PtrFromString(ucrtDebug) if err != nil { return err } var handle windows.Handle err = windows.GetModuleHandleEx(0, ucrtHandle, &handle) if errors.Is(err, windows.ERROR_MOD_NOT_FOUND) { // Loading the ebpf api did not pull in the debug UCRT, so there is // nothing to configure. return nil } else if err != nil { return err } defer windows.FreeLibrary(handle) setReportModeAddr, err := windows.GetProcAddress(handle, "_CrtSetReportMode") if err != nil { return err } setReportMode := func(reportType int, reportMode int) (int, error) { // See https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/crtsetreportmode?view=msvc-170 r1, _, err := syscall.SyscallN(setReportModeAddr, uintptr(reportType), uintptr(reportMode)) if int(r1) == -1 { return 0, fmt.Errorf("set report mode for type %d: %w", reportType, err) } return int(r1), nil } setReportFileAddr, err := windows.GetProcAddress(handle, "_CrtSetReportFile") if err != nil { return err } setReportFile := func(reportType int, reportFile int) (int, error) { // See https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/crtsetreportfile?view=msvc-170 r1, _, err := syscall.SyscallN(setReportFileAddr, uintptr(reportType), uintptr(reportFile)) if int(r1) == _CRTDBG_HFILE_ERROR { return 0, fmt.Errorf("set report file for type %d: %w", reportType, err) } return int(r1), nil } reportToFile := func(reportType, defaultMode int) error { oldMode, err := setReportMode(reportType, _CRTDBG_MODE_FILE) if err != nil { return err } if oldMode != defaultMode { // Attempt to restore old mode if it was different from the expected default. _, _ = setReportMode(reportType, oldMode) return errErrorReportingAlreadyConfigured } oldFile, err := setReportFile(reportType, _CRTDBG_FILE_STDERR) if err != nil { return err } if oldFile != -1 { // Attempt to restore old file if it was different from the expected default. 
_, _ = setReportFile(reportType, oldFile) return errErrorReportingAlreadyConfigured } return nil } // See https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/crtsetreportmode?view=msvc-170#remarks // for defaults. if err := reportToFile(_CRT_ASSERT, _CRTDBG_MODE_WNDW); err != nil { return err } if err := reportToFile(_CRT_ERROR, _CRTDBG_MODE_WNDW); err != nil { return err } return nil } // isDebuggerPresent returns true if the current process is being debugged. // // See https://learn.microsoft.com/en-us/windows/win32/api/debugapi/nf-debugapi-isdebuggerpresent func isDebuggerPresent() bool { kernel32Handle, err := windows.LoadLibrary("kernel32.dll") if err != nil { return false } isDebuggerPresentAddr, err := windows.GetProcAddress(kernel32Handle, "IsDebuggerPresent") if err != nil { return false } r1, _, _ := syscall.SyscallN(isDebuggerPresentAddr) return r1 != 0 } ================================================ FILE: internal/efw/error_reporting_test.go ================================================ //go:build windows package efw import ( "testing" "github.com/go-quicktest/qt" ) func TestConfigureErrorReporting(t *testing.T) { qt.Assert(t, qt.ErrorIs(configureCRTErrorReporting(), errErrorReportingAlreadyConfigured)) } func TestIsDebuggerPresent(t *testing.T) { qt.Assert(t, qt.IsFalse(isDebuggerPresent())) } ================================================ FILE: internal/efw/fd.go ================================================ //go:build windows package efw import ( "syscall" "unsafe" ) // ebpf_result_t ebpf_close_fd(fd_t fd) var ebpfCloseFdProc = newProc("ebpf_close_fd") func EbpfCloseFd(fd int) error { addr, err := ebpfCloseFdProc.Find() if err != nil { return err } return errorResult(syscall.SyscallN(addr, uintptr(fd))) } // ebpf_result_t ebpf_duplicate_fd(fd_t fd, _Out_ fd_t* dup) var ebpfDuplicateFdProc = newProc("ebpf_duplicate_fd") func EbpfDuplicateFd(fd int) (int, error) { addr, err := ebpfDuplicateFdProc.Find() if err != nil { return 
-1, err } var dup FD err = errorResult(syscall.SyscallN(addr, uintptr(fd), uintptr(unsafe.Pointer(&dup)))) return int(dup), err } ================================================ FILE: internal/efw/map.go ================================================ //go:build windows package efw import ( "runtime" "syscall" "unsafe" "golang.org/x/sys/windows" ) /* ebpf_ring_buffer_map_map_buffer( fd_t map_fd, _Outptr_result_maybenull_ void** consumer, _Outptr_result_maybenull_ const void** producer, _Outptr_result_buffer_maybenull_(*data_size) const uint8_t** data, _Out_ size_t* data_size) EBPF_NO_EXCEPT; */ var ebpfRingBufferMapMapBufferProc = newProc("ebpf_ring_buffer_map_map_buffer") func EbpfRingBufferMapMapBuffer(mapFd int) (consumer, producer, data *uint8, dataLen Size, _ error) { addr, err := ebpfRingBufferMapMapBufferProc.Find() if err != nil { return nil, nil, nil, 0, err } err = errorResult(syscall.SyscallN(addr, uintptr(mapFd), uintptr(unsafe.Pointer(&consumer)), uintptr(unsafe.Pointer(&producer)), uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&dataLen)), )) if err != nil { return nil, nil, nil, 0, err } return consumer, producer, data, dataLen, nil } /* ebpf_ring_buffer_map_unmap_buffer( fd_t map_fd, _In_ void* consumer, _In_ const void* producer, _In_ const void* data) EBPF_NO_EXCEPT; */ var ebpfRingBufferMapUnmapBufferProc = newProc("ebpf_ring_buffer_map_unmap_buffer") func EbpfRingBufferMapUnmapBuffer(mapFd int, consumer, producer, data *uint8) error { addr, err := ebpfRingBufferMapUnmapBufferProc.Find() if err != nil { return err } return errorResult(syscall.SyscallN(addr, uintptr(mapFd), uintptr(unsafe.Pointer(consumer)), uintptr(unsafe.Pointer(producer)), uintptr(unsafe.Pointer(data)), )) } /* ebpf_result_t ebpf_map_set_wait_handle( fd_t map_fd, uint64_t index, ebpf_handle_t handle) */ var ebpfMapSetWaitHandleProc = newProc("ebpf_map_set_wait_handle") func EbpfMapSetWaitHandle(mapFd int, index uint64, handle windows.Handle) error { addr, err := 
ebpfMapSetWaitHandleProc.Find() if err != nil { return err } return errorResult(syscall.SyscallN(addr, uintptr(mapFd), uintptr(index), uintptr(handle), )) } /* ebpf_result_t ebpf_ring_buffer_map_write( fd_t ring_buffer_map_fd, const void* data, size_t data_length) */ var ebpfRingBufferMapWriteProc = newProc("ebpf_ring_buffer_map_write") func EbpfRingBufferMapWrite(ringBufferMapFd int, data []byte) error { addr, err := ebpfRingBufferMapWriteProc.Find() if err != nil { return err } err = errorResult(syscall.SyscallN(addr, uintptr(ringBufferMapFd), uintptr(unsafe.Pointer(&data[0])), uintptr(len(data)), )) runtime.KeepAlive(data) return err } ================================================ FILE: internal/efw/module.go ================================================ //go:build windows // Package efw contains support code for eBPF for Windows. package efw import ( "golang.org/x/sys/windows" ) // module is the global handle for the eBPF for Windows user-space API. var module = windows.NewLazyDLL("ebpfapi.dll") // FD is the equivalent of fd_t. // // See https://github.com/microsoft/ebpf-for-windows/blob/54632eb360c560ebef2f173be1a4a4625d540744/include/ebpf_api.h#L24 type FD int32 // Size is the equivalent of size_t. // // This is correct on amd64 and arm64 according to tests on godbolt.org. type Size uint64 // Int is the equivalent of int on MSVC (am64, arm64) and MinGW (gcc, clang). type Int int32 // ObjectType is the equivalent of ebpf_object_type_t. 
// // See https://github.com/microsoft/ebpf-for-windows/blob/44f5de09ec0f3f7ad176c00a290c1cb7106cdd5e/include/ebpf_core_structs.h#L41 type ObjectType uint32 const ( EBPF_OBJECT_UNKNOWN ObjectType = iota EBPF_OBJECT_MAP EBPF_OBJECT_LINK EBPF_OBJECT_PROGRAM ) ================================================ FILE: internal/efw/native.go ================================================ //go:build windows package efw import ( "syscall" "unsafe" "golang.org/x/sys/windows" ) /* ebpf_result_t ebpf_object_load_native_by_fds( _In_z_ const char* file_name, _Inout_ size_t* count_of_maps, _Out_writes_opt_(count_of_maps) fd_t* map_fds, _Inout_ size_t* count_of_programs, _Out_writes_opt_(count_of_programs) fd_t* program_fds) */ var ebpfObjectLoadNativeByFdsProc = newProc("ebpf_object_load_native_by_fds") func EbpfObjectLoadNativeFds(fileName string, mapFds []FD, programFds []FD) (int, int, error) { addr, err := ebpfObjectLoadNativeByFdsProc.Find() if err != nil { return 0, 0, err } fileBytes, err := windows.ByteSliceFromString(fileName) if err != nil { return 0, 0, err } countOfMaps := Size(len(mapFds)) countOfPrograms := Size(len(programFds)) err = errorResult(syscall.SyscallN(addr, uintptr(unsafe.Pointer(&fileBytes[0])), uintptr(unsafe.Pointer(&countOfMaps)), uintptr(unsafe.Pointer(&mapFds[0])), uintptr(unsafe.Pointer(&countOfPrograms)), uintptr(unsafe.Pointer(&programFds[0])), )) return int(countOfMaps), int(countOfPrograms), err } ================================================ FILE: internal/efw/object.go ================================================ //go:build windows package efw import ( "syscall" "unsafe" "golang.org/x/sys/windows" ) // https://github.com/microsoft/ebpf-for-windows/blob/9d9003c39c3fd75be5225ac0fce30077d6bf0604/include/ebpf_core_structs.h#L15 const _EBPF_MAX_PIN_PATH_LENGTH = 256 /* Retrieve object info and type from a fd. 
ebpf_result_t ebpf_object_get_info_by_fd( fd_t bpf_fd, _Inout_updates_bytes_to_opt_(*info_size, *info_size) void* info, _Inout_opt_ uint32_t* info_size, _Out_opt_ ebpf_object_type_t* type) */ var ebpfObjectGetInfoByFdProc = newProc("ebpf_object_get_info_by_fd") func EbpfObjectGetInfoByFd(fd int, info unsafe.Pointer, info_size *uint32) (ObjectType, error) { addr, err := ebpfObjectGetInfoByFdProc.Find() if err != nil { return 0, err } var objectType ObjectType err = errorResult(syscall.SyscallN(addr, uintptr(fd), uintptr(info), uintptr(unsafe.Pointer(info_size)), uintptr(unsafe.Pointer(&objectType)), )) return objectType, err } // ebpf_result_t ebpf_object_unpin(_In_z_ const char* path) var ebpfObjectUnpinProc = newProc("ebpf_object_unpin") func EbpfObjectUnpin(path string) error { addr, err := ebpfObjectUnpinProc.Find() if err != nil { return err } pathBytes, err := windows.ByteSliceFromString(path) if err != nil { return err } return errorResult(syscall.SyscallN(addr, uintptr(unsafe.Pointer(&pathBytes[0])))) } /* Retrieve the next pinned object path. ebpf_result_t ebpf_get_next_pinned_object_path( _In_opt_z_ const char* start_path, _Out_writes_z_(next_path_len) char* next_path, size_t next_path_len, _Inout_opt_ ebpf_object_type_t* type) */ var ebpfGetNextPinnedObjectPath = newProc("ebpf_get_next_pinned_object_path") func EbpfGetNextPinnedObjectPath(startPath string, objectType ObjectType) (string, ObjectType, error) { addr, err := ebpfGetNextPinnedObjectPath.Find() if err != nil { return "", 0, err } ptr, err := windows.BytePtrFromString(startPath) if err != nil { return "", 0, err } tmp := make([]byte, _EBPF_MAX_PIN_PATH_LENGTH) err = errorResult(syscall.SyscallN(addr, uintptr(unsafe.Pointer(ptr)), uintptr(unsafe.Pointer(&tmp[0])), uintptr(len(tmp)), uintptr(unsafe.Pointer(&objectType)), )) return windows.ByteSliceToString(tmp), objectType, err } /* Canonicalize a path using filesystem canonicalization rules. 
_Must_inspect_result_ ebpf_result_t ebpf_canonicalize_pin_path(_Out_writes_(output_size) char* output, size_t output_size, _In_z_ const char* input) */ var ebpfCanonicalizePinPath = newProc("ebpf_canonicalize_pin_path") func EbpfCanonicalizePinPath(input string) (string, error) { addr, err := ebpfCanonicalizePinPath.Find() if err != nil { return "", err } inputBytes, err := windows.ByteSliceFromString(input) if err != nil { return "", err } output := make([]byte, _EBPF_MAX_PIN_PATH_LENGTH) err = errorResult(syscall.SyscallN(addr, uintptr(unsafe.Pointer(&output[0])), uintptr(len(output)), uintptr(unsafe.Pointer(&inputBytes[0])), )) return windows.ByteSliceToString(output), err } ================================================ FILE: internal/efw/proc.go ================================================ //go:build windows package efw import ( "errors" "fmt" "syscall" "golang.org/x/sys/windows" ) /* The BPF syscall wrapper which is ABI compatible with Linux. int bpf(int cmd, union bpf_attr* attr, unsigned int size) */ var BPF = newProc("bpf") type proc struct { proc *windows.LazyProc } func newProc(name string) proc { return proc{module.NewProc(name)} } func (p proc) Find() (uintptr, error) { if err := p.proc.Find(); err != nil { if errors.Is(err, windows.ERROR_MOD_NOT_FOUND) { return 0, fmt.Errorf("load %s: not found", module.Name) } return 0, err } return p.proc.Addr(), nil } // uint32Result wraps a function which returns a uint32_t. func uint32Result(r1, _ uintptr, _ syscall.Errno) uint32 { return uint32(r1) } // errorResult wraps a function which returns ebpf_result_t. 
func errorResult(r1, _ uintptr, errNo syscall.Errno) error { err := resultToError(Result(r1)) if err != nil && errNo != 0 { return fmt.Errorf("%w (errno: %v)", err, errNo) } return err } ================================================ FILE: internal/efw/proc_test.go ================================================ //go:build windows package efw import ( "testing" "github.com/go-quicktest/qt" ) func TestNewProc(t *testing.T) { _, err := newProc("a_function_which_doesnt_exist").Find() qt.Assert(t, qt.ErrorMatches(err, ".* a_function_which_doesnt_exist .*")) } func TestCall(t *testing.T) { var err error allocs := testing.AllocsPerRun(10, func() { _, err = EbpfGetEbpfAttachType(2) }) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(allocs, 0)) } ================================================ FILE: internal/efw/program.go ================================================ //go:build windows package efw import ( "syscall" "unsafe" "golang.org/x/sys/windows" ) /* Attach a program. ebpf_result_t ebpf_program_attach_by_fds( fd_t program_fd, _In_opt_ const ebpf_attach_type_t* attach_type, _In_reads_bytes_opt_(attach_parameters_size) void* attach_parameters, size_t attach_parameters_size, _Out_ fd_t* link) */ var ebpfProgramAttachByFdsProc = newProc("ebpf_program_attach_by_fds") func EbpfProgramAttachFds(fd int, attachType windows.GUID, params unsafe.Pointer, params_size uintptr) (int, error) { addr, err := ebpfProgramAttachByFdsProc.Find() if err != nil { return 0, err } var link FD err = errorResult(syscall.SyscallN(addr, uintptr(fd), uintptr(unsafe.Pointer(&attachType)), uintptr(params), params_size, uintptr(unsafe.Pointer(&link)), )) return int(link), err } ================================================ FILE: internal/efw/result.go ================================================ //go:build windows package efw // See https://github.com/microsoft/ebpf-for-windows/blob/main/include/ebpf_result.h type Result int32 //go:generate go tool stringer -tags windows -output 
result_string_windows.go -type=Result const ( EBPF_SUCCESS Result = iota EBPF_VERIFICATION_FAILED EBPF_JIT_COMPILATION_FAILED EBPF_PROGRAM_LOAD_FAILED EBPF_INVALID_FD EBPF_INVALID_OBJECT EBPF_INVALID_ARGUMENT EBPF_OBJECT_NOT_FOUND EBPF_OBJECT_ALREADY_EXISTS EBPF_FILE_NOT_FOUND EBPF_ALREADY_PINNED EBPF_NOT_PINNED EBPF_NO_MEMORY EBPF_PROGRAM_TOO_LARGE EBPF_RPC_EXCEPTION EBPF_ALREADY_INITIALIZED EBPF_ELF_PARSING_FAILED EBPF_FAILED EBPF_OPERATION_NOT_SUPPORTED EBPF_KEY_NOT_FOUND EBPF_ACCESS_DENIED EBPF_BLOCKED_BY_POLICY EBPF_ARITHMETIC_OVERFLOW EBPF_EXTENSION_FAILED_TO_LOAD EBPF_INSUFFICIENT_BUFFER EBPF_NO_MORE_KEYS EBPF_KEY_ALREADY_EXISTS EBPF_NO_MORE_TAIL_CALLS EBPF_PENDING EBPF_OUT_OF_SPACE EBPF_CANCELED EBPF_INVALID_POINTER EBPF_TIMEOUT EBPF_STALE_ID EBPF_INVALID_STATE ) func (r Result) Error() string { return r.String() } func resultToError(res Result) error { if res == EBPF_SUCCESS { return nil } return res } ================================================ FILE: internal/efw/result_string_windows.go ================================================ // Code generated by "stringer -tags windows -output result_string_windows.go -type=Result"; DO NOT EDIT. package efw import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} _ = x[EBPF_SUCCESS-0] _ = x[EBPF_VERIFICATION_FAILED-1] _ = x[EBPF_JIT_COMPILATION_FAILED-2] _ = x[EBPF_PROGRAM_LOAD_FAILED-3] _ = x[EBPF_INVALID_FD-4] _ = x[EBPF_INVALID_OBJECT-5] _ = x[EBPF_INVALID_ARGUMENT-6] _ = x[EBPF_OBJECT_NOT_FOUND-7] _ = x[EBPF_OBJECT_ALREADY_EXISTS-8] _ = x[EBPF_FILE_NOT_FOUND-9] _ = x[EBPF_ALREADY_PINNED-10] _ = x[EBPF_NOT_PINNED-11] _ = x[EBPF_NO_MEMORY-12] _ = x[EBPF_PROGRAM_TOO_LARGE-13] _ = x[EBPF_RPC_EXCEPTION-14] _ = x[EBPF_ALREADY_INITIALIZED-15] _ = x[EBPF_ELF_PARSING_FAILED-16] _ = x[EBPF_FAILED-17] _ = x[EBPF_OPERATION_NOT_SUPPORTED-18] _ = x[EBPF_KEY_NOT_FOUND-19] _ = x[EBPF_ACCESS_DENIED-20] _ = x[EBPF_BLOCKED_BY_POLICY-21] _ = x[EBPF_ARITHMETIC_OVERFLOW-22] _ = x[EBPF_EXTENSION_FAILED_TO_LOAD-23] _ = x[EBPF_INSUFFICIENT_BUFFER-24] _ = x[EBPF_NO_MORE_KEYS-25] _ = x[EBPF_KEY_ALREADY_EXISTS-26] _ = x[EBPF_NO_MORE_TAIL_CALLS-27] _ = x[EBPF_PENDING-28] _ = x[EBPF_OUT_OF_SPACE-29] _ = x[EBPF_CANCELED-30] _ = x[EBPF_INVALID_POINTER-31] _ = x[EBPF_TIMEOUT-32] _ = x[EBPF_STALE_ID-33] _ = x[EBPF_INVALID_STATE-34] } const _Result_name = "EBPF_SUCCESSEBPF_VERIFICATION_FAILEDEBPF_JIT_COMPILATION_FAILEDEBPF_PROGRAM_LOAD_FAILEDEBPF_INVALID_FDEBPF_INVALID_OBJECTEBPF_INVALID_ARGUMENTEBPF_OBJECT_NOT_FOUNDEBPF_OBJECT_ALREADY_EXISTSEBPF_FILE_NOT_FOUNDEBPF_ALREADY_PINNEDEBPF_NOT_PINNEDEBPF_NO_MEMORYEBPF_PROGRAM_TOO_LARGEEBPF_RPC_EXCEPTIONEBPF_ALREADY_INITIALIZEDEBPF_ELF_PARSING_FAILEDEBPF_FAILEDEBPF_OPERATION_NOT_SUPPORTEDEBPF_KEY_NOT_FOUNDEBPF_ACCESS_DENIEDEBPF_BLOCKED_BY_POLICYEBPF_ARITHMETIC_OVERFLOWEBPF_EXTENSION_FAILED_TO_LOADEBPF_INSUFFICIENT_BUFFEREBPF_NO_MORE_KEYSEBPF_KEY_ALREADY_EXISTSEBPF_NO_MORE_TAIL_CALLSEBPF_PENDINGEBPF_OUT_OF_SPACEEBPF_CANCELEDEBPF_INVALID_POINTEREBPF_TIMEOUTEBPF_STALE_IDEBPF_INVALID_STATE" var _Result_index = [...]uint16{0, 12, 36, 63, 87, 102, 121, 142, 163, 189, 208, 227, 242, 256, 278, 296, 320, 343, 354, 382, 400, 418, 440, 464, 493, 517, 534, 557, 580, 592, 609, 622, 642, 654, 667, 685} func 
(i Result) String() string { if i < 0 || i >= Result(len(_Result_index)-1) { return "Result(" + strconv.FormatInt(int64(i), 10) + ")" } return _Result_name[_Result_index[i]:_Result_index[i+1]] } ================================================ FILE: internal/efw/result_test.go ================================================ //go:build windows package efw import ( "testing" "github.com/go-quicktest/qt" ) func TestResultToError(t *testing.T) { qt.Assert(t, qt.IsNil(resultToError(EBPF_SUCCESS))) qt.Assert(t, qt.IsNotNil(resultToError(EBPF_ACCESS_DENIED))) // Ensure that common results do not allocate. for _, result := range []Result{ EBPF_SUCCESS, EBPF_NO_MORE_KEYS, EBPF_KEY_NOT_FOUND, } { t.Run(result.String(), func(t *testing.T) { allocs := testing.AllocsPerRun(1, func() { _ = resultToError(result) }) qt.Assert(t, qt.Equals(allocs, 0.0)) }) } } ================================================ FILE: internal/efw/structs.go ================================================ //go:build windows package efw import "golang.org/x/sys/windows" // https://github.com/microsoft/ebpf-for-windows/blob/95267a53b26c68a94145d1731e2a4c8b546034c3/include/ebpf_structs.h#L366 const _BPF_OBJ_NAME_LEN = 64 // See https://github.com/microsoft/ebpf-for-windows/blob/95267a53b26c68a94145d1731e2a4c8b546034c3/include/ebpf_structs.h#L372-L386 type BpfMapInfo struct { _ uint32 ///< Map ID. _ uint32 ///< Type of map. _ uint32 ///< Size in bytes of a map key. _ uint32 ///< Size in bytes of a map value. _ uint32 ///< Maximum number of entries allowed in the map. Name [_BPF_OBJ_NAME_LEN]byte ///< Null-terminated map name. _ uint32 ///< Map flags. _ uint32 ///< ID of inner map template. _ uint32 ///< Number of pinned paths. } // See https://github.com/microsoft/ebpf-for-windows/blob/95267a53b26c68a94145d1731e2a4c8b546034c3/include/ebpf_structs.h#L396-L410 type BpfProgInfo struct { _ uint32 ///< Program ID. _ uint32 ///< Program type, if a cross-platform type. 
// SafeELFFile wraps an elf.File. Its constructors and symbol accessors turn
// panics raised by debug/elf into errors.
type SafeELFFile struct {
	*elf.File
}

// NewSafeELFFile reads an ELF safely.
//
// Any panic during parsing is turned into an error. This is necessary since
// there are a bunch of unfixed bugs in debug/elf.
//
// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle
func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) {
	defer func() {
		if p := recover(); p != nil {
			safe, err = nil, fmt.Errorf("reading ELF file panicked: %s", p)
		}
	}()

	parsed, err := elf.NewFile(r)
	if err != nil {
		return nil, err
	}

	safe = &SafeELFFile{File: parsed}
	return safe, nil
}

// OpenSafeELFFile reads an ELF from a file.
//
// It works like NewSafeELFFile, with the exception that safe.Close will
// close the underlying file.
func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) {
	defer func() {
		if p := recover(); p != nil {
			safe, err = nil, fmt.Errorf("reading ELF file panicked: %s", p)
		}
	}()

	opened, err := elf.Open(path)
	if err != nil {
		return nil, err
	}

	safe = &SafeELFFile{File: opened}
	return safe, nil
}

// Symbols is the safe version of elf.File.Symbols.
func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
	defer func() {
		if p := recover(); p != nil {
			syms, err = nil, fmt.Errorf("reading ELF symbols panicked: %s", p)
		}
	}()

	return se.File.Symbols()
}

// DynamicSymbols is the safe version of elf.File.DynamicSymbols.
func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
	defer func() {
		if p := recover(); p != nil {
			syms, err = nil, fmt.Errorf("reading ELF dynamic symbols panicked: %s", p)
		}
	}()

	return se.File.DynamicSymbols()
}

// SectionsByType returns all sections in the file with the specified section type.
//
// The result is empty, but not nil, if no section matches.
func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section {
	matches := make([]*elf.Section, 0, 1)
	for i := range se.Sections {
		if sec := se.Sections[i]; sec.Type == typ {
			matches = append(matches, sec)
		}
	}
	return matches
}
// New creates a Poller.
//
// It creates an epoll fd plus two eventfds, closeEvent and flushEvent, and
// registers both eventfds with the epoll fd under id 0. If any step fails,
// every fd created so far is closed again before the error is returned.
func New() (_ *Poller, err error) {
	// Both helpers are deferred below. They look at the named result err when
	// New returns and only release their resource if construction failed.
	closeFDOnError := func(fd int) {
		if err != nil {
			unix.Close(fd)
		}
	}
	closeEventFDOnError := func(e *eventFd) {
		if err != nil {
			e.close()
		}
	}

	epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
	if err != nil {
		return nil, fmt.Errorf("create epoll fd: %w", err)
	}
	defer closeFDOnError(epollFd)

	p := &Poller{epollFd: epollFd}
	p.closeEvent, err = newEventFd()
	if err != nil {
		return nil, err
	}
	defer closeEventFDOnError(p.closeEvent)

	p.flushEvent, err = newEventFd()
	if err != nil {
		return nil, err
	}
	defer closeEventFDOnError(p.flushEvent)

	// err is shadowed inside the following if statements, but the return
	// statements assign the named result, so the deferred helpers still see
	// the failure.
	if err := p.Add(p.closeEvent.raw, 0); err != nil {
		return nil, fmt.Errorf("add close eventfd: %w", err)
	}

	if err := p.Add(p.flushEvent.raw, 0); err != nil {
		return nil, fmt.Errorf("add flush eventfd: %w", err)
	}

	// Register a cleanup that closes the epoll fd; Close stops it again via
	// p.cleanup.Stop. The fd is passed as an argument rather than captured
	// through p.
	p.cleanup = runtime.AddCleanup(p, func(raw int) {
		_ = unix.Close(raw)
	}, p.epollFd)

	return p, nil
}
func (p *Poller) Close() error { p.cleanup.Stop() // Interrupt Wait() via the closeEvent fd if it's currently blocked. if err := p.wakeWaitForClose(); err != nil { return err } // Acquire the lock. This ensures that Wait isn't running. p.epollMu.Lock() defer p.epollMu.Unlock() // Prevent other calls to Close(). p.eventMu.Lock() defer p.eventMu.Unlock() if p.epollFd != -1 { unix.Close(p.epollFd) p.epollFd = -1 } if p.closeEvent != nil { p.closeEvent.close() p.closeEvent = nil } if p.flushEvent != nil { p.flushEvent.close() p.flushEvent = nil } return nil } // Add an fd to the poller. // // id is returned by Wait in the unix.EpollEvent.Pad field any may be zero. It // must not exceed math.MaxInt32. // // Add is blocked by Wait. func (p *Poller) Add(fd int, id int) error { if int64(id) > math.MaxInt32 { return fmt.Errorf("unsupported id: %d", id) } p.epollMu.Lock() defer p.epollMu.Unlock() if p.epollFd == -1 { return fmt.Errorf("epoll add: %w", os.ErrClosed) } // The representation of EpollEvent isn't entirely accurate. // Pad is fully usable, not just padding. Hence we stuff the // id in there, which allows us to identify the event later (e.g., // in case of perf events, which CPU sent it). event := unix.EpollEvent{ Events: unix.EPOLLIN, Fd: int32(fd), Pad: int32(id), } if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil { return fmt.Errorf("add fd to epoll: %v", err) } return nil } // Wait for events. // // Returns the number of pending events and any errors. // // - [os.ErrClosed] if interrupted by [Close]. // - [ErrFlushed] if interrupted by [Flush]. // - [os.ErrDeadlineExceeded] if deadline is reached. func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) { p.epollMu.Lock() defer p.epollMu.Unlock() if p.epollFd == -1 { return 0, errEpollWaitClosed } for { timeout := int(-1) if !deadline.IsZero() { // Ensure deadline is not in the past and not too far into the future. 
timeout = int(internal.Between(time.Until(deadline).Milliseconds(), 0, math.MaxInt)) } n, err := unix.EpollWait(p.epollFd, events, timeout) if temp, ok := err.(temporaryError); ok && temp.Temporary() { // Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400 continue } if err != nil { return 0, err } if n == 0 { return 0, errEpollWaitDeadlineExceeded } for i := 0; i < n; { event := events[i] if int(event.Fd) == p.closeEvent.raw { // Since we don't read p.closeEvent the event is never cleared and // we'll keep getting this wakeup until Close() acquires the // lock and sets p.epollFd = -1. return 0, errEpollWaitClosed } if int(event.Fd) == p.flushEvent.raw { // read event to prevent it from continuing to wake p.flushEvent.read() err = ErrFlushed events = slices.Delete(events, i, i+1) n -= 1 continue } i++ } return n, err } } type temporaryError interface { Temporary() bool } // wakeWaitForClose unblocks Wait if it's epoll_wait. func (p *Poller) wakeWaitForClose() error { p.eventMu.Lock() defer p.eventMu.Unlock() if p.closeEvent == nil { return fmt.Errorf("epoll wake: %w", os.ErrClosed) } return p.closeEvent.add(1) } // Flush unblocks Wait if it's epoll_wait, for purposes of reading pending samples func (p *Poller) Flush() error { p.eventMu.Lock() defer p.eventMu.Unlock() if p.flushEvent == nil { return fmt.Errorf("epoll wake: %w", os.ErrClosed) } return p.flushEvent.add(1) } // eventFd wraps a Linux eventfd. // // An eventfd acts like a counter: writes add to the counter, reads retrieve // the counter and reset it to zero. Reads also block if the counter is zero. // // See man 2 eventfd. type eventFd struct { file *os.File // prefer raw over file.Fd(), since the latter puts the file into blocking // mode. 
raw int } func newEventFd() (*eventFd, error) { fd, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK) if err != nil { return nil, err } file := os.NewFile(uintptr(fd), "event") return &eventFd{file, fd}, nil } func (efd *eventFd) close() error { return efd.file.Close() } func (efd *eventFd) add(n uint64) error { var buf [8]byte internal.NativeEndian.PutUint64(buf[:], n) _, err := efd.file.Write(buf[:]) return err } func (efd *eventFd) read() (uint64, error) { var buf [8]byte _, err := efd.file.Read(buf[:]) return internal.NativeEndian.Uint64(buf[:]), err } ================================================ FILE: internal/epoll/poller_test.go ================================================ //go:build !windows package epoll import ( "errors" "math" "os" "testing" "time" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/unix" ) func TestPoller(t *testing.T) { t.Parallel() event, poller := mustNewPoller(t) done := make(chan struct{}, 1) read := func() { defer func() { done <- struct{}{} }() events := make([]unix.EpollEvent, 1) n, err := poller.Wait(events, time.Time{}) if errors.Is(err, os.ErrClosed) { return } if err != nil { t.Error("Error from wait:", err) return } if n != 1 { t.Errorf("Got %d instead of 1 events", n) } if e := events[0]; e.Pad != 42 { t.Errorf("Incorrect value in EpollEvent.Pad: %d != 42", e.Pad) } } if err := event.add(1); err != nil { t.Fatal(err) } go read() select { case <-done: case <-time.After(time.Second): t.Fatal("Timed out") } if _, err := event.read(); err != nil { t.Fatal(err) } go read() select { case <-done: t.Fatal("Wait doesn't block") case <-time.After(time.Second): } if err := poller.Close(); err != nil { t.Fatal("Close returns an error:", err) } select { case <-done: case <-time.After(time.Second): t.Fatal("Close doesn't unblock Wait") } if err := poller.Close(); !errors.Is(err, os.ErrClosed) { t.Fatal("Closing a second time doesn't return ErrClosed:", err) } } func TestPollerDeadline(t *testing.T) { t.Parallel() 
_, poller := mustNewPoller(t) events := make([]unix.EpollEvent, 1) _, err := poller.Wait(events, time.Now().Add(-time.Second)) if !errors.Is(err, os.ErrDeadlineExceeded) { t.Fatal("Expected os.ErrDeadlineExceeded on deadline in the past, got", err) } done := make(chan struct{}) go func() { defer close(done) _, err := poller.Wait(events, time.Now().Add(math.MaxInt64)) if !errors.Is(err, os.ErrClosed) { t.Error("Expected os.ErrClosed when interrupting deadline, got", err) } }() // Wait for the goroutine to enter the syscall. time.Sleep(500 * time.Microsecond) poller.Close() <-done } func TestPollerFlush(t *testing.T) { t.Parallel() _, poller := mustNewPoller(t) events := make([]unix.EpollEvent, 1) done := make(chan struct{}) go func() { defer close(done) _, err := poller.Wait(events, time.Time{}) qt.Check(t, qt.ErrorIs(err, ErrFlushed)) }() // Wait for the goroutine to enter the syscall. time.Sleep(500 * time.Microsecond) poller.Flush() <-done } func mustNewPoller(t *testing.T) (*eventFd, *Poller) { t.Helper() event, err := newEventFd() if err != nil { t.Fatal(err) } t.Cleanup(func() { event.close() }) poller, err := New() if err != nil { t.Fatal(err) } t.Cleanup(func() { poller.Close() }) if err := poller.Add(event.raw, 42); err != nil { t.Fatal("Can't add fd:", err) } return event, poller } ================================================ FILE: internal/errors.go ================================================ package internal import ( "bytes" "fmt" "io" "strings" ) // ErrorWithLog wraps err in a VerifierError that includes the parsed verifier // log buffer. // // The default error output is a summary of the full log. The latter can be // accessed via VerifierError.Log or by formatting the error, see Format. func ErrorWithLog(source string, err error, log []byte) *VerifierError { const whitespace = "\t\r\v\n " // Convert verifier log C string by truncating it on the first 0 byte // and trimming trailing whitespace before interpreting as a Go string. 
if i := bytes.IndexByte(log, 0); i != -1 { log = log[:i] } log = bytes.Trim(log, whitespace) if len(log) == 0 { return &VerifierError{source, err, nil} } logLines := bytes.Split(log, []byte{'\n'}) lines := make([]string, 0, len(logLines)) for _, line := range logLines { // Don't remove leading white space on individual lines. We rely on it // when outputting logs. lines = append(lines, string(bytes.TrimRight(line, whitespace))) } return &VerifierError{source, err, lines} } // VerifierError includes information from the eBPF verifier. // // It summarises the log output, see Format if you want to output the full contents. type VerifierError struct { source string // The error which caused this error. Cause error // The verifier output split into lines. Log []string } func (le *VerifierError) Unwrap() error { return le.Cause } func (le *VerifierError) Error() string { log := le.Log if n := len(log); n > 0 && strings.HasPrefix(log[n-1], "processed ") { // Get rid of "processed 39 insns (limit 1000000) ..." from summary. log = log[:n-1] } var b strings.Builder fmt.Fprintf(&b, "%s: %s", le.source, le.Cause.Error()) n := len(log) if n == 0 { return b.String() } lines := log[n-1:] if n >= 2 && includePreviousLine(log[n-1]) { // Add one more line of context if it aids understanding the error. lines = log[n-2:] } for _, line := range lines { b.WriteString(": ") b.WriteString(strings.TrimSpace(line)) } omitted := len(le.Log) - len(lines) if omitted > 0 { fmt.Fprintf(&b, " (%d line(s) omitted)", omitted) } return b.String() } // includePreviousLine returns true if the given line likely is better // understood with additional context from the preceding line. func includePreviousLine(line string) bool { // We need to find a good trade off between understandable error messages // and too much complexity here. Checking the string prefix is ok, requiring // regular expressions to do it is probably overkill. 
if strings.HasPrefix(line, "\t") { // [13] STRUCT drm_rect size=16 vlen=4 // \tx1 type_id=2 return true } if len(line) >= 2 && line[0] == 'R' && line[1] >= '0' && line[1] <= '9' { // 0: (95) exit // R0 !read_ok return true } if strings.HasPrefix(line, "invalid bpf_context access") { // 0: (79) r6 = *(u64 *)(r1 +0) // func '__x64_sys_recvfrom' arg0 type FWD is not a struct // invalid bpf_context access off=0 size=8 return true } return false } // Format the error. // // Understood verbs are %s and %v, which are equivalent to calling Error(). %v // allows outputting additional information using the following flags: // // %+v: Output the first lines, or all lines if no width is given. // %-v: Output the last lines, or all lines if no width is given. // // Use width to specify how many lines to output. Use the '-' flag to output // lines from the end of the log instead of the beginning. func (le *VerifierError) Format(f fmt.State, verb rune) { switch verb { case 's': _, _ = io.WriteString(f, le.Error()) case 'v': n, haveWidth := f.Width() if !haveWidth || n > len(le.Log) { n = len(le.Log) } if !f.Flag('+') && !f.Flag('-') { if haveWidth { _, _ = io.WriteString(f, "%!v(BADWIDTH)") return } _, _ = io.WriteString(f, le.Error()) return } if f.Flag('+') && f.Flag('-') { _, _ = io.WriteString(f, "%!v(BADFLAG)") return } fmt.Fprintf(f, "%s: %s:", le.source, le.Cause.Error()) omitted := len(le.Log) - n lines := le.Log[:n] if f.Flag('-') { // Print last instead of first lines. 
lines = le.Log[len(le.Log)-n:] if omitted > 0 { fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted) } } for _, line := range lines { fmt.Fprintf(f, "\n\t%s", line) } if !f.Flag('-') { if omitted > 0 { fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted) } } default: fmt.Fprintf(f, "%%!%c(BADVERB)", verb) } } ================================================ FILE: internal/errors_test.go ================================================ package internal import ( "errors" "os" "testing" "github.com/go-quicktest/qt" ) func TestVerifierErrorWhitespace(t *testing.T) { b := []byte("unreachable insn 28") b = append(b, 0xa, // \n 0xd, // \r 0x9, // \t 0x20, // space 0, 0, // trailing NUL bytes ) err := ErrorWithLog("frob", errors.New("test"), b) qt.Assert(t, qt.Equals(err.Error(), "frob: test: unreachable insn 28")) for _, log := range [][]byte{ nil, []byte("\x00"), []byte(" "), } { err = ErrorWithLog("frob", errors.New("test"), log) qt.Assert(t, qt.Equals(err.Error(), "frob: test"), qt.Commentf("empty log %q has incorrect format", log)) } } func TestVerifierErrorWrapping(t *testing.T) { sentinel := errors.New("bad") ve := ErrorWithLog("frob", sentinel, nil) qt.Assert(t, qt.ErrorIs(ve, sentinel), qt.Commentf("should wrap provided error")) ve = ErrorWithLog("frob", sentinel, []byte("foo")) qt.Assert(t, qt.ErrorIs(ve, sentinel), qt.Commentf("should wrap provided error")) qt.Assert(t, qt.StringContains(ve.Error(), "foo"), qt.Commentf("verifier log should appear in error string")) } func TestVerifierErrorSummary(t *testing.T) { // Suppress the last line containing 'processed ... insns'. errno524 := readErrorFromFile(t, "testdata/errno524.log") qt.Assert(t, qt.StringContains(errno524.Error(), "JIT doesn't support bpf-to-bpf calls")) qt.Assert(t, qt.Not(qt.StringContains(errno524.Error(), "processed 39 insns"))) // Include the previous line if the current one starts with a tab. 
invalidMember := readErrorFromFile(t, "testdata/invalid-member.log") qt.Assert(t, qt.StringContains(invalidMember.Error(), "STRUCT task_struct size=7744 vlen=218: cpus_mask type_id=109 bitfield_size=0 bits_offset=7744 Invalid member")) // Only include the last line. issue43 := readErrorFromFile(t, "testdata/issue-43.log") qt.Assert(t, qt.StringContains(issue43.Error(), "[11] FUNC helper_func2 type_id=10 vlen != 0")) qt.Assert(t, qt.Not(qt.StringContains(issue43.Error(), "[10] FUNC_PROTO (anon) return=3 args=(3 arg)"))) // Include instruction that caused invalid register access. invalidR0 := readErrorFromFile(t, "testdata/invalid-R0.log") qt.Assert(t, qt.StringContains(invalidR0.Error(), "0: (95) exit: R0 !read_ok")) // Include symbol that doesn't match context type. invalidCtx := readErrorFromFile(t, "testdata/invalid-ctx-access.log") qt.Assert(t, qt.StringContains(invalidCtx.Error(), "func '__x64_sys_recvfrom' arg0 type FWD is not a struct: invalid bpf_context access off=0 size=8")) } func readErrorFromFile(tb testing.TB, file string) *VerifierError { tb.Helper() contents, err := os.ReadFile(file) if err != nil { tb.Fatal("Read file:", err) } return ErrorWithLog("file", errors.New("error"), contents) } ================================================ FILE: internal/feature.go ================================================ package internal import ( "errors" "fmt" "runtime" "sync" "github.com/cilium/ebpf/internal/platform" ) // ErrNotSupported indicates that a feature is not supported. var ErrNotSupported = errors.New("not supported") // ErrNotSupportedOnOS indicates that a feature is not supported on the current // operating system. var ErrNotSupportedOnOS = fmt.Errorf("%w on %s", ErrNotSupported, runtime.GOOS) // ErrRestrictedKernel is returned when kernel address information is restricted // by kernel.kptr_restrict and/or net.core.bpf_jit_harden sysctls. 
var ErrRestrictedKernel = errors.New("restricted by kernel.kptr_restrict and/or net.core.bpf_jit_harden sysctls") // UnsupportedFeatureError is returned by FeatureTest() functions. type UnsupportedFeatureError struct { // The minimum version required for this feature. // // On Linux this refers to the mainline kernel version, on other platforms // to the version of the runtime. // // Used for the error string, and for sanity checking during testing. MinimumVersion Version // The name of the feature that isn't supported. Name string } func (ufe *UnsupportedFeatureError) Error() string { if ufe.MinimumVersion.Unspecified() { return fmt.Sprintf("%s not supported", ufe.Name) } return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion) } // Is indicates that UnsupportedFeatureError is ErrNotSupported. func (ufe *UnsupportedFeatureError) Is(target error) bool { return target == ErrNotSupported } // FeatureTest caches the result of a [FeatureTestFn]. // // Fields should not be modified after creation. type FeatureTest struct { // The name of the feature being detected. Name string // Version in the form Major.Minor[.Patch]. Version string // The feature test itself. Fn FeatureTestFn mu sync.RWMutex done bool result error } // FeatureTestFn is used to determine whether the kernel supports // a certain feature. // // The return values have the following semantics: // // err == ErrNotSupported: the feature is not available // err == nil: the feature is available // err != nil: the test couldn't be executed type FeatureTestFn func() error // NewFeatureTest is a convenient way to create a single [FeatureTest]. // // versions specifies in which version of a BPF runtime a feature appeared. // The format is "GOOS:Major.Minor[.Patch]". GOOS may be omitted when targeting // Linux. Returns [ErrNotSupportedOnOS] if there is no version specified for the // current OS. 
func NewFeatureTest(name string, fn FeatureTestFn, versions ...string) func() error { version, err := platform.SelectVersion(versions) if err != nil { return func() error { return err } } if version == "" { return func() error { // We don't return an UnsupportedFeatureError here, since that will // trigger version checks which don't make sense. return fmt.Errorf("%s: %w", name, ErrNotSupportedOnOS) } } ft := &FeatureTest{ Name: name, Version: version, Fn: fn, } return ft.execute } // execute the feature test. // // The result is cached if the test is conclusive. // // See [FeatureTestFn] for the meaning of the returned error. func (ft *FeatureTest) execute() error { ft.mu.RLock() result, done := ft.result, ft.done ft.mu.RUnlock() if done { return result } ft.mu.Lock() defer ft.mu.Unlock() // The test may have been executed by another caller while we were // waiting to acquire ft.mu. if ft.done { return ft.result } err := ft.Fn() if err == nil { ft.done = true return nil } if errors.Is(err, ErrNotSupported) { var v Version if ft.Version != "" { v, err = NewVersion(ft.Version) if err != nil { return fmt.Errorf("feature %s: %w", ft.Name, err) } } ft.done = true ft.result = &UnsupportedFeatureError{ MinimumVersion: v, Name: ft.Name, } return ft.result } // We couldn't execute the feature test to a point // where it could make a determination. // Don't cache the result, just return it. return fmt.Errorf("detect support for %s: %w", ft.Name, err) } // FeatureMatrix groups multiple related feature tests into a map. // // Useful when there is a small number of discrete features which are known // at compile time. // // It must not be modified concurrently with calling [FeatureMatrix.Result]. type FeatureMatrix[K comparable] map[K]*FeatureTest // Result returns the outcome of the feature test for the given key. // // It's safe to call this function concurrently. // // Always returns [ErrNotSupportedOnOS] on Windows. 
func (fm FeatureMatrix[K]) Result(key K) error { ft, ok := fm[key] if !ok { return fmt.Errorf("no feature probe for %v", key) } if platform.IsWindows { return fmt.Errorf("%s: %w", ft.Name, ErrNotSupportedOnOS) } return ft.execute() } // FeatureCache caches a potentially unlimited number of feature probes. // // Useful when there is a high cardinality for a feature test. type FeatureCache[K comparable] struct { mu sync.RWMutex newTest func(K) *FeatureTest features map[K]*FeatureTest } func NewFeatureCache[K comparable](newTest func(K) *FeatureTest) *FeatureCache[K] { return &FeatureCache[K]{ newTest: newTest, features: make(map[K]*FeatureTest), } } func (fc *FeatureCache[K]) Result(key K) error { if platform.IsWindows { return fmt.Errorf("feature probe for %v: %w", key, ErrNotSupportedOnOS) } // NB: Executing the feature test happens without fc.mu taken. return fc.retrieve(key).execute() } func (fc *FeatureCache[K]) retrieve(key K) *FeatureTest { fc.mu.RLock() ft := fc.features[key] fc.mu.RUnlock() if ft != nil { return ft } fc.mu.Lock() defer fc.mu.Unlock() if ft := fc.features[key]; ft != nil { return ft } ft = fc.newTest(key) fc.features[key] = ft return ft } ================================================ FILE: internal/feature_test.go ================================================ package internal import ( "errors" "runtime" "strings" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/testutils/testmain" ) func TestMain(m *testing.M) { testmain.Run(m) } func TestFeatureTest(t *testing.T) { var called bool fn := NewFeatureTest("foo", func() error { called = true return nil }, "1.0") if called { t.Error("Function was called too early") } err := fn() if errors.Is(err, ErrNotSupportedOnOS) { qt.Assert(t, qt.IsFalse(called)) return } qt.Assert(t, qt.IsTrue(called), qt.Commentf("function should be invoked")) if err != nil { t.Error("Unexpected negative result:", err) } fn = NewFeatureTest("bar", 
func() error { return ErrNotSupported }, "2.1.1") err = fn() if err == nil { t.Fatal("Unexpected positive result") } fte, ok := err.(*UnsupportedFeatureError) if !ok { t.Fatal("Result is not a *UnsupportedFeatureError") } if !strings.Contains(fte.Error(), "2.1.1") { t.Error("UnsupportedFeatureError.Error doesn't contain version") } if !errors.Is(err, ErrNotSupported) { t.Error("UnsupportedFeatureError is not ErrNotSupported") } err2 := fn() if err != err2 { t.Error("Didn't cache an error wrapping ErrNotSupported") } fn = NewFeatureTest("bar", func() error { return errors.New("foo") }, "2.1.1") err1, err2 := fn(), fn() if err1 == err2 { t.Error("Cached result of unsuccessful execution") } } func TestFeatureTestNotSupportedOnOS(t *testing.T) { sentinel := errors.New("quux") fn := func() error { return sentinel } qt.Assert(t, qt.IsNotNil(NewFeatureTest("foo", fn)())) qt.Assert(t, qt.ErrorIs(NewFeatureTest("foo", fn, "froz:1.0.0")(), ErrNotSupportedOnOS)) qt.Assert(t, qt.ErrorIs(NewFeatureTest("foo", fn, runtime.GOOS+":1.0")(), sentinel)) if platform.IsLinux { qt.Assert(t, qt.ErrorIs(NewFeatureTest("foo", fn, "1.0")(), sentinel)) } } ================================================ FILE: internal/io.go ================================================ package internal import ( "bufio" "bytes" "compress/gzip" "errors" "fmt" "io" "os" "path/filepath" "sync" ) // NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized // buffered reader. It is a convenience function for reading subsections of // ELF sections while minimizing the amount of read() syscalls made. // // Syscall overhead is non-negligible in continuous integration context // where ELFs might be accessed over virtual filesystems with poor random // access performance. Buffering reads makes sense because (sub)sections // end up being read completely anyway. // // Use instead of the r.Seek() + io.LimitReader() pattern. 
func NewBufferedSectionReader(ra io.ReaderAt, off, n int64) *bufio.Reader {
	section := io.NewSectionReader(ra, off, n)

	// Use a buffer of at most one page to avoid slurping large parts of a
	// file into memory, but don't ask for a full page if the section is
	// smaller than that. Arches with larger pages get a larger buffer than
	// the bufio default.
	size := int64(os.Getpagesize())
	if n < size {
		size = n
	}

	return bufio.NewReaderSize(section, int(size))
}
func ReadUint64FromFileOnce(format string, path ...string) (uint64, error) { filename := filepath.Join(path...) key := uint64FromFileKey{format, filename} uint64FromFileCache.RLock() if value, ok := uint64FromFileCache.values[key]; ok { uint64FromFileCache.RUnlock() return value, nil } uint64FromFileCache.RUnlock() value, err := ReadUint64FromFile(format, filename) if err != nil { return 0, err } uint64FromFileCache.Lock() defer uint64FromFileCache.Unlock() if value, ok := uint64FromFileCache.values[key]; ok { // Someone else got here before us, use what is cached. return value, nil } uint64FromFileCache.values[key] = value return value, nil } ================================================ FILE: internal/io_test.go ================================================ package internal import ( "bytes" "io" "testing" ) func TestDiscardZero(t *testing.T) { _, err := io.Copy(DiscardZeroes{}, bytes.NewReader([]byte{0, 0, 0})) if err != nil { t.Error("Returned an error even though input was zero:", err) } _, err = io.Copy(DiscardZeroes{}, bytes.NewReader([]byte{1})) if err == nil { t.Error("No error even though input is non-zero") } } ================================================ FILE: internal/kallsyms/cache.go ================================================ package kallsyms import "sync" type cache[K, V comparable] struct { m sync.Map } func (c *cache[K, V]) Load(key K) (value V, _ bool) { v, ok := c.m.Load(key) if !ok { return value, false } value = v.(V) return value, true } func (c *cache[K, V]) Store(key K, value V) { c.m.Store(key, value) } ================================================ FILE: internal/kallsyms/kallsyms.go ================================================ package kallsyms import ( "bytes" "errors" "fmt" "io" "os" "slices" "strconv" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" ) var errAmbiguousKsym = errors.New("multiple kernel symbols with the same name") var symAddrs cache[string, uint64] // AssignAddresses 
looks up the addresses of the requested symbols in the kernel // and assigns them to their corresponding values in the symbols map. Results // of all lookups are cached, successful or otherwise. // // Any symbols missing in the kernel are ignored. Returns an error if multiple // addresses were found for a symbol. func AssignAddresses(symbols map[string]uint64) error { if !platform.IsLinux { return fmt.Errorf("read /proc/kallsyms: %w", internal.ErrNotSupportedOnOS) } if len(symbols) == 0 { return nil } // Attempt to fetch symbols from cache. request := make(map[string]uint64) for name := range symbols { if addr, ok := symAddrs.Load(name); ok { symbols[name] = addr continue } // Mark the symbol to be read from /proc/kallsyms. request[name] = 0 } if len(request) == 0 { // All symbols satisfied from cache. return nil } f, err := os.Open("/proc/kallsyms") if err != nil { return err } defer f.Close() if err := assignAddresses(f, request); err != nil { return fmt.Errorf("loading symbol addresses: %w", err) } // Update the cache with the new symbols. Cache all requested symbols even if // they weren't found, to avoid repeated lookups. for name, addr := range request { symAddrs.Store(name, addr) symbols[name] = addr } return nil } // assignAddresses assigns kernel symbol addresses read from f to values // requested by symbols. Always scans the whole input to make sure the user // didn't request an ambiguous symbol. func assignAddresses(f io.Reader, symbols map[string]uint64) error { if len(symbols) == 0 { return nil } r := newReader(f) for r.Line() { s, err, skip := parseSymbol(r, nil) if err != nil { return fmt.Errorf("parsing kallsyms line: %w", err) } if skip { continue } existing, requested := symbols[string(s.name)] if existing != 0 { // Multiple addresses for a symbol have been found. Return a friendly // error to avoid silently attaching to the wrong symbol. libbpf also // rejects referring to ambiguous symbols. 
return fmt.Errorf("symbol %s(0x%x): duplicate found at address 0x%x: %w", s.name, existing, s.addr, errAmbiguousKsym) } if requested { // Reading a symbol with a zero address is a strong indication that // kptr_restrict is set and the process doesn't have CAP_SYSLOG, or // kptr_restrict is set to 2 (never show addresses). // // When running the kernel with KASLR disabled (like CI kernels running in // microVMs), kallsyms will display many absolute symbols at address 0. // This memory is unlikely to contain anything useful, and production // machines are unlikely to run without KASLR. // // Return a helpful error instead of silently returning zero addresses. if s.addr == 0 { return fmt.Errorf("symbol %s: %w", s.name, internal.ErrRestrictedKernel) } symbols[string(s.name)] = s.addr } } if err := r.Err(); err != nil { return fmt.Errorf("reading kallsyms: %w", err) } return nil } type ksym struct { addr uint64 name []byte mod []byte } // parseSymbol parses a line from /proc/kallsyms into an address, type, name and // module. Skip will be true if the symbol doesn't match any of the given symbol // types. See `man 1 nm` for all available types. // // Only yields symbols whose type is contained in types. An empty value for types // disables this filtering. // // Example line: `ffffffffc1682010 T nf_nat_init\t[nf_nat]` func parseSymbol(r *reader, types []rune) (s ksym, err error, skip bool) { for i := 0; r.Word(); i++ { switch i { // Address of the symbol. case 0: s.addr, err = strconv.ParseUint(r.Text(), 16, 64) if err != nil { return s, fmt.Errorf("parsing address: %w", err), false } // Type of the symbol. Assume the character is ASCII-encoded by converting // it directly to a rune, since it's a fixed field controlled by the kernel. case 1: if len(types) > 0 && !slices.Contains(types, rune(r.Bytes()[0])) { return s, nil, true } // Name of the symbol. case 2: s.name = r.Bytes() // Kernel module the symbol is provided by. 
case 3: s.mod = bytes.Trim(r.Bytes(), "[]") // Ignore any future fields. default: return } } return } ================================================ FILE: internal/kallsyms/kallsyms_test.go ================================================ package kallsyms import ( "bytes" "os" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/testutils" ) var syms = []byte(`0000000000000001 t hid_generic_probe [hid_generic] 00000000000000EA t writenote 00000000000000A0 T tcp_connect 00000000000000B0 B empty_zero_page 00000000000000C0 D kimage_vaddr 00000000000000D0 R __start_pci_fixups_early 00000000000000E0 V hv_root_partition 00000000000000F0 W calibrate_delay_is_known A0000000000000AA a nft_counter_seq [nft_counter] A0000000000000BA b bootconfig_found A0000000000000CA d __func__.10 A0000000000000DA r __ksymtab_LZ4_decompress_fast A0000000000000EA t writenote A0000000000000FA T bench_sym [bench_mod] A0000000000000FF t __kstrtab_功能 [mod]`) func TestParseSyms(t *testing.T) { r := newReader(bytes.NewReader(syms)) i := 0 for ; r.Line(); i++ { s, err, skip := parseSymbol(r, nil) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsFalse(skip)) qt.Assert(t, qt.Not(qt.Equals(s.addr, 0))) qt.Assert(t, qt.Not(qt.Equals(s.name, []byte("")))) } qt.Assert(t, qt.IsNil(r.Err())) qt.Assert(t, qt.Equals(i, 15)) } func TestParseProcKallsyms(t *testing.T) { // Read up to 50k symbols from kallsyms to avoid a slow test. 
r := newReader(mustOpenProcKallsyms(t)) for i := 0; r.Line() && i < 50_000; i++ { s, err, skip := parseSymbol(r, nil) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsFalse(skip)) qt.Assert(t, qt.Not(qt.Equals(s.name, []byte("")))) } qt.Assert(t, qt.IsNil(r.Err())) } func TestAssignAddressesCaching(t *testing.T) { err := AssignAddresses( map[string]uint64{ "bpf_perf_event_output": 0, "foo": 0, }, ) testutils.SkipIfNotSupportedOnOS(t, err) qt.Assert(t, qt.IsNil(err)) v, ok := symAddrs.Load("bpf_perf_event_output") qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.Not(qt.Equals(v, 0))) v, ok = symAddrs.Load("foo") qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.Equals(v, 0)) } func TestAssignAddresses(t *testing.T) { b := bytes.NewBuffer(syms) ksyms := map[string]uint64{ "hid_generic_probe": 0, "tcp_connect": 0, "bootconfig_found": 0, } qt.Assert(t, qt.IsNil(assignAddresses(b, ksyms))) qt.Assert(t, qt.Equals(ksyms["hid_generic_probe"], 0x1)) qt.Assert(t, qt.Equals(ksyms["tcp_connect"], 0xA0)) qt.Assert(t, qt.Equals(ksyms["bootconfig_found"], 0xA0000000000000BA)) b = bytes.NewBuffer(syms) ksyms = map[string]uint64{ "hid_generic_probe": 0, "writenote": 0, } qt.Assert(t, qt.ErrorIs(assignAddresses(b, ksyms), errAmbiguousKsym)) } func BenchmarkAssignAddresses(b *testing.B) { b.ReportAllocs() for b.Loop() { b.StopTimer() f := bytes.NewBuffer(syms) want := map[string]uint64{"bench_sym": 0} b.StartTimer() if err := assignAddresses(f, want); err != nil { b.Fatal(err) } } } // Benchmark getting 5 kernel symbols from /proc/kallsyms. 
// reader is a line and word-oriented reader built for reading /proc/kallsyms.
// It takes an io.Reader and iterates its contents line by line, then word by
// word.
//
// It's designed to allow partial reading of lines without paying the cost of
// allocating objects that will never be accessed, resulting in less work for
// the garbage collector.
type reader struct {
	s    *bufio.Scanner
	line []byte // unread remainder of the current line
	word []byte // most recent word returned by Word

	err error
}

// newReader returns a reader that scans r line by line.
func newReader(r io.Reader) *reader {
	return &reader{
		s: bufio.NewScanner(r),
	}
}

// Bytes returns the current word as a byte slice.
//
// The slice points into the scanner's buffer for the current line and is not
// copied.
func (r *reader) Bytes() []byte {
	return r.word
}

// Text returns the output of Bytes as a string.
func (r *reader) Text() string {
	return string(r.Bytes())
}

// Line advances the reader to the next non-empty line in the input. Calling
// Line resets the current word, making [reader.Bytes] and [reader.Text] return
// empty values. Follow this up with a call to [reader.Word].
//
// Like [bufio.Scanner], [reader.Err] needs to be checked after Line returns
// false to determine if an error occurred during reading.
//
// Returns true if Line can be called again. Returns false if all lines in the
// input have been read.
func (r *reader) Line() bool {
	for r.s.Scan() {
		line := r.s.Bytes()
		if len(line) == 0 {
			continue
		}

		r.line = line
		r.word = nil

		return true
	}
	if err := r.s.Err(); err != nil {
		r.err = err
	}

	return false
}

// Word advances the reader to the next word in the current line. Words are
// separated by a space or a tab, whichever comes first; surrounding whitespace
// is trimmed before each word is cut.
//
// Returns true if a word is found and Word should be called again. Returns
// false when all words on the line have been read.
func (r *reader) Word() bool {
	line := bytes.TrimSpace(r.line)
	if len(line) == 0 {
		return false
	}

	// Split on the first separator of either kind. Looking for a space first
	// and only falling back to a tab would glue "a\tb" into one word whenever
	// a space appears later on the line.
	end := bytes.IndexAny(line, " \t")
	if end < 0 {
		// Last word on the line.
		r.word, r.line = line, nil
		return true
	}

	r.word, r.line = line[:end], line[end+1:]
	return true
}

// Err returns the error recorded by [reader.Line] when scanning the input
// failed, or nil.
func (r *reader) Err() error {
	return r.err
}
// Parse reads a kconfig file from source and returns the CONFIG_* options that
// are set, keyed by option name.
//
// If filter is non-nil, only options contained in filter are returned and
// reading stops as soon as the result holds as many entries as filter. If
// filter is nil, no filtering occurs.
//
// source is decompressed transparently when it starts with a valid gzip
// header. An error is returned if a CONFIG_ line is malformed or the input
// can't be read.
func Parse(source io.ReaderAt, filter map[string]struct{}) (map[string]string, error) {
	// Probe for gzip first. If the header doesn't parse, read the source as
	// plain text from the beginning using a fresh section reader.
	var input io.Reader
	gz, gzErr := gzip.NewReader(io.NewSectionReader(source, 0, math.MaxInt64))
	if gzErr == nil {
		input = gz
	} else {
		input = io.NewSectionReader(source, 0, math.MaxInt64)
	}

	configs := make(map[string]string, len(filter))

	scanner := bufio.NewScanner(input)
	for scanner.Scan() {
		if err := processKconfigLine(scanner.Bytes(), configs, filter); err != nil {
			return nil, fmt.Errorf("cannot parse line: %w", err)
		}

		// Everything the caller asked for has been found, stop early.
		if filter != nil && len(configs) == len(filter) {
			break
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("cannot parse: %w", err)
	}

	if gz == nil {
		return configs, nil
	}

	return configs, gz.Close()
}

// processKconfigLine parses a single kconfig line and stores the option in m.
//
// Golang translation of libbpf bpf_object__process_kconfig_line():
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/libbpf.c#L1874
// It does the same checks but does not put the data inside the BPF map.
//
// Lines not starting with CONFIG_ are ignored. A CONFIG_ line without '=' or
// with an empty value is an error. If filter is non-nil, keys missing from
// filter are ignored.
func processKconfigLine(line []byte, m map[string]string, filter map[string]struct{}) error {
	// Ignore empty lines and "# CONFIG_* is not set".
	if !bytes.HasPrefix(line, []byte("CONFIG_")) {
		return nil
	}

	sep := bytes.IndexByte(line, '=')
	if sep < 0 {
		return fmt.Errorf("line %q does not contain separator '='", line)
	}

	key, value := line[:sep], line[sep+1:]
	if len(value) == 0 {
		return fmt.Errorf("line %q has no value", line)
	}

	if filter != nil {
		// NB: map[string(key)] gets special optimisation help from the compiler
		// and doesn't allocate. Don't turn this into a variable.
		if _, wanted := filter[string(key)]; !wanted {
			return nil
		}
	}

	// This can seem odd, but libbpf only sets the value the first time the key is
	// met:
	// https://github.com/torvalds/linux/blob/0d85b27b0cc6/tools/lib/bpf/libbpf.c#L1906-L1908
	if _, seen := m[string(key)]; seen {
		return nil
	}

	m[string(key)] = string(value)
	return nil
}
expected btf.Int or btf.Enum, got: %T", v) } return nil } func putValueString(data []byte, typ btf.Type, value string) error { array, ok := typ.(*btf.Array) if !ok { return fmt.Errorf("cannot add string value, expected btf.Array, got %T", array) } contentType, ok := btf.UnderlyingType(array.Type).(*btf.Int) if !ok { return fmt.Errorf("cannot add string value, expected array of btf.Int, got %T", contentType) } // Any Int, which is not bool, of one byte could be used to store char: // https://github.com/torvalds/linux/blob/1a5304fecee5/tools/lib/bpf/libbpf.c#L3637-L3638 if contentType.Size != 1 && contentType.Encoding != btf.Bool { return fmt.Errorf("cannot add string value, expected array of btf.Int of size 1, got array of btf.Int of size: %v", contentType.Size) } if !strings.HasPrefix(value, `"`) || !strings.HasSuffix(value, `"`) { return fmt.Errorf(`value %q must start and finish with '"'`, value) } str := strings.Trim(value, `"`) // We need to trim string if the bpf array is smaller. if uint32(len(str)) >= array.Nelems { str = str[:array.Nelems] } // Write the string content to .kconfig. copy(data, str) return nil } func putValueNumber(data []byte, typ btf.Type, value string) error { integer, ok := typ.(*btf.Int) if !ok { return fmt.Errorf("cannot add number value, expected *btf.Int, got: %T", integer) } size := integer.Size sizeInBits := size * 8 var n uint64 var err error if integer.Encoding == btf.Signed { parsed, e := strconv.ParseInt(value, 0, int(sizeInBits)) n = uint64(parsed) err = e } else { parsed, e := strconv.ParseUint(value, 0, int(sizeInBits)) n = uint64(parsed) err = e } if err != nil { return fmt.Errorf("cannot parse value: %w", err) } return PutInteger(data, integer, n) } // PutInteger writes n into data. // // integer determines how much is written into data and what the valid values // are. func PutInteger(data []byte, integer *btf.Int, n uint64) error { // This function should match set_kcfg_value_num in libbpf. 
if integer.Encoding == btf.Bool && n > 1 { return fmt.Errorf("invalid boolean value: %d", n) } if len(data) < int(integer.Size) { return fmt.Errorf("can't fit an integer of size %d into a byte slice of length %d", integer.Size, len(data)) } switch integer.Size { case 1: if integer.Encoding == btf.Signed && (int64(n) > math.MaxInt8 || int64(n) < math.MinInt8) { return fmt.Errorf("can't represent %d as a signed integer of size %d", int64(n), integer.Size) } data[0] = byte(n) case 2: if integer.Encoding == btf.Signed && (int64(n) > math.MaxInt16 || int64(n) < math.MinInt16) { return fmt.Errorf("can't represent %d as a signed integer of size %d", int64(n), integer.Size) } internal.NativeEndian.PutUint16(data, uint16(n)) case 4: if integer.Encoding == btf.Signed && (int64(n) > math.MaxInt32 || int64(n) < math.MinInt32) { return fmt.Errorf("can't represent %d as a signed integer of size %d", int64(n), integer.Size) } internal.NativeEndian.PutUint32(data, uint32(n)) case 8: internal.NativeEndian.PutUint64(data, uint64(n)) default: return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", integer.Size) } return nil } ================================================ FILE: internal/kconfig/kconfig_test.go ================================================ package kconfig import ( "encoding/binary" "os" "testing" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/go-quicktest/qt" ) func BenchmarkParse(b *testing.B) { f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz") if err != nil { b.Fatal(err) } defer f.Close() b.ReportAllocs() for b.Loop() { _, err := Parse(f, nil) if err != nil { b.Fatal(err) } } } func BenchmarkParseFiltered(b *testing.B) { f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz") if err != nil { b.Fatal(err) } defer f.Close() b.ReportAllocs() // CONFIG_ARCH_USE_MEMTEST is the last CONFIG_ in the file. 
// So, we will easily be able to see how many allocated bytes the filtering // permits reducing compared to unfiltered benchmark. filter := map[string]struct{}{"CONFIG_ARCH_USE_MEMTEST": {}} for b.Loop() { _, err := Parse(f, filter) if err != nil { b.Fatal(err) } } } func TestParse(t *testing.T) { t.Parallel() f, err := os.Open("testdata/test.kconfig") if err != nil { t.Fatal("Error reading /testdata/test.kconfig: ", err) } defer f.Close() config, err := Parse(f, nil) if err != nil { t.Fatal("Error parsing kconfig: ", err) } expected := map[string]string{ "CONFIG_TRISTATE": "m", "CONFIG_BOOL": "y", "CONFIG_CHAR": "100", "CONFIG_USHORT": "30000", "CONFIG_INT": "123456", "CONFIG_ULONG": "0xDEADBEEFC0DE", "CONFIG_STR": `"abracad"`, "CONFIG_FOO": `"foo"`, } qt.Assert(t, qt.DeepEquals(config, expected)) } func TestParseFiltered(t *testing.T) { t.Parallel() f, err := os.Open("testdata/test.kconfig") if err != nil { t.Fatal("Error reading /testdata/test.kconfig: ", err) } defer f.Close() filter := map[string]struct{}{"CONFIG_FOO": {}} config, err := Parse(f, filter) if err != nil { t.Fatal("Error parsing gzipped kconfig: ", err) } expected := map[string]string{"CONFIG_FOO": `"foo"`} qt.Assert(t, qt.DeepEquals(config, expected)) } func TestParseGzipped(t *testing.T) { t.Parallel() f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz") if err != nil { t.Fatal("Error reading /testdata/config-6.2.15-300.fc38.x86_64.gz: ", err) } defer f.Close() _, err = Parse(f, nil) if err != nil { t.Fatal("Error parsing gzipped kconfig: ", err) } } func TestParseGzippedFiltered(t *testing.T) { t.Parallel() f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz") if err != nil { t.Fatal("Error reading /testdata/config-6.2.15-300.fc38.x86_64.gz: ", err) } defer f.Close() filter := map[string]struct{}{"CONFIG_HZ": {}} config, err := Parse(f, filter) if err != nil { t.Fatal("Error parsing gzipped kconfig: ", err) } expected := map[string]string{"CONFIG_HZ": "1000"} qt.Assert(t, 
qt.DeepEquals(config, expected)) } func TestProcessKconfigBadLine(t *testing.T) { t.Parallel() m := make(map[string]string) err := processKconfigLine([]byte("CONFIG_FOO"), m, nil) qt.Assert(t, qt.IsNotNil(err), qt.Commentf("line has no '='")) err = processKconfigLine([]byte("CONFIG_FOO="), m, nil) qt.Assert(t, qt.IsNotNil(err), qt.Commentf("line has no value")) } func TestPutValue(t *testing.T) { t.Parallel() type testCase struct { typ btf.Type value string expected any comment string } cases := []testCase{ { typ: &btf.Int{ Size: 1, Encoding: btf.Bool, }, value: "n", expected: int8(0), }, { typ: &btf.Int{ Size: 1, Encoding: btf.Bool, }, value: "y", expected: int8(1), }, { typ: &btf.Int{ Size: 1, Encoding: btf.Bool, }, value: "foo", comment: "Bad value", }, { typ: &btf.Int{}, comment: "Encoding is not Bool", }, { typ: &btf.Int{ Encoding: btf.Bool, }, comment: "Size is not 1", }, { typ: &btf.Enum{ Name: "libbpf_tristate", }, value: "y", expected: int32(TriYes), }, { typ: &btf.Enum{ Name: "libbpf_tristate", }, value: "n", expected: int32(TriNo), }, { typ: &btf.Enum{ Name: "libbpf_tristate", }, value: "m", expected: int32(TriModule), }, { typ: &btf.Enum{ Name: "libbpf_tristate", }, value: "foo", comment: "Bad value", }, { typ: &btf.Enum{ Name: "error", }, comment: "Enum name is wrong", }, { typ: &btf.Array{}, value: "y", comment: "Type is not btf.Int", }, { typ: &btf.Int{ Size: 1, }, value: "255", expected: uint8(255), }, { typ: &btf.Int{ Size: 2, }, value: "0xcafe", expected: uint16(0xcafe), }, { typ: &btf.Int{ Size: 2, }, value: "0755", expected: uint16(0755), }, { typ: &btf.Int{ Size: 4, Encoding: btf.Signed, }, value: "-2147483648", expected: int32(-2147483648), }, { typ: &btf.Int{ Size: 4, Encoding: btf.Signed, }, value: "+2147483647", expected: int32(+2147483647), }, { typ: &btf.Int{ Size: 4, }, value: "0xcafec0de", expected: uint32(0xcafec0de), }, { typ: &btf.Int{ Size: 8, Encoding: btf.Signed, }, value: "+1000000000000", expected: int64(1000000000000), }, { 
typ: &btf.Int{ Size: 8, }, value: "1000000000000", expected: uint64(1000000000000), }, { typ: &btf.Int{ Size: 1, }, value: "foo", comment: "Value is not an int", }, { typ: &btf.Array{}, value: "1", comment: "Type is not btf.Int", }, { typ: &btf.Int{ Size: 16, }, value: "1", comment: "Size is wrong", }, { typ: &btf.Typedef{ Type: &btf.Int{ Size: 1, }, }, value: "1", expected: uint8(1), }, { typ: &btf.Array{ Type: &btf.Int{ Size: 1, Encoding: btf.Char, }, Nelems: 6, }, value: `"foobar"`, expected: []byte("foobar"), }, { typ: &btf.Array{ Type: &btf.Int{ Size: 1, Encoding: btf.Unsigned, }, Nelems: 3, }, value: `"foobar"`, expected: []byte("foo"), }, { typ: &btf.Array{ Type: &btf.Int{ Size: 1, Encoding: btf.Signed, }, Nelems: 2, }, value: `"42"`, expected: []byte("42"), }, { typ: &btf.Int{}, value: `"foo"`, comment: "Type is not btf.Array", }, { typ: &btf.Array{}, value: `"foo"`, comment: "Type is not btf.Array of btf.Int", }, { typ: &btf.Array{ Type: &btf.Int{ Size: 1, Encoding: btf.Bool, }, }, comment: "Type is not btf.Array of btf.Int of size 1 which is not btf.Bool", }, { typ: &btf.Array{ Type: &btf.Int{ Size: 4, Encoding: btf.Char, }, }, value: `"foo"`, comment: "Type is not btf.Array of btf.Char of size 1", }, { typ: &btf.Array{ Type: &btf.Int{ Size: 1, Encoding: btf.Char, }, }, value: `"foo`, comment: `Value does not start and end with '"'`, }, } for _, c := range cases { if len(c.comment) > 0 { err := PutValue(make([]byte, 0), c.typ, c.value) qt.Assert(t, qt.IsNotNil(err), qt.Commentf(c.comment)) continue } expected, err := binary.Append(nil, internal.NativeEndian, c.expected) qt.Assert(t, qt.IsNil(err)) data := make([]byte, len(expected)) err = PutValue(data, c.typ, c.value) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(data, expected)) } } func TestPutInteger(t *testing.T) { t.Parallel() type testCase struct { expected []byte integer *btf.Int n uint64 err bool comment string } cases := []testCase{ { integer: &btf.Int{Size: 1, Encoding: btf.Unsigned}, 
n: 0x01, expected: []byte{0x01}, }, { integer: &btf.Int{Size: 2, Encoding: btf.Unsigned}, n: 0x902a, expected: []byte{0x2a, 0x90}, }, { integer: &btf.Int{Size: 4, Encoding: btf.Unsigned}, n: 0x01234567, expected: []byte{0x67, 0x45, 0x23, 0x01}, }, { integer: &btf.Int{Size: 1, Encoding: btf.Signed}, n: 0x80, err: true, comment: "outside of range int8 -128 ~ 127", }, { integer: &btf.Int{Size: 2, Encoding: btf.Signed}, n: 0xabcdabcd, err: true, comment: "outside of range int16 -32768 ~ 32767", }, { integer: &btf.Int{Size: 4, Encoding: btf.Signed}, n: 0x1234567890, err: true, comment: "outside of range int32 -2147483648 ~ 2147483647", }, { integer: &btf.Int{Size: 2, Encoding: btf.Signed}, n: 0xffffffffffffffff, expected: []byte{0xff, 0xff, 0x00, 0x00}, comment: "n means -1", }, { integer: &btf.Int{Size: 2, Encoding: btf.Signed}, n: 0xffffffffffffffff - 0x8000, err: true, comment: "n means -32768(-MinInt16) - 1 in signed value", }, { integer: &btf.Int{Size: 2, Encoding: btf.Signed}, n: 0x7fff, expected: []byte{0xff, 0x7f}, comment: "maximum value of int16", }, { integer: &btf.Int{Size: 2, Encoding: btf.Unsigned}, n: 0xffff, expected: []byte{0xff, 0xff}, }, { integer: &btf.Int{Size: 4, Encoding: btf.Unsigned}, n: 0xffffffff, expected: []byte{0xff, 0xff, 0xff, 0xff}, }, { integer: &btf.Int{Size: 4, Encoding: btf.Signed}, n: 0x80000000, err: true, comment: "outside of range int32 ~2147483648 ~ 2147483647", }, { integer: &btf.Int{Size: 4, Encoding: btf.Signed}, n: 0xffffffffffffffff - 0x80000000, err: true, comment: "outside of range int32 ~2147483648 ~ 2147483647", }, { integer: &btf.Int{Size: 8, Encoding: btf.Unsigned}, n: 0xffffffffffffffff, expected: []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, }, } for _, c := range cases { data := make([]byte, len(c.expected)) err := PutInteger(data, c.integer, c.n) if c.err { qt.Assert(t, qt.IsNotNil(err)) continue } qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(data, c.expected), qt.Commentf(c.comment)) } } func 
// See https://elixir.bootlin.com/linux/v6.5.5/source/include/uapi/linux/auxvec.h
const (
	_AT_NULL         = 0  // End of vector
	_AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image
)

// auxvRuntimeReader iterates over auxv tag/value pairs held in memory.
type auxvRuntimeReader struct {
	data  [][2]uintptr // tag/value pairs, without the terminating pair
	index int          // position of the next pair to return
}

// Close is a no-op, the reader holds no resources.
func (r *auxvRuntimeReader) Close() error {
	return nil
}

// ReadAuxvPair returns the next tag/value pair.
//
// After all pairs in data have been returned, a single (_AT_NULL, _AT_NULL)
// terminator is returned, followed by io.EOF on every further call.
func (r *auxvRuntimeReader) ReadAuxvPair() (uint64, uint64, error) {
	switch {
	case r.index < len(r.data):
		pair := r.data[r.index]
		r.index++
		return uint64(pair[0]), uint64(pair[1]), nil

	case r.index == len(r.data):
		// we manually add the (_AT_NULL, _AT_NULL) pair at the end
		// that is not provided by the go runtime
		r.index++
		return _AT_NULL, _AT_NULL, nil

	default:
		return 0, 0, io.EOF
	}
}
}, nil } ================================================ FILE: internal/linux/auxv_test.go ================================================ package linux import ( "encoding/binary" "errors" "fmt" "os" "testing" "unsafe" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/unix" ) type auxvFileReader struct { file *os.File order binary.ByteOrder uintptrIs32bits bool } func (r *auxvFileReader) Close() error { return r.file.Close() } type auxvPair32 struct { Tag, Value uint32 } type auxvPair64 struct { Tag, Value uint64 } func (r *auxvFileReader) ReadAuxvPair() (tag, value uint64, _ error) { if r.uintptrIs32bits { var aux auxvPair32 if err := binary.Read(r.file, r.order, &aux); err != nil { return 0, 0, fmt.Errorf("reading auxv entry: %w", err) } return uint64(aux.Tag), uint64(aux.Value), nil } var aux auxvPair64 if err := binary.Read(r.file, r.order, &aux); err != nil { return 0, 0, fmt.Errorf("reading auxv entry: %w", err) } return aux.Tag, aux.Value, nil } func newAuxFileReader(path string, order binary.ByteOrder, uintptrIs32bits bool) (auxvPairReader, error) { // Read data from the auxiliary vector, which is normally passed directly // to the process. Go does not expose that data before go 1.21, so we must read it from procfs. 
// ParseCPUsFromFile reads the CPU range stored in the file at path and returns
// the number of CPUs it describes.
//
// Errors from reading the file are returned unchanged. Errors from parsing the
// contents are wrapped, so they remain reachable via errors.Is, errors.As and
// errors.Unwrap.
func ParseCPUsFromFile(path string) (int, error) {
	spec, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	n, err := parseCPUs(string(spec))
	if err != nil {
		return 0, fmt.Errorf("can't parse %s: %w", path, err)
	}

	return n, nil
}

// parseCPUs parses the number of cpus from a string produced
// by bitmap_list_string() in the Linux kernel.
// Multiple ranges are rejected, since they can't be unified
// into a single number.
// This is the format of /sys/devices/system/cpu/possible, it
// is not suitable for /sys/devices/system/cpu/online, etc.
func parseCPUs(spec string) (int, error) {
	// A single CPU is printed as "0" rather than as a range.
	if strings.Trim(spec, "\n") == "0" {
		return 1, nil
	}

	var low, high int
	n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high)
	if n != 2 || err != nil {
		return 0, fmt.Errorf("invalid format: %s", spec)
	}
	if low != 0 {
		return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec)
	}

	// cpus is 0 indexed
	return high + 1, nil
}
// PlatformPrefix returns the platform-dependent syscall wrapper prefix used by
// the linux kernel, for example "__x64_" on amd64. An empty string is returned
// for architectures without a known prefix.
//
// Based on https://github.com/golang/go/blob/master/src/go/build/syslist.go
// and https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L10047
func PlatformPrefix() string {
	// Pick the architecture token; the surrounding underscores are added once
	// at the end.
	var arch string
	switch runtime.GOARCH {
	case "386":
		arch = "ia32"
	case "amd64", "amd64p32":
		arch = "x64"
	case "arm", "armbe":
		arch = "arm"
	case "arm64", "arm64be":
		arch = "arm64"
	case "mips", "mipsle", "mips64", "mips64le", "mips64p32", "mips64p32le":
		arch = "mips"
	case "s390":
		arch = "s390"
	case "s390x":
		arch = "s390x"
	case "riscv", "riscv64":
		arch = "riscv"
	case "ppc":
		arch = "powerpc"
	case "ppc64", "ppc64le":
		arch = "powerpc64"
	default:
		return ""
	}

	return "__" + arch + "_"
}
bpfFSType is a // negative number when interpreted as int32 so we need to cast via // uint32 to avoid sign extension. fsType = int64(uint32(statfs.Type)) } return fsType, nil } ================================================ FILE: internal/linux/statfs_test.go ================================================ package linux import ( "testing" "github.com/cilium/ebpf/internal/unix" "github.com/go-quicktest/qt" ) func TestFSType(t *testing.T) { for _, fs := range []struct { path string magic int64 }{ {"/sys/kernel/tracing", unix.TRACEFS_MAGIC}, {"/sys/fs/bpf", unix.BPF_FS_MAGIC}, } { fst, err := FSType(fs.path) skipIfNotSupportedOnOS(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(fst, fs.magic)) } } ================================================ FILE: internal/linux/vdso.go ================================================ package linux import ( "debug/elf" "encoding/binary" "errors" "fmt" "io" "math" "os" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/unix" ) var ( errAuxvNoVDSO = errors.New("no vdso address found in auxv") ) // vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library // linked into the current process image. func vdsoVersion() (uint32, error) { av, err := newAuxvRuntimeReader() if err != nil { return 0, err } defer av.Close() vdsoAddr, err := vdsoMemoryAddress(av) if err != nil { return 0, fmt.Errorf("finding vDSO memory address: %w", err) } // Use /proc/self/mem rather than unsafe.Pointer tricks. mem, err := os.Open("/proc/self/mem") if err != nil { return 0, fmt.Errorf("opening mem: %w", err) } defer mem.Close() // Open ELF at provided memory address, as offset into /proc/self/mem. c, err := vdsoLinuxVersionCode(io.NewSectionReader(mem, int64(vdsoAddr), math.MaxInt64)) if err != nil { return 0, fmt.Errorf("reading linux version code: %w", err) } return c, nil } // vdsoMemoryAddress returns the memory address of the vDSO library // linked into the current process image. 
r is an io.Reader into an auxv blob. func vdsoMemoryAddress(r auxvPairReader) (uintptr, error) { // Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`, // the address of a page containing the virtual Dynamic Shared Object (vDSO). for { tag, value, err := r.ReadAuxvPair() if err != nil { return 0, err } switch tag { case _AT_SYSINFO_EHDR: if value != 0 { return uintptr(value), nil } return 0, fmt.Errorf("invalid vDSO address in auxv") // _AT_NULL is always the last tag/val pair in the aux vector // and can be treated like EOF. case _AT_NULL: return 0, errAuxvNoVDSO } } } // format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)' type elfNoteHeader struct { NameSize int32 DescSize int32 Type int32 } // vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in // the ELF notes section of the binary provided by the reader. func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) { hdr, err := internal.NewSafeELFFile(r) if err != nil { return 0, fmt.Errorf("reading vDSO ELF: %w", err) } sections := hdr.SectionsByType(elf.SHT_NOTE) if len(sections) == 0 { return 0, fmt.Errorf("no note section found in vDSO ELF") } for _, sec := range sections { sr := sec.Open() var n elfNoteHeader // Read notes until we find one named 'Linux'. for { if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil { if errors.Is(err, io.EOF) { // We looked at all the notes in this section break } return 0, fmt.Errorf("reading note header: %w", err) } // If a note name is defined, it follows the note header. var name string if n.NameSize > 0 { // Read the note name, aligned to 4 bytes. buf := make([]byte, internal.Align(n.NameSize, 4)) if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil { return 0, fmt.Errorf("reading note name: %w", err) } // Read nul-terminated string. name = unix.ByteSliceToString(buf[:n.NameSize]) } // If a note descriptor is defined, it follows the name. 
// It is possible for a note to have a descriptor but not a name. if n.DescSize > 0 { // LINUX_VERSION_CODE is a uint32 value. if name == "Linux" && n.DescSize == 4 && n.Type == 0 { var version uint32 if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil { return 0, fmt.Errorf("reading note descriptor: %w", err) } return version, nil } // Discard the note descriptor if it exists but we're not interested in it. if _, err := io.CopyN(io.Discard, sr, int64(internal.Align(n.DescSize, 4))); err != nil { return 0, err } } } } return 0, fmt.Errorf("no Linux note in ELF") } ================================================ FILE: internal/linux/vdso_test.go ================================================ package linux import ( "encoding/binary" "errors" "os" "testing" "github.com/go-quicktest/qt" ) func TestAuxvVDSOMemoryAddress(t *testing.T) { for _, testcase := range []struct { source string is32bit bool address uint64 }{ {"auxv64le.bin", false, 0x7ffd377e5000}, {"auxv32le.bin", true, 0xb7fc3000}, } { t.Run(testcase.source, func(t *testing.T) { av, err := newAuxFileReader("testdata/"+testcase.source, binary.LittleEndian, testcase.is32bit) if err != nil { t.Fatal(err) } t.Cleanup(func() { av.Close() }) addr, err := vdsoMemoryAddress(av) if err != nil { t.Fatal(err) } if uint64(addr) != testcase.address { t.Errorf("Expected vDSO memory address %x, got %x", testcase.address, addr) } }) } } func TestAuxvNoVDSO(t *testing.T) { // Copy of auxv.bin with the vDSO pointer removed. 
// KernelVersion returns the version of the currently running kernel.
//
// It is built with sync.OnceValues, so detectKernelVersion runs at most once
// and every call returns the values produced by that first run, including
// the error.
var KernelVersion = sync.OnceValues(detectKernelVersion)

// detectKernelVersion returns the version of the running kernel.
//
// The version is derived from the version code obtained via vdsoVersion; an
// error from vdsoVersion is returned unchanged together with a zero Version.
func detectKernelVersion() (internal.Version, error) {
	vc, err := vdsoVersion()
	if err != nil {
		return internal.Version{}, err
	}
	return internal.NewVersionFromCode(vc), nil
}
// Align returns n rounded up to the next multiple of alignment.
//
// alignment must be non-zero, otherwise the division panics. The arithmetic
// is written for non-negative n: integer division truncates towards zero, so
// negative values are not rounded up to a multiple in the mathematical sense.
func Align[I Integer](n, alignment I) I {
	return (n + alignment - 1) / alignment * alignment
}

// IsPow returns true if n is a power of two.
//
// Zero and negative values are never powers of two. The explicit sign check
// matters for the minimum value of a signed type (for example int8(-128)):
// its only set bit is the sign bit, so n&(n-1) is zero even though the value
// is negative.
func IsPow[I Integer](n I) bool {
	return n > 0 && n&(n-1) == 0
}

// Between returns the value clamped between a and b.
//
// a and b may be given in either order; the smaller one is used as the lower
// bound and the larger one as the upper bound.
func Between[I Integer](val, a, b I) I {
	lower, upper := a, b
	if lower > upper {
		upper, lower = a, b
	}

	val = min(val, upper)
	return max(val, lower)
}

// Integer represents all possible integer types.
// Remove when x/exp/constraints is moved to the standard library.
type Integer interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
}
// integers lists the names of the integer types known by the Go compiler.
// It is used by TestIntegerConstraint to warn if a new integer type is
// introduced. Remove when x/exp/constraints is moved to the standard library.
var integers = []string{"int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64", "uintptr"}

// TestIntegerConstraint fails if package reflect exports a name that looks
// like an integer type but is missing from the integers list.
//
// NOTE(review): presumably the names matched here are reflect's Kind
// constants (Int, Uint8, Uintptr, ...), which is why they are lower-cased
// before matching; confirm against the reflect package.
func TestIntegerConstraint(t *testing.T) {
	// Matches int, uint, their sized variants and uintptr.
	rgx := regexp.MustCompile(`^(u)?int([0-9]*|ptr)?$`)

	pkg, err := importer.Default().Import("reflect")
	if err != nil {
		t.Fatal(err)
	}

	for _, name := range pkg.Scope().Names() {
		name = strings.ToLower(name)
		if !rgx.MatchString(name) {
			// Not an integer-like name, nothing to check.
			continue
		}

		if !slices.Contains(integers, name) {
			t.Errorf("Go type %s is not in the list of known integer types", name)
		}
	}
}
// IsNil returns an error if i is a nil pointer or a nil interface. Otherwise,
// it returns nil.
//
// Only pointers are inspected; nil values of other kinds (maps, slices,
// channels, functions) are not reported.
func IsNil(i any) error {
	v := reflect.ValueOf(i)
	switch v.Kind() {
	case reflect.Invalid:
		// reflect.ValueOf yields the zero Value for a nil interface.
		return fmt.Errorf("nil interface")
	case reflect.Pointer:
		if v.IsNil() {
			return fmt.Errorf("nil %T", i)
		}
	}
	return nil
}

// Identifier turns a C style type or field name into an exportable Go equivalent.
//
// Runes that are not letters, digits or underscores are dropped. An
// underscore at the start of the name, or one following a digit or a lower
// case letter, is dropped as well and the letter after it is upper-cased.
// Underscores following an upper case letter or another kept underscore are
// preserved.
func Identifier(str string) string {
	// prev is the previously emitted rune, or -1 if there is none yet or the
	// previous rune was an underscore that got deleted.
	prev := rune(-1)
	return strings.Map(func(r rune) rune {
		// See https://golang.org/ref/spec#Identifiers
		switch {
		case unicode.IsLetter(r):
			if prev == -1 {
				r = unicode.ToUpper(r)
			}

		case r == '_':
			switch {
			// The previous rune was deleted, or we are at the
			// beginning of the string.
			case prev == -1:
				fallthrough

			// The previous rune is a lower case letter or a digit.
			case unicode.IsDigit(prev) || (unicode.IsLetter(prev) && unicode.IsLower(prev)):
				// delete the current rune, and force the
				// next character to be uppercased.
				r = -1
			}

		case unicode.IsDigit(r):

		default:
			// Delete the current rune. prev is unchanged.
			return -1
		}

		prev = r
		return r
	}, str)
}

// WriteFormatted outputs a formatted src into out.
//
// If formatting fails it returns an informative error message: every syntax
// error that carries a usable position is annotated with the remainder of
// the offending source line. Errors that are not a scanner.ErrorList are
// returned unchanged, as are write errors from out.
func WriteFormatted(src []byte, out io.Writer) error {
	formatted, err := format.Source(src)
	if err == nil {
		_, err = out.Write(formatted)
		return err
	}

	var el scanner.ErrorList
	if !errors.As(err, &el) {
		return err
	}

	var nel scanner.ErrorList
	for _, e := range el {
		// format.Source parses fragments without a package clause by
		// prepending synthetic source, so reported offsets can lie beyond
		// the end of src. Slicing src with such an offset would panic, so
		// those errors are passed through without annotation.
		if !e.Pos.IsValid() || e.Pos.Offset < 0 || e.Pos.Offset > len(src) {
			nel = append(nel, e)
			continue
		}

		buf := src[e.Pos.Offset:]
		nl := bytes.IndexRune(buf, '\n')
		if nl == -1 {
			nel = append(nel, e)
			continue
		}

		e.Msg += ": " + string(buf[:nl])
		nel = append(nel, e)
	}

	return nel
}
// GoTypeName is like %T, but elides the package name.
//
// Pointers to a type are peeled off, so a T, *T and **T all yield the name
// of T. The package path of the type itself is also removed wherever it
// occurs in the name, for example in the type arguments of a generic type.
//
// Like %T, a nil interface value yields "<nil>".
func GoTypeName(t any) string {
	rT := reflect.TypeOf(t)
	if rT == nil {
		// reflect.TypeOf returns nil for a nil interface value; calling
		// Kind on it would panic.
		return "<nil>"
	}

	for rT.Kind() == reflect.Pointer {
		rT = rT.Elem()
	}

	name := rT.Name()
	if pkgPath := rT.PkgPath(); pkgPath != "" {
		name = strings.ReplaceAll(name, pkgPath+".", "")
	}

	return name
}
const ( LinuxTag = uint32(iota) << platformShift WindowsTag ) const ( platformMax = 1<<3 - 1 // most not exceed 3 bits to avoid setting the high bit platformShift = 28 platformMask = platformMax << platformShift ) func tagForPlatform(platform string) (uint32, error) { switch platform { case Linux: return LinuxTag, nil case Windows: return WindowsTag, nil default: return 0, fmt.Errorf("unrecognized platform: %s", platform) } } func platformForConstant(c uint32) string { tag := uint32(c & platformMask) switch tag { case LinuxTag: return Linux case WindowsTag: return Windows default: return "" } } // Encode a platform and a value into a tagged constant. // // Returns an error if platform is unknown or c is out of bounds. func EncodeConstant[T ~uint32](platform string, c uint32) (T, error) { if c>>platformShift > 0 { return 0, fmt.Errorf("invalid constant 0x%x", c) } tag, err := tagForPlatform(platform) if err != nil { return 0, err } return T(tag | c), nil } // Decode a platform and a value from a tagged constant. func DecodeConstant[T ~uint32](c T) (string, uint32) { v := uint32(c) & ^uint32(platformMask) return platformForConstant(uint32(c)), v } ================================================ FILE: internal/platform/constants_test.go ================================================ package platform import ( "testing" "github.com/go-quicktest/qt" ) func TestConstant(t *testing.T) { const maxConstant = uint32(1<= len(set.Val) { return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal) } // Write the signal bit into its corresponding word at the corrected offset. 
// TestSigset checks that sigsetAdd sets the expected bit for the lowest and
// the highest signal number that fit into a unix.Sigset_t, and that signal
// numbers outside of that range are rejected.
func TestSigset(t *testing.T) {
	// Number of bits in a Sigset_t, which is also the highest signal number
	// it can represent.
	const maxSignal = unix.Signal(unsafe.Sizeof(unix.Sigset_t{}) * 8)

	// Type-infer a sigset word. This is a typed uint of 32 or 64 bits depending
	// on the target architecture, so we can't use an untyped uint.
	zero := unix.Sigset_t{}.Val[0]
	words := len(unix.Sigset_t{}.Val)

	var want, got unix.Sigset_t
	// Flip the first bit of the first word.
	if err := sigsetAdd(&got, 1); err != nil {
		t.Fatal(err)
	}
	want.Val[0] = 1
	if want != got {
		t.Fatalf("expected first word to be 0x%x, got: 0x%x", want, got)
	}

	// And the last bit of the last word.
	if err := sigsetAdd(&got, maxSignal); err != nil {
		t.Fatal(err)
	}
	// ^zero>>1 has every bit but the most significant one set, so its
	// complement is a word with only the most significant bit set.
	want.Val[words-1] = ^(^zero >> 1)
	if want != got {
		t.Fatalf("expected last word to be 0x%x, got: 0x%x", want, got)
	}

	// One past the last representable signal must not be accepted.
	if err := sigsetAdd(&got, maxSignal+1); err == nil {
		t.Fatal("expected out-of-bounds add to be rejected")
	}

	if err := sigsetAdd(&got, -1); err == nil {
		t.Fatal("expected negative signal to be rejected")
	}
}
runtime.LockOSThread() defer runtime.UnlockOSThread() var old unix.Sigset_t if err := unix.PthreadSigmask(0, nil, &old); err != nil { t.Fatal("get sigmask:", err) } maskProfilerSignal() var have unix.Sigset_t if err := unix.PthreadSigmask(0, nil, &have); err != nil { t.Fatal("get sigmask:", err) } want := have qt.Assert(t, qt.IsNil(sigsetAdd(&want, unix.SIGPROF))) qt.Assert(t, qt.Equals(have, want)) unmaskProfilerSignal() if err := unix.PthreadSigmask(0, nil, &have); err != nil { t.Fatal("get sigmask:", err) } qt.Assert(t, qt.Equals(have, old)) } ================================================ FILE: internal/sys/syscall.go ================================================ package sys import ( "runtime" "unsafe" "github.com/cilium/ebpf/internal/unix" ) // ENOTSUPP is a Linux internal error code that has leaked into UAPI. // // It is not the same as ENOTSUP or EOPNOTSUPP. const ENOTSUPP = unix.Errno(524) // Info is implemented by all structs that can be passed to the ObjInfo syscall. // // MapInfo // ProgInfo // LinkInfo // BtfInfo type Info interface { info() (unsafe.Pointer, uint32) } var _ Info = (*MapInfo)(nil) func (i *MapInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } var _ Info = (*ProgInfo)(nil) func (i *ProgInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } var _ Info = (*LinkInfo)(nil) func (i *LinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *TracingLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *CgroupLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *NetNsLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *XDPLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *TcxLinkInfo) info() 
(unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *NetfilterLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *NetkitLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *KprobeMultiLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *UprobeMultiLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *RawTracepointLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *KprobeLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *UprobeLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *TracepointLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *EventLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } var _ Info = (*BtfInfo)(nil) func (i *BtfInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } func (i *PerfEventLinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } // ObjInfo retrieves information about a BPF Fd. // // info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo. func ObjInfo(fd *FD, info Info) error { ptr, len := info.info() err := ObjGetInfoByFd(&ObjGetInfoByFdAttr{ BpfFd: fd.Uint(), InfoLen: len, Info: UnsafePointer(ptr), }) runtime.KeepAlive(fd) return err } // BPFObjName is a null-terminated string made up of // 'A-Za-z0-9_' characters. type ObjName [BPF_OBJ_NAME_LEN]byte // NewObjName truncates the result if it is too long. 
func NewObjName(name string) ObjName { var result ObjName copy(result[:BPF_OBJ_NAME_LEN-1], name) return result } // LogLevel controls the verbosity of the kernel's eBPF program verifier. type LogLevel uint32 const ( BPF_LOG_LEVEL1 LogLevel = 1 << iota BPF_LOG_LEVEL2 BPF_LOG_STATS ) // MapID uniquely identifies a bpf_map. type MapID uint32 // ProgramID uniquely identifies a bpf_map. type ProgramID uint32 // LinkID uniquely identifies a bpf_link. type LinkID uint32 // BTFID uniquely identifies a BTF blob loaded into the kernel. type BTFID uint32 // TypeID identifies a type in a BTF blob. type TypeID uint32 // Flags used by bpf_mprog. const ( BPF_F_REPLACE = 1 << (iota + 2) BPF_F_BEFORE BPF_F_AFTER BPF_F_ID BPF_F_LINK_MPROG = 1 << 13 // aka BPF_F_LINK ) // Flags used by BPF_PROG_LOAD. const ( BPF_F_SLEEPABLE = 1 << 4 BPF_F_XDP_HAS_FRAGS = 1 << 5 BPF_F_XDP_DEV_BOUND_ONLY = 1 << 6 ) const BPF_TAG_SIZE = 8 const BPF_OBJ_NAME_LEN = 16 // wrappedErrno wraps [unix.Errno] to prevent direct comparisons with // syscall.E* or unix.E* constants. // // You should never export an error of this type. type wrappedErrno struct { unix.Errno } func (we wrappedErrno) Unwrap() error { return we.Errno } func (we wrappedErrno) Error() string { if we.Errno == ENOTSUPP { return "operation not supported" } return we.Errno.Error() } type syscallError struct { error errno unix.Errno } func Error(err error, errno unix.Errno) error { return &syscallError{err, errno} } func (se *syscallError) Is(target error) bool { return target == se.error } func (se *syscallError) Unwrap() error { return se.errno } ================================================ FILE: internal/sys/syscall_other.go ================================================ //go:build !windows package sys import ( "fmt" "os" "path/filepath" "runtime" "strings" "unsafe" "github.com/cilium/ebpf/internal/unix" ) // BPF wraps SYS_BPF. // // Any pointers contained in attr must use the Pointer type from this package. 
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
//
// The first return value of the syscall is returned as is. A non-zero errno
// is returned as a wrappedErrno. BPF_PROG_LOAD is retried for as long as it
// fails with EAGAIN.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
	// Prevent the Go profiler from repeatedly interrupting the verifier,
	// which could otherwise lead to a livelock due to receiving EAGAIN.
	if cmd == BPF_PROG_LOAD || cmd == BPF_PROG_RUN {
		maskProfilerSignal()
		defer unmaskProfilerSignal()
	}

	for {
		r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
		// attr is passed to the kernel as a uintptr; keep it reachable
		// until the syscall has returned.
		runtime.KeepAlive(attr)

		// As of ~4.20 the verifier can be interrupted by a signal,
		// and returns EAGAIN in that case.
		if errNo == unix.EAGAIN && cmd == BPF_PROG_LOAD {
			continue
		}

		// err stays a nil interface on success so that callers can compare
		// it against nil.
		var err error
		if errNo != 0 {
			err = wrappedErrno{errNo}
		}

		return r1, err
	}
}

// ObjGetTyped wraps [ObjGet] with a readlink call to extract the type of the
// underlying bpf object.
//
// If the type cannot be determined the file descriptor is closed again and
// an error is returned.
func ObjGetTyped(attr *ObjGetAttr) (*FD, ObjType, error) {
	fd, err := ObjGet(attr)
	if err != nil {
		return nil, 0, err
	}

	typ, err := readType(fd)
	if err != nil {
		// Best effort: the readType error is the one worth reporting.
		_ = fd.Close()
		return nil, 0, fmt.Errorf("reading fd type: %w", err)
	}

	return fd, typ, nil
}
func readType(fd *FD) (ObjType, error) { s, err := os.Readlink(filepath.Join("/proc/self/fd/", fd.String())) if err != nil { return 0, fmt.Errorf("readlink fd %d: %w", fd.Int(), err) } s = strings.TrimPrefix(s, "anon_inode:") switch s { case "bpf-map": return BPF_TYPE_MAP, nil case "bpf-prog": return BPF_TYPE_PROG, nil case "bpf-link": return BPF_TYPE_LINK, nil } return 0, fmt.Errorf("unknown type %s of fd %d", s, fd.Int()) } ================================================ FILE: internal/sys/syscall_test.go ================================================ package sys import ( "errors" "math" "testing" "unsafe" "github.com/cilium/ebpf/internal/testutils/testmain" "github.com/cilium/ebpf/internal/unix" "github.com/go-quicktest/qt" ) func TestBPF(t *testing.T) { fd, err := MapCreate(&MapCreateAttr{ MapType: BPF_MAP_TYPE_HASH, KeySize: 4, ValueSize: 4, MaxEntries: 1, }) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(fd.Close())) } func TestBPFAllocations(t *testing.T) { n := testing.AllocsPerRun(10, func() { var attr struct { Foo uint64 } BPF(math.MaxUint32, unsafe.Pointer(&attr), 0) }) qt.Assert(t, qt.Equals(n, 0)) } func TestObjName(t *testing.T) { name := NewObjName("more_than_16_characters_long") if name[len(name)-1] != 0 { t.Error("NewBPFObjName doesn't null terminate") } if len(name) != BPF_OBJ_NAME_LEN { t.Errorf("Name is %d instead of %d bytes long", len(name), BPF_OBJ_NAME_LEN) } } func TestWrappedErrno(t *testing.T) { a := error(wrappedErrno{unix.EINVAL}) b := error(unix.EINVAL) if a == b { t.Error("wrappedErrno is comparable to plain errno") } if !errors.Is(a, b) { t.Error("errors.Is(wrappedErrno, errno) returns false") } if errors.Is(a, unix.EAGAIN) { t.Error("errors.Is(wrappedErrno, EAGAIN) returns true") } notsupp := wrappedErrno{ENOTSUPP} qt.Assert(t, qt.StringContains(notsupp.Error(), "operation not supported")) } func TestSyscallError(t *testing.T) { err := errors.New("foo") foo := Error(err, unix.EINVAL) if !errors.Is(foo, unix.EINVAL) { 
// BPF calls the BPF syscall wrapper in ebpfapi.dll.
//
// Any pointers contained in attr must use the Pointer type from this package.
//
// The implementation lives in https://github.com/microsoft/ebpf-for-windows/blob/main/libs/api/bpf_syscall.cpp
//
// A negative result is treated as a negated errno and returned as a
// wrappedErrno, except for EINVAL combined with ERROR_CALL_NOT_IMPLEMENTED,
// which is reported as internal.ErrNotSupportedOnOS.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
	// On Linux we need to guard against preemption by the profiler here. On
	// Windows it seems like a cgocall may not be preempted:
	// https://github.com/golang/go/blob/8b51146c698bcfcc2c2b73fa9390db5230f2ce0a/src/runtime/os_windows.go#L1240-L1246

	// Resolve the address of the wrapper; fails if it cannot be found.
	addr, err := efw.BPF.Find()
	if err != nil {
		return 0, err
	}

	// Using [LazyProc.Call] forces attr to escape, which isn't the case when using syscall.Syscall directly.
	r1, _, lastError := syscall.SyscallN(addr, uintptr(cmd), uintptr(attr), size)

	// The raw result is reinterpreted as a signed integer; negative values
	// carry the negated errno.
	if ret := int(efw.Int(r1)); ret < 0 {
		errNo := unix.Errno(-ret)
		if errNo == unix.EINVAL && lastError == windows.ERROR_CALL_NOT_IMPLEMENTED {
			return 0, internal.ErrNotSupportedOnOS
		}
		return 0, wrappedErrno{errNo}
	}

	return r1, nil
}
func ObjGetTyped(attr *ObjGetAttr) (*FD, ObjType, error) { fd, err := ObjGet(attr) if err != nil { return nil, 0, err } efwType, err := efw.EbpfObjectGetInfoByFd(fd.Int(), nil, nil) if err != nil { _ = fd.Close() return nil, 0, err } switch efwType { case efw.EBPF_OBJECT_UNKNOWN: return fd, BPF_TYPE_UNSPEC, nil case efw.EBPF_OBJECT_MAP: return fd, BPF_TYPE_MAP, nil case efw.EBPF_OBJECT_LINK: return fd, BPF_TYPE_LINK, nil case efw.EBPF_OBJECT_PROGRAM: return fd, BPF_TYPE_PROG, nil default: return nil, 0, fmt.Errorf("unrecognized object type %v", efwType) } } ================================================ FILE: internal/sys/types.go ================================================ // Code generated by internal/cmd/gentypes; DO NOT EDIT. package sys import ( "structs" "unsafe" ) const ( BPF_ADJ_ROOM_ENCAP_L2_MASK = 255 BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56 BPF_ANY = 0 BPF_CSUM_LEVEL_DEC = 2 BPF_CSUM_LEVEL_INC = 1 BPF_CSUM_LEVEL_QUERY = 0 BPF_CSUM_LEVEL_RESET = 3 BPF_EXIST = 2 BPF_FIB_LKUP_RET_BLACKHOLE = 1 BPF_FIB_LKUP_RET_FRAG_NEEDED = 8 BPF_FIB_LKUP_RET_FWD_DISABLED = 5 BPF_FIB_LKUP_RET_NOT_FWDED = 4 BPF_FIB_LKUP_RET_NO_NEIGH = 7 BPF_FIB_LKUP_RET_NO_SRC_ADDR = 9 BPF_FIB_LKUP_RET_PROHIBIT = 3 BPF_FIB_LKUP_RET_SUCCESS = 0 BPF_FIB_LKUP_RET_UNREACHABLE = 2 BPF_FIB_LKUP_RET_UNSUPP_LWT = 6 BPF_FIB_LOOKUP_DIRECT = 1 BPF_FIB_LOOKUP_MARK = 32 BPF_FIB_LOOKUP_OUTPUT = 2 BPF_FIB_LOOKUP_SKIP_NEIGH = 4 BPF_FIB_LOOKUP_SRC = 16 BPF_FIB_LOOKUP_TBID = 8 BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = 1 BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = 4 BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = 2 BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = 128 BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = 256 BPF_F_ADJ_ROOM_ENCAP_L2_ETH = 64 BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = 2 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = 4 BPF_F_ADJ_ROOM_ENCAP_L4_GRE = 8 BPF_F_ADJ_ROOM_ENCAP_L4_UDP = 16 BPF_F_ADJ_ROOM_FIXED_GSO = 1 BPF_F_ADJ_ROOM_NO_CSUM_RESET = 32 BPF_F_BPRM_SECUREEXEC = 1 BPF_F_BROADCAST = 8 BPF_F_CLONE = 512 BPF_F_CTXLEN_MASK = 4503595332403200 BPF_F_CURRENT_CPU = 
4294967295 BPF_F_CURRENT_NETNS = 18446744073709551615 BPF_F_DONT_FRAGMENT = 4 BPF_F_EXCLUDE_INGRESS = 16 BPF_F_FAST_STACK_CMP = 512 BPF_F_GET_BRANCH_RECORDS_SIZE = 1 BPF_F_HDR_FIELD_MASK = 15 BPF_F_INDEX_MASK = 4294967295 BPF_F_INGRESS = 1 BPF_F_INNER_MAP = 4096 BPF_F_INVALIDATE_HASH = 2 BPF_F_IPV6 = 128 BPF_F_KPROBE_MULTI_RETURN = 1 BPF_F_LINK = 8192 BPF_F_LOCK = 4 BPF_F_MARK_ENFORCE = 64 BPF_F_MARK_MANGLED_0 = 32 BPF_F_MMAPABLE = 1024 BPF_F_NEIGH = 65536 BPF_F_NEXTHOP = 262144 BPF_F_NO_COMMON_LRU = 2 BPF_F_NO_PREALLOC = 1 BPF_F_NO_TUNNEL_KEY = 16 BPF_F_NO_USER_CONV = 262144 BPF_F_NUMA_NODE = 4 BPF_F_PATH_FD = 16384 BPF_F_PEER = 131072 BPF_F_PRESERVE_ELEMS = 2048 BPF_F_PSEUDO_HDR = 16 BPF_F_RDONLY = 8 BPF_F_RDONLY_PROG = 128 BPF_F_RECOMPUTE_CSUM = 1 BPF_F_REUSE_STACKID = 1024 BPF_F_SEGV_ON_FAULT = 131072 BPF_F_SEQ_NUMBER = 8 BPF_F_SKIP_FIELD_MASK = 255 BPF_F_STACK_BUILD_ID = 32 BPF_F_SYSCTL_BASE_NAME = 1 BPF_F_TIMER_ABS = 1 BPF_F_TIMER_CPU_PIN = 2 BPF_F_TOKEN_FD = 65536 BPF_F_TUNINFO_FLAGS = 16 BPF_F_TUNINFO_IPV6 = 1 BPF_F_UPROBE_MULTI_RETURN = 1 BPF_F_USER_BUILD_ID = 2048 BPF_F_USER_STACK = 256 BPF_F_VTYPE_BTF_OBJ_FD = 32768 BPF_F_WRONLY = 16 BPF_F_WRONLY_PROG = 256 BPF_F_ZERO_CSUM_TX = 2 BPF_F_ZERO_SEED = 64 BPF_LOAD_HDR_OPT_TCP_SYN = 1 BPF_LOCAL_STORAGE_GET_F_CREATE = 1 BPF_MAX_LOOPS = 8388608 BPF_MAX_TIMED_LOOPS = 65535 BPF_MAX_TRAMP_LINKS = 38 BPF_NOEXIST = 1 BPF_RB_AVAIL_DATA = 0 BPF_RB_CONS_POS = 2 BPF_RB_FORCE_WAKEUP = 2 BPF_RB_NO_WAKEUP = 1 BPF_RB_PROD_POS = 3 BPF_RB_RING_SIZE = 1 BPF_REG_0 = 0 BPF_REG_1 = 1 BPF_REG_10 = 10 BPF_REG_2 = 2 BPF_REG_3 = 3 BPF_REG_4 = 4 BPF_REG_5 = 5 BPF_REG_6 = 6 BPF_REG_7 = 7 BPF_REG_8 = 8 BPF_REG_9 = 9 BPF_RINGBUF_BUSY_BIT = 2147483648 BPF_RINGBUF_DISCARD_BIT = 1073741824 BPF_RINGBUF_HDR_SZ = 8 BPF_SKB_CLOCK_MONOTONIC = 1 BPF_SKB_CLOCK_REALTIME = 0 BPF_SKB_CLOCK_TAI = 2 BPF_SKB_TSTAMP_DELIVERY_MONO = 1 BPF_SKB_TSTAMP_UNSPEC = 0 BPF_SK_LOOKUP_F_NO_REUSEPORT = 2 BPF_SK_LOOKUP_F_REPLACE = 1 BPF_SK_STORAGE_GET_F_CREATE = 1 
BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB = 4 BPF_SOCK_OPS_ALL_CB_FLAGS = 127 BPF_SOCK_OPS_BASE_RTT = 7 BPF_SOCK_OPS_HDR_OPT_LEN_CB = 14 BPF_SOCK_OPS_NEEDS_ECN = 6 BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = 16 BPF_SOCK_OPS_PARSE_HDR_OPT_CB = 13 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = 32 BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB = 5 BPF_SOCK_OPS_RETRANS_CB = 9 BPF_SOCK_OPS_RETRANS_CB_FLAG = 2 BPF_SOCK_OPS_RTO_CB = 8 BPF_SOCK_OPS_RTO_CB_FLAG = 1 BPF_SOCK_OPS_RTT_CB = 12 BPF_SOCK_OPS_RTT_CB_FLAG = 8 BPF_SOCK_OPS_RWND_INIT = 2 BPF_SOCK_OPS_STATE_CB = 10 BPF_SOCK_OPS_STATE_CB_FLAG = 4 BPF_SOCK_OPS_TCP_CONNECT_CB = 3 BPF_SOCK_OPS_TCP_LISTEN_CB = 11 BPF_SOCK_OPS_TIMEOUT_INIT = 1 BPF_SOCK_OPS_TSTAMP_ACK_CB = 19 BPF_SOCK_OPS_TSTAMP_SCHED_CB = 16 BPF_SOCK_OPS_TSTAMP_SENDMSG_CB = 20 BPF_SOCK_OPS_TSTAMP_SND_HW_CB = 18 BPF_SOCK_OPS_TSTAMP_SND_SW_CB = 17 BPF_SOCK_OPS_VOID = 0 BPF_SOCK_OPS_WRITE_HDR_OPT_CB = 15 BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = 64 BPF_STREAM_MAX_CAPACITY = 100000 BPF_TASK_ITER_ALL_PROCS = 0 BPF_TASK_ITER_ALL_THREADS = 1 BPF_TASK_ITER_PROC_THREADS = 2 BPF_TCP_BOUND_INACTIVE = 13 BPF_TCP_CLOSE = 7 BPF_TCP_CLOSE_WAIT = 8 BPF_TCP_CLOSING = 11 BPF_TCP_ESTABLISHED = 1 BPF_TCP_FIN_WAIT1 = 4 BPF_TCP_FIN_WAIT2 = 5 BPF_TCP_LAST_ACK = 9 BPF_TCP_LISTEN = 10 BPF_TCP_MAX_STATES = 14 BPF_TCP_NEW_SYN_RECV = 12 BPF_TCP_SYN_RECV = 3 BPF_TCP_SYN_SENT = 2 BPF_TCP_TIME_WAIT = 6 BPF_WRITE_HDR_TCP_CURRENT_MSS = 1 BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2 BPF_XFRM_STATE_OPTS_SZ = 36 ) type AdjRoomMode uint32 const ( BPF_ADJ_ROOM_NET AdjRoomMode = 0 BPF_ADJ_ROOM_MAC AdjRoomMode = 1 ) type AttachType uint32 const ( BPF_CGROUP_INET_INGRESS AttachType = 0 BPF_CGROUP_INET_EGRESS AttachType = 1 BPF_CGROUP_INET_SOCK_CREATE AttachType = 2 BPF_CGROUP_SOCK_OPS AttachType = 3 BPF_SK_SKB_STREAM_PARSER AttachType = 4 BPF_SK_SKB_STREAM_VERDICT AttachType = 5 BPF_CGROUP_DEVICE AttachType = 6 BPF_SK_MSG_VERDICT AttachType = 7 BPF_CGROUP_INET4_BIND AttachType = 8 BPF_CGROUP_INET6_BIND AttachType = 9 
// Cmd is a uint32-backed enumeration of command numbers. The wrappers in this
// file pass one of these constants as the first argument to BPF.
//
// BPF_PROG_TEST_RUN and BPF_PROG_RUN deliberately share the value 10, so the
// two names are interchangeable. __MAX_BPF_CMD is one greater than the largest
// command listed here and is not itself a command.
type Cmd uint32

const (
	BPF_MAP_CREATE                  Cmd = 0
	BPF_MAP_LOOKUP_ELEM             Cmd = 1
	BPF_MAP_UPDATE_ELEM             Cmd = 2
	BPF_MAP_DELETE_ELEM             Cmd = 3
	BPF_MAP_GET_NEXT_KEY            Cmd = 4
	BPF_PROG_LOAD                   Cmd = 5
	BPF_OBJ_PIN                     Cmd = 6
	BPF_OBJ_GET                     Cmd = 7
	BPF_PROG_ATTACH                 Cmd = 8
	BPF_PROG_DETACH                 Cmd = 9
	BPF_PROG_TEST_RUN               Cmd = 10
	BPF_PROG_RUN                    Cmd = 10
	BPF_PROG_GET_NEXT_ID            Cmd = 11
	BPF_MAP_GET_NEXT_ID             Cmd = 12
	BPF_PROG_GET_FD_BY_ID           Cmd = 13
	BPF_MAP_GET_FD_BY_ID            Cmd = 14
	BPF_OBJ_GET_INFO_BY_FD          Cmd = 15
	BPF_PROG_QUERY                  Cmd = 16
	BPF_RAW_TRACEPOINT_OPEN         Cmd = 17
	BPF_BTF_LOAD                    Cmd = 18
	BPF_BTF_GET_FD_BY_ID            Cmd = 19
	BPF_TASK_FD_QUERY               Cmd = 20
	BPF_MAP_LOOKUP_AND_DELETE_ELEM  Cmd = 21
	BPF_MAP_FREEZE                  Cmd = 22
	BPF_BTF_GET_NEXT_ID             Cmd = 23
	BPF_MAP_LOOKUP_BATCH            Cmd = 24
	BPF_MAP_LOOKUP_AND_DELETE_BATCH Cmd = 25
	BPF_MAP_UPDATE_BATCH            Cmd = 26
	BPF_MAP_DELETE_BATCH            Cmd = 27
	BPF_LINK_CREATE                 Cmd = 28
	BPF_LINK_UPDATE                 Cmd = 29
	BPF_LINK_GET_FD_BY_ID           Cmd = 30
	BPF_LINK_GET_NEXT_ID            Cmd = 31
	BPF_ENABLE_STATS                Cmd = 32
	BPF_ITER_CREATE                 Cmd = 33
	BPF_LINK_DETACH                 Cmd = 34
	BPF_PROG_BIND_MAP               Cmd = 35
	BPF_TOKEN_CREATE                Cmd = 36
	BPF_PROG_STREAM_READ_BY_FD      Cmd = 37
	__MAX_BPF_CMD                   Cmd = 38
)
BPF_FUNC_perf_event_output FunctionId = 25 BPF_FUNC_skb_load_bytes FunctionId = 26 BPF_FUNC_get_stackid FunctionId = 27 BPF_FUNC_csum_diff FunctionId = 28 BPF_FUNC_skb_get_tunnel_opt FunctionId = 29 BPF_FUNC_skb_set_tunnel_opt FunctionId = 30 BPF_FUNC_skb_change_proto FunctionId = 31 BPF_FUNC_skb_change_type FunctionId = 32 BPF_FUNC_skb_under_cgroup FunctionId = 33 BPF_FUNC_get_hash_recalc FunctionId = 34 BPF_FUNC_get_current_task FunctionId = 35 BPF_FUNC_probe_write_user FunctionId = 36 BPF_FUNC_current_task_under_cgroup FunctionId = 37 BPF_FUNC_skb_change_tail FunctionId = 38 BPF_FUNC_skb_pull_data FunctionId = 39 BPF_FUNC_csum_update FunctionId = 40 BPF_FUNC_set_hash_invalid FunctionId = 41 BPF_FUNC_get_numa_node_id FunctionId = 42 BPF_FUNC_skb_change_head FunctionId = 43 BPF_FUNC_xdp_adjust_head FunctionId = 44 BPF_FUNC_probe_read_str FunctionId = 45 BPF_FUNC_get_socket_cookie FunctionId = 46 BPF_FUNC_get_socket_uid FunctionId = 47 BPF_FUNC_set_hash FunctionId = 48 BPF_FUNC_setsockopt FunctionId = 49 BPF_FUNC_skb_adjust_room FunctionId = 50 BPF_FUNC_redirect_map FunctionId = 51 BPF_FUNC_sk_redirect_map FunctionId = 52 BPF_FUNC_sock_map_update FunctionId = 53 BPF_FUNC_xdp_adjust_meta FunctionId = 54 BPF_FUNC_perf_event_read_value FunctionId = 55 BPF_FUNC_perf_prog_read_value FunctionId = 56 BPF_FUNC_getsockopt FunctionId = 57 BPF_FUNC_override_return FunctionId = 58 BPF_FUNC_sock_ops_cb_flags_set FunctionId = 59 BPF_FUNC_msg_redirect_map FunctionId = 60 BPF_FUNC_msg_apply_bytes FunctionId = 61 BPF_FUNC_msg_cork_bytes FunctionId = 62 BPF_FUNC_msg_pull_data FunctionId = 63 BPF_FUNC_bind FunctionId = 64 BPF_FUNC_xdp_adjust_tail FunctionId = 65 BPF_FUNC_skb_get_xfrm_state FunctionId = 66 BPF_FUNC_get_stack FunctionId = 67 BPF_FUNC_skb_load_bytes_relative FunctionId = 68 BPF_FUNC_fib_lookup FunctionId = 69 BPF_FUNC_sock_hash_update FunctionId = 70 BPF_FUNC_msg_redirect_hash FunctionId = 71 BPF_FUNC_sk_redirect_hash FunctionId = 72 BPF_FUNC_lwt_push_encap FunctionId = 
73 BPF_FUNC_lwt_seg6_store_bytes FunctionId = 74 BPF_FUNC_lwt_seg6_adjust_srh FunctionId = 75 BPF_FUNC_lwt_seg6_action FunctionId = 76 BPF_FUNC_rc_repeat FunctionId = 77 BPF_FUNC_rc_keydown FunctionId = 78 BPF_FUNC_skb_cgroup_id FunctionId = 79 BPF_FUNC_get_current_cgroup_id FunctionId = 80 BPF_FUNC_get_local_storage FunctionId = 81 BPF_FUNC_sk_select_reuseport FunctionId = 82 BPF_FUNC_skb_ancestor_cgroup_id FunctionId = 83 BPF_FUNC_sk_lookup_tcp FunctionId = 84 BPF_FUNC_sk_lookup_udp FunctionId = 85 BPF_FUNC_sk_release FunctionId = 86 BPF_FUNC_map_push_elem FunctionId = 87 BPF_FUNC_map_pop_elem FunctionId = 88 BPF_FUNC_map_peek_elem FunctionId = 89 BPF_FUNC_msg_push_data FunctionId = 90 BPF_FUNC_msg_pop_data FunctionId = 91 BPF_FUNC_rc_pointer_rel FunctionId = 92 BPF_FUNC_spin_lock FunctionId = 93 BPF_FUNC_spin_unlock FunctionId = 94 BPF_FUNC_sk_fullsock FunctionId = 95 BPF_FUNC_tcp_sock FunctionId = 96 BPF_FUNC_skb_ecn_set_ce FunctionId = 97 BPF_FUNC_get_listener_sock FunctionId = 98 BPF_FUNC_skc_lookup_tcp FunctionId = 99 BPF_FUNC_tcp_check_syncookie FunctionId = 100 BPF_FUNC_sysctl_get_name FunctionId = 101 BPF_FUNC_sysctl_get_current_value FunctionId = 102 BPF_FUNC_sysctl_get_new_value FunctionId = 103 BPF_FUNC_sysctl_set_new_value FunctionId = 104 BPF_FUNC_strtol FunctionId = 105 BPF_FUNC_strtoul FunctionId = 106 BPF_FUNC_sk_storage_get FunctionId = 107 BPF_FUNC_sk_storage_delete FunctionId = 108 BPF_FUNC_send_signal FunctionId = 109 BPF_FUNC_tcp_gen_syncookie FunctionId = 110 BPF_FUNC_skb_output FunctionId = 111 BPF_FUNC_probe_read_user FunctionId = 112 BPF_FUNC_probe_read_kernel FunctionId = 113 BPF_FUNC_probe_read_user_str FunctionId = 114 BPF_FUNC_probe_read_kernel_str FunctionId = 115 BPF_FUNC_tcp_send_ack FunctionId = 116 BPF_FUNC_send_signal_thread FunctionId = 117 BPF_FUNC_jiffies64 FunctionId = 118 BPF_FUNC_read_branch_records FunctionId = 119 BPF_FUNC_get_ns_current_pid_tgid FunctionId = 120 BPF_FUNC_xdp_output FunctionId = 121 
BPF_FUNC_get_netns_cookie FunctionId = 122 BPF_FUNC_get_current_ancestor_cgroup_id FunctionId = 123 BPF_FUNC_sk_assign FunctionId = 124 BPF_FUNC_ktime_get_boot_ns FunctionId = 125 BPF_FUNC_seq_printf FunctionId = 126 BPF_FUNC_seq_write FunctionId = 127 BPF_FUNC_sk_cgroup_id FunctionId = 128 BPF_FUNC_sk_ancestor_cgroup_id FunctionId = 129 BPF_FUNC_ringbuf_output FunctionId = 130 BPF_FUNC_ringbuf_reserve FunctionId = 131 BPF_FUNC_ringbuf_submit FunctionId = 132 BPF_FUNC_ringbuf_discard FunctionId = 133 BPF_FUNC_ringbuf_query FunctionId = 134 BPF_FUNC_csum_level FunctionId = 135 BPF_FUNC_skc_to_tcp6_sock FunctionId = 136 BPF_FUNC_skc_to_tcp_sock FunctionId = 137 BPF_FUNC_skc_to_tcp_timewait_sock FunctionId = 138 BPF_FUNC_skc_to_tcp_request_sock FunctionId = 139 BPF_FUNC_skc_to_udp6_sock FunctionId = 140 BPF_FUNC_get_task_stack FunctionId = 141 BPF_FUNC_load_hdr_opt FunctionId = 142 BPF_FUNC_store_hdr_opt FunctionId = 143 BPF_FUNC_reserve_hdr_opt FunctionId = 144 BPF_FUNC_inode_storage_get FunctionId = 145 BPF_FUNC_inode_storage_delete FunctionId = 146 BPF_FUNC_d_path FunctionId = 147 BPF_FUNC_copy_from_user FunctionId = 148 BPF_FUNC_snprintf_btf FunctionId = 149 BPF_FUNC_seq_printf_btf FunctionId = 150 BPF_FUNC_skb_cgroup_classid FunctionId = 151 BPF_FUNC_redirect_neigh FunctionId = 152 BPF_FUNC_per_cpu_ptr FunctionId = 153 BPF_FUNC_this_cpu_ptr FunctionId = 154 BPF_FUNC_redirect_peer FunctionId = 155 BPF_FUNC_task_storage_get FunctionId = 156 BPF_FUNC_task_storage_delete FunctionId = 157 BPF_FUNC_get_current_task_btf FunctionId = 158 BPF_FUNC_bprm_opts_set FunctionId = 159 BPF_FUNC_ktime_get_coarse_ns FunctionId = 160 BPF_FUNC_ima_inode_hash FunctionId = 161 BPF_FUNC_sock_from_file FunctionId = 162 BPF_FUNC_check_mtu FunctionId = 163 BPF_FUNC_for_each_map_elem FunctionId = 164 BPF_FUNC_snprintf FunctionId = 165 BPF_FUNC_sys_bpf FunctionId = 166 BPF_FUNC_btf_find_by_name_kind FunctionId = 167 BPF_FUNC_sys_close FunctionId = 168 BPF_FUNC_timer_init FunctionId = 169 
// HdrStartOff is a uint32-backed enumeration made up of the BPF_HDR_START_*
// constants declared below.
//
// NOTE(review): presumably selects which header an offset is relative to
// (MAC vs. network); confirm against the kernel UAPI documentation.
type HdrStartOff uint32

const (
	BPF_HDR_START_MAC HdrStartOff = 0
	BPF_HDR_START_NET HdrStartOff = 1
)
// MapType is a uint32-backed enumeration of map kinds. It is the type of
// MapCreateAttr.MapType.
//
// Two values are reachable under two names each: 19 is both
// BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE, and
// 21 is both BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
// BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE. __MAX_BPF_MAP_TYPE is one greater than
// the largest map type listed here and is not itself a map type.
type MapType uint32

const (
	BPF_MAP_TYPE_UNSPEC                           MapType = 0
	BPF_MAP_TYPE_HASH                             MapType = 1
	BPF_MAP_TYPE_ARRAY                            MapType = 2
	BPF_MAP_TYPE_PROG_ARRAY                       MapType = 3
	BPF_MAP_TYPE_PERF_EVENT_ARRAY                 MapType = 4
	BPF_MAP_TYPE_PERCPU_HASH                      MapType = 5
	BPF_MAP_TYPE_PERCPU_ARRAY                     MapType = 6
	BPF_MAP_TYPE_STACK_TRACE                      MapType = 7
	BPF_MAP_TYPE_CGROUP_ARRAY                     MapType = 8
	BPF_MAP_TYPE_LRU_HASH                         MapType = 9
	BPF_MAP_TYPE_LRU_PERCPU_HASH                  MapType = 10
	BPF_MAP_TYPE_LPM_TRIE                         MapType = 11
	BPF_MAP_TYPE_ARRAY_OF_MAPS                    MapType = 12
	BPF_MAP_TYPE_HASH_OF_MAPS                     MapType = 13
	BPF_MAP_TYPE_DEVMAP                           MapType = 14
	BPF_MAP_TYPE_SOCKMAP                          MapType = 15
	BPF_MAP_TYPE_CPUMAP                           MapType = 16
	BPF_MAP_TYPE_XSKMAP                           MapType = 17
	BPF_MAP_TYPE_SOCKHASH                         MapType = 18
	BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED        MapType = 19
	BPF_MAP_TYPE_CGROUP_STORAGE                   MapType = 19
	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY              MapType = 20
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED MapType = 21
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE            MapType = 21
	BPF_MAP_TYPE_QUEUE                            MapType = 22
	BPF_MAP_TYPE_STACK                            MapType = 23
	BPF_MAP_TYPE_SK_STORAGE                       MapType = 24
	BPF_MAP_TYPE_DEVMAP_HASH                      MapType = 25
	BPF_MAP_TYPE_STRUCT_OPS                       MapType = 26
	BPF_MAP_TYPE_RINGBUF                          MapType = 27
	BPF_MAP_TYPE_INODE_STORAGE                    MapType = 28
	BPF_MAP_TYPE_TASK_STORAGE                     MapType = 29
	BPF_MAP_TYPE_BLOOM_FILTER                     MapType = 30
	BPF_MAP_TYPE_USER_RINGBUF                     MapType = 31
	BPF_MAP_TYPE_CGRP_STORAGE                     MapType = 32
	BPF_MAP_TYPE_ARENA                            MapType = 33
	__MAX_BPF_MAP_TYPE                            MapType = 34
)
// PerfEventType is a uint32-backed enumeration made up of the
// BPF_PERF_EVENT_* constants declared below. It is the type of the
// PerfEventType field in the perf-event flavoured *LinkInfo structs of this
// file (for example PerfEventLinkInfo and KprobeLinkInfo).
type PerfEventType uint32

const (
	BPF_PERF_EVENT_UNSPEC     PerfEventType = 0
	BPF_PERF_EVENT_UPROBE     PerfEventType = 1
	BPF_PERF_EVENT_URETPROBE  PerfEventType = 2
	BPF_PERF_EVENT_KPROBE     PerfEventType = 3
	BPF_PERF_EVENT_KRETPROBE  PerfEventType = 4
	BPF_PERF_EVENT_TRACEPOINT PerfEventType = 5
	BPF_PERF_EVENT_EVENT      PerfEventType = 6
)
// TcxActionBase is an int32-backed enumeration made up of the TCX_* constants
// declared below. The base type is signed because TCX_NEXT is -1.
//
// TCX_PASS, TCX_DROP and TCX_REDIRECT carry the same numeric values as
// BPF_OK, BPF_DROP and BPF_REDIRECT of RetCode (0, 2 and 7).
type TcxActionBase int32

const (
	TCX_NEXT     TcxActionBase = -1
	TCX_PASS     TcxActionBase = 0
	TCX_DROP     TcxActionBase = 2
	TCX_REDIRECT TcxActionBase = 7
)
uint64 Hash uint64 HashSize uint32 _ [4]byte } type ProgInfo struct { _ structs.HostLayout Type uint32 Id uint32 Tag [8]uint8 JitedProgLen uint32 XlatedProgLen uint32 JitedProgInsns TypedPointer[uint8] XlatedProgInsns TypedPointer[uint8] LoadTime uint64 CreatedByUid uint32 NrMapIds uint32 MapIds TypedPointer[MapID] Name ObjName Ifindex uint32 _ [4]byte /* unsupported bitfield */ NetnsDev uint64 NetnsIno uint64 NrJitedKsyms uint32 NrJitedFuncLens uint32 JitedKsyms TypedPointer[uint64] JitedFuncLens TypedPointer[uint32] BtfId BTFID FuncInfoRecSize uint32 FuncInfo TypedPointer[uint8] NrFuncInfo uint32 NrLineInfo uint32 LineInfo TypedPointer[uint8] JitedLineInfo TypedPointer[uint64] NrJitedLineInfo uint32 LineInfoRecSize uint32 JitedLineInfoRecSize uint32 NrProgTags uint32 ProgTags uint64 RunTimeNs uint64 RunCnt uint64 RecursionMisses uint64 VerifiedInsns uint32 AttachBtfObjId BTFID AttachBtfId TypeID _ [4]byte } type SkLookup struct { _ structs.HostLayout Cookie uint64 Family uint32 Protocol uint32 RemoteIp4 [4]uint8 RemoteIp6 [16]uint8 RemotePort uint16 _ [2]byte LocalIp4 [4]uint8 LocalIp6 [16]uint8 LocalPort uint32 IngressIfindex uint32 _ [4]byte } type XdpMd struct { _ structs.HostLayout Data uint32 DataEnd uint32 DataMeta uint32 IngressIfindex uint32 RxQueueIndex uint32 EgressIfindex uint32 } type BtfGetFdByIdAttr struct { _ structs.HostLayout Id uint32 } func BtfGetFdById(attr *BtfGetFdByIdAttr) (*FD, error) { fd, err := BPF(BPF_BTF_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type BtfGetNextIdAttr struct { _ structs.HostLayout Id BTFID NextId BTFID } func BtfGetNextId(attr *BtfGetNextIdAttr) error { _, err := BPF(BPF_BTF_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type BtfLoadAttr struct { _ structs.HostLayout Btf TypedPointer[uint8] BtfLogBuf TypedPointer[uint8] BtfSize uint32 BtfLogSize uint32 BtfLogLevel uint32 BtfLogTrueSize uint32 BtfFlags uint32 BtfTokenFd 
int32 } func BtfLoad(attr *BtfLoadAttr) (*FD, error) { fd, err := BPF(BPF_BTF_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type EnableStatsAttr struct { _ structs.HostLayout Type uint32 } func EnableStats(attr *EnableStatsAttr) (*FD, error) { fd, err := BPF(BPF_ENABLE_STATS, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type IterCreateAttr struct { _ structs.HostLayout LinkFd uint32 Flags uint32 } func IterCreate(attr *IterCreateAttr) (*FD, error) { fd, err := BPF(BPF_ITER_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 TargetBtfId TypeID _ [44]byte } func LinkCreate(attr *LinkCreateAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateIterAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 IterInfo Pointer IterInfoLen uint32 _ [36]byte } func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateKprobeMultiAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 KprobeMultiFlags uint32 Count uint32 Syms StringSlicePointer Addrs TypedPointer[uintptr] Cookies TypedPointer[uint64] _ [16]byte } func LinkCreateKprobeMulti(attr *LinkCreateKprobeMultiAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateNetfilterAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 Pf 
NetfilterProtocolFamily Hooknum NetfilterInetHook Priority int32 NetfilterFlags uint32 _ [32]byte } func LinkCreateNetfilter(attr *LinkCreateNetfilterAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateNetkitAttr struct { _ structs.HostLayout ProgFd uint32 TargetIfindex uint32 AttachType AttachType Flags uint32 RelativeFdOrId uint32 _ [4]byte ExpectedRevision uint64 _ [32]byte } func LinkCreateNetkit(attr *LinkCreateNetkitAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreatePerfEventAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 BpfCookie uint64 _ [40]byte } func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateTcxAttr struct { _ structs.HostLayout ProgFd uint32 TargetIfindex uint32 AttachType AttachType Flags uint32 RelativeFdOrId uint32 _ [4]byte ExpectedRevision uint64 _ [32]byte } func LinkCreateTcx(attr *LinkCreateTcxAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateTracingAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 TargetBtfId BTFID _ [4]byte Cookie uint64 _ [32]byte } func LinkCreateTracing(attr *LinkCreateTracingAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkCreateUprobeMultiAttr struct { _ structs.HostLayout ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 Path StringPointer Offsets TypedPointer[uint64] 
RefCtrOffsets TypedPointer[uint64] Cookies TypedPointer[uint64] Count uint32 UprobeMultiFlags uint32 Pid uint32 _ [4]byte } func LinkCreateUprobeMulti(attr *LinkCreateUprobeMultiAttr) (*FD, error) { fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkDetachAttr struct { _ structs.HostLayout LinkFd uint32 } func LinkDetach(attr *LinkDetachAttr) error { _, err := BPF(BPF_LINK_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type LinkGetFdByIdAttr struct { _ structs.HostLayout Id LinkID } func LinkGetFdById(attr *LinkGetFdByIdAttr) (*FD, error) { fd, err := BPF(BPF_LINK_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type LinkGetNextIdAttr struct { _ structs.HostLayout Id LinkID NextId LinkID } func LinkGetNextId(attr *LinkGetNextIdAttr) error { _, err := BPF(BPF_LINK_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type LinkUpdateAttr struct { _ structs.HostLayout LinkFd uint32 NewProgFd uint32 Flags uint32 OldProgFd uint32 } func LinkUpdate(attr *LinkUpdateAttr) error { _, err := BPF(BPF_LINK_UPDATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapCreateAttr struct { _ structs.HostLayout MapType MapType KeySize uint32 ValueSize uint32 MaxEntries uint32 MapFlags uint32 InnerMapFd uint32 NumaNode uint32 MapName ObjName MapIfindex uint32 BtfFd uint32 BtfKeyTypeId TypeID BtfValueTypeId TypeID BtfVmlinuxValueTypeId TypeID MapExtra uint64 ValueTypeBtfObjFd int32 MapTokenFd int32 ExclProgHash uint64 ExclProgHashSize uint32 _ [4]byte } func MapCreate(attr *MapCreateAttr) (*FD, error) { fd, err := BPF(BPF_MAP_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type MapDeleteBatchAttr struct { _ structs.HostLayout InBatch Pointer OutBatch Pointer Keys Pointer Values Pointer Count uint32 MapFd uint32 ElemFlags 
uint64 Flags uint64 } func MapDeleteBatch(attr *MapDeleteBatchAttr) error { _, err := BPF(BPF_MAP_DELETE_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapDeleteElemAttr struct { _ structs.HostLayout MapFd uint32 _ [4]byte Key Pointer Value Pointer Flags uint64 } func MapDeleteElem(attr *MapDeleteElemAttr) error { _, err := BPF(BPF_MAP_DELETE_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapFreezeAttr struct { _ structs.HostLayout MapFd uint32 } func MapFreeze(attr *MapFreezeAttr) error { _, err := BPF(BPF_MAP_FREEZE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapGetFdByIdAttr struct { _ structs.HostLayout Id uint32 } func MapGetFdById(attr *MapGetFdByIdAttr) (*FD, error) { fd, err := BPF(BPF_MAP_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type MapGetNextIdAttr struct { _ structs.HostLayout Id uint32 NextId uint32 } func MapGetNextId(attr *MapGetNextIdAttr) error { _, err := BPF(BPF_MAP_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapGetNextKeyAttr struct { _ structs.HostLayout MapFd uint32 _ [4]byte Key Pointer NextKey Pointer } func MapGetNextKey(attr *MapGetNextKeyAttr) error { _, err := BPF(BPF_MAP_GET_NEXT_KEY, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapLookupAndDeleteBatchAttr struct { _ structs.HostLayout InBatch Pointer OutBatch Pointer Keys Pointer Values Pointer Count uint32 MapFd uint32 ElemFlags uint64 Flags uint64 } func MapLookupAndDeleteBatch(attr *MapLookupAndDeleteBatchAttr) error { _, err := BPF(BPF_MAP_LOOKUP_AND_DELETE_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapLookupAndDeleteElemAttr struct { _ structs.HostLayout MapFd uint32 _ [4]byte Key Pointer Value Pointer Flags uint64 } func MapLookupAndDeleteElem(attr *MapLookupAndDeleteElemAttr) error { _, err := BPF(BPF_MAP_LOOKUP_AND_DELETE_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) 
return err } type MapLookupBatchAttr struct { _ structs.HostLayout InBatch Pointer OutBatch Pointer Keys Pointer Values Pointer Count uint32 MapFd uint32 ElemFlags uint64 Flags uint64 } func MapLookupBatch(attr *MapLookupBatchAttr) error { _, err := BPF(BPF_MAP_LOOKUP_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapLookupElemAttr struct { _ structs.HostLayout MapFd uint32 _ [4]byte Key Pointer Value Pointer Flags uint64 } func MapLookupElem(attr *MapLookupElemAttr) error { _, err := BPF(BPF_MAP_LOOKUP_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapUpdateBatchAttr struct { _ structs.HostLayout InBatch Pointer OutBatch Pointer Keys Pointer Values Pointer Count uint32 MapFd uint32 ElemFlags uint64 Flags uint64 } func MapUpdateBatch(attr *MapUpdateBatchAttr) error { _, err := BPF(BPF_MAP_UPDATE_BATCH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type MapUpdateElemAttr struct { _ structs.HostLayout MapFd uint32 _ [4]byte Key Pointer Value Pointer Flags uint64 } func MapUpdateElem(attr *MapUpdateElemAttr) error { _, err := BPF(BPF_MAP_UPDATE_ELEM, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ObjGetAttr struct { _ structs.HostLayout Pathname StringPointer BpfFd uint32 FileFlags uint32 PathFd int32 _ [4]byte } func ObjGet(attr *ObjGetAttr) (*FD, error) { fd, err := BPF(BPF_OBJ_GET, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type ObjGetInfoByFdAttr struct { _ structs.HostLayout BpfFd uint32 InfoLen uint32 Info Pointer } func ObjGetInfoByFd(attr *ObjGetInfoByFdAttr) error { _, err := BPF(BPF_OBJ_GET_INFO_BY_FD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ObjPinAttr struct { _ structs.HostLayout Pathname StringPointer BpfFd uint32 FileFlags uint32 PathFd int32 _ [4]byte } func ObjPin(attr *ObjPinAttr) error { _, err := BPF(BPF_OBJ_PIN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ProgAttachAttr struct { _ 
structs.HostLayout TargetFdOrIfindex uint32 AttachBpfFd uint32 AttachType uint32 AttachFlags uint32 ReplaceBpfFd uint32 RelativeFdOrId uint32 ExpectedRevision uint64 } func ProgAttach(attr *ProgAttachAttr) error { _, err := BPF(BPF_PROG_ATTACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ProgBindMapAttr struct { _ structs.HostLayout ProgFd uint32 MapFd uint32 Flags uint32 } func ProgBindMap(attr *ProgBindMapAttr) error { _, err := BPF(BPF_PROG_BIND_MAP, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ProgDetachAttr struct { _ structs.HostLayout TargetFdOrIfindex uint32 AttachBpfFd uint32 AttachType uint32 AttachFlags uint32 _ [4]byte RelativeFdOrId uint32 ExpectedRevision uint64 } func ProgDetach(attr *ProgDetachAttr) error { _, err := BPF(BPF_PROG_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ProgGetFdByIdAttr struct { _ structs.HostLayout Id uint32 } func ProgGetFdById(attr *ProgGetFdByIdAttr) (*FD, error) { fd, err := BPF(BPF_PROG_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type ProgGetNextIdAttr struct { _ structs.HostLayout Id uint32 NextId uint32 } func ProgGetNextId(attr *ProgGetNextIdAttr) error { _, err := BPF(BPF_PROG_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ProgLoadAttr struct { _ structs.HostLayout ProgType ProgType InsnCnt uint32 Insns TypedPointer[uint8] License StringPointer LogLevel LogLevel LogSize uint32 LogBuf TypedPointer[uint8] KernVersion uint32 ProgFlags uint32 ProgName ObjName ProgIfindex uint32 ExpectedAttachType AttachType ProgBtfFd uint32 FuncInfoRecSize uint32 FuncInfo TypedPointer[uint8] FuncInfoCnt uint32 LineInfoRecSize uint32 LineInfo TypedPointer[uint8] LineInfoCnt uint32 AttachBtfId TypeID AttachBtfObjFd uint32 CoreReloCnt uint32 FdArray TypedPointer[int32] CoreRelos TypedPointer[uint8] CoreReloRecSize uint32 LogTrueSize uint32 ProgTokenFd int32 FdArrayCnt uint32 Signature 
uint64 SignatureSize uint32 KeyringId int32 } func ProgLoad(attr *ProgLoadAttr) (*FD, error) { fd, err := BPF(BPF_PROG_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type ProgQueryAttr struct { _ structs.HostLayout TargetFdOrIfindex uint32 AttachType AttachType QueryFlags uint32 AttachFlags uint32 ProgIds TypedPointer[ProgramID] Count uint32 _ [4]byte ProgAttachFlags TypedPointer[ProgramID] LinkIds TypedPointer[LinkID] LinkAttachFlags TypedPointer[LinkID] Revision uint64 } func ProgQuery(attr *ProgQueryAttr) error { _, err := BPF(BPF_PROG_QUERY, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type ProgRunAttr struct { _ structs.HostLayout ProgFd uint32 Retval uint32 DataSizeIn uint32 DataSizeOut uint32 DataIn TypedPointer[uint8] DataOut TypedPointer[uint8] Repeat uint32 Duration uint32 CtxSizeIn uint32 CtxSizeOut uint32 CtxIn TypedPointer[uint8] CtxOut TypedPointer[uint8] Flags uint32 Cpu uint32 BatchSize uint32 _ [4]byte } func ProgRun(attr *ProgRunAttr) error { _, err := BPF(BPF_PROG_TEST_RUN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) return err } type RawTracepointOpenAttr struct { _ structs.HostLayout Name StringPointer ProgFd uint32 _ [4]byte Cookie uint64 } func RawTracepointOpen(attr *RawTracepointOpenAttr) (*FD, error) { fd, err := BPF(BPF_RAW_TRACEPOINT_OPEN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) if err != nil { return nil, err } return NewFD(int(fd)) } type CgroupLinkInfo struct { _ structs.HostLayout Type LinkType Id LinkID ProgId uint32 _ [4]byte CgroupId uint64 AttachType AttachType _ [36]byte } type EventLinkInfo struct { _ structs.HostLayout Type LinkType Id LinkID ProgId uint32 _ [4]byte PerfEventType PerfEventType _ [4]byte Config uint64 EventType uint32 _ [4]byte Cookie uint64 _ [16]byte } type IterLinkInfo struct { _ structs.HostLayout Type LinkType Id LinkID ProgId uint32 _ [4]byte TargetName TypedPointer[uint8] TargetNameLen uint32 } type KprobeLinkInfo struct { _ 
// NetfilterLinkInfo is the netfilter flavoured variant of the link info
// layout. It starts with the same Type, Id and ProgId fields (followed by four
// unnamed bytes) as LinkInfo, and then carries the netfilter specific
// protocol family, hook number, priority and flags.
//
// The structs.HostLayout marker and the unnamed byte arrays fix the in-memory
// layout, so fields must not be reordered or resized.
//
// NOTE(review): the trailing 32 unnamed bytes presumably pad this variant to
// the same total size as the generic LinkInfo; confirm against the kernel
// definition.
type NetfilterLinkInfo struct {
	_        structs.HostLayout
	Type     LinkType
	Id       LinkID
	ProgId   uint32
	_        [4]byte
	Pf       NetfilterProtocolFamily
	Hooknum  NetfilterInetHook
	Priority int32
	Flags    uint32
	_        [32]byte
}
RefCtrOffset uint64 _ [8]byte } type UprobeMultiLinkInfo struct { _ structs.HostLayout Type LinkType Id LinkID ProgId uint32 _ [4]byte Path TypedPointer[uint8] Offsets TypedPointer[uint64] RefCtrOffsets TypedPointer[uint64] Cookies TypedPointer[uint64] PathSize uint32 Count uint32 Flags uint32 Pid uint32 } type XDPLinkInfo struct { _ structs.HostLayout Type LinkType Id LinkID ProgId uint32 _ [4]byte Ifindex uint32 _ [44]byte } ================================================ FILE: internal/sysenc/buffer.go ================================================ package sysenc import ( "unsafe" "github.com/cilium/ebpf/internal/sys" ) type Buffer struct { ptr unsafe.Pointer // Size of the buffer. syscallPointerOnly if created from UnsafeBuffer or when using // zero-copy unmarshaling. size int } const syscallPointerOnly = -1 func newBuffer(buf []byte) Buffer { if len(buf) == 0 { return Buffer{} } return Buffer{unsafe.Pointer(&buf[0]), len(buf)} } // UnsafeBuffer constructs a Buffer for zero-copy unmarshaling. // // [Pointer] is the only valid method to call on such a Buffer. // Use [SyscallBuffer] instead if possible. func UnsafeBuffer(ptr unsafe.Pointer) Buffer { return Buffer{ptr, syscallPointerOnly} } // SyscallOutput prepares a Buffer for a syscall to write into. // // size is the length of the desired buffer in bytes. // The buffer may point at the underlying memory of dst, in which case [Unmarshal] // becomes a no-op. // // The contents of the buffer are undefined and may be non-zero. func SyscallOutput(dst any, size int) Buffer { if dstBuf := unsafeBackingMemory(dst); len(dstBuf) == size { buf := newBuffer(dstBuf) buf.size = syscallPointerOnly return buf } return newBuffer(make([]byte, size)) } // CopyTo copies the buffer into dst. // // Returns the number of copied bytes. func (b Buffer) CopyTo(dst []byte) int { return copy(dst, b.Bytes()) } // AppendTo appends the buffer onto dst. func (b Buffer) AppendTo(dst []byte) []byte { return append(dst, b.Bytes()...) 
} // Pointer returns the location where a syscall should write. func (b Buffer) Pointer() sys.Pointer { // NB: This deliberately ignores b.length to support zero-copy // marshaling / unmarshaling using unsafe.Pointer. return sys.UnsafePointer(b.ptr) } // Unmarshal the buffer into the provided value. func (b Buffer) Unmarshal(data any) error { if b.size == syscallPointerOnly { return nil } return Unmarshal(data, b.Bytes()) } // Bytes returns the buffer as a byte slice. Returns nil if the Buffer was // created using UnsafeBuffer or by zero-copy unmarshaling. func (b Buffer) Bytes() []byte { if b.size == syscallPointerOnly { return nil } return unsafe.Slice((*byte)(b.ptr), b.size) } ================================================ FILE: internal/sysenc/buffer_test.go ================================================ package sysenc_test import ( "testing" "unsafe" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/sysenc" ) func TestZeroBuffer(t *testing.T) { var zero sysenc.Buffer qt.Assert(t, qt.Equals(zero.CopyTo(make([]byte, 1)), 0)) qt.Assert(t, qt.Equals(zero.Pointer(), sys.Pointer{})) qt.Assert(t, qt.IsNotNil(zero.Unmarshal(new(uint16)))) } func TestUnsafeBuffer(t *testing.T) { ptr := unsafe.Pointer(new(uint16)) buf := sysenc.UnsafeBuffer(ptr) qt.Assert(t, qt.Equals(buf.CopyTo(make([]byte, 1)), 0)) qt.Assert(t, qt.Equals(buf.Pointer(), sys.UnsafePointer(ptr))) qt.Assert(t, qt.IsNil(buf.Unmarshal(new(uint16)))) } ================================================ FILE: internal/sysenc/doc.go ================================================ // Package sysenc provides efficient conversion of Go values to system // call interfaces. package sysenc ================================================ FILE: internal/sysenc/layout.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style
// license that can be found at https://go.dev/LICENSE.

package sysenc

import (
	"reflect"
	"sync"
)

// hasUnexportedFieldsCache memoizes the result of hasUnexportedFields for
// struct types, keyed by reflect.Type.
var hasUnexportedFieldsCache sync.Map // map[reflect.Type]bool

// hasUnexportedFields reports whether typ is, or contains, a struct with an
// unexported field.
//
// Slices, arrays and pointers are inspected through their element type and
// struct fields are inspected recursively. Fields named "_" are not counted
// as unexported themselves, but their types are still inspected. Results for
// struct types are cached in hasUnexportedFieldsCache.
func hasUnexportedFields(typ reflect.Type) bool {
	switch typ.Kind() {
	case reflect.Slice, reflect.Array, reflect.Pointer:
		return hasUnexportedFields(typ.Elem())

	case reflect.Struct:
		// Fast path: the answer for this struct type was computed before.
		if unexported, ok := hasUnexportedFieldsCache.Load(typ); ok {
			return unexported.(bool)
		}

		unexported := false
		for i, n := 0, typ.NumField(); i < n; i++ {
			field := typ.Field(i)
			// Package binary allows _ fields but always writes zeroes into them.
			if (!field.IsExported() && field.Name != "_") || hasUnexportedFields(field.Type) {
				unexported = true
				break
			}
		}

		hasUnexportedFieldsCache.Store(typ, unexported)
		return unexported

	default:
		// NB: It's not clear what this means for Chan and so on.
		return false
	}
}

================================================
FILE: internal/sysenc/layout_test.go
================================================
package sysenc

import (
	"fmt"
	"reflect"
	"testing"

	"github.com/go-quicktest/qt"
)

// TestHasUnexportedFields checks hasUnexportedFields against structs with
// exported, blank ("_") and unexported fields, both directly and wrapped in
// pointers, slices and arrays.
func TestHasUnexportedFields(t *testing.T) {
	for _, test := range []struct {
		value  any
		result bool
	}{
		{struct{ A any }{}, false},
		{(*struct{ A any })(nil), false},
		{([]struct{ A any })(nil), false},
		{[1]struct{ A any }{}, false},

		{struct{ _ any }{}, false},
		{struct{ _ struct{ a any } }{}, true},
		{(*struct{ _ any })(nil), false},
		{([]struct{ _ any })(nil), false},
		{[1]struct{ _ any }{}, false},

		{struct{ a any }{}, true},
		{(*struct{ a any })(nil), true},
		{([]struct{ a any })(nil), true},
		{[1]struct{ a any }{}, true},

		{(*struct{ A []struct{ a any } })(nil), true},
		{(*struct{ A [1]struct{ a any } })(nil), true},
	} {
		t.Run(fmt.Sprintf("%T", test.value), func(t *testing.T) {
			have := hasUnexportedFields(reflect.TypeOf(test.value))
			qt.Assert(t, qt.Equals(have, test.result))
		})
	}
}

================================================
FILE: internal/sysenc/marshal.go
================================================
package sysenc

import (
	"encoding"
	"encoding/binary"
	"errors"
	"fmt"
	"reflect"
	"slices"
	"unsafe"

	"github.com/cilium/ebpf/internal"
)

// Marshal turns data into a byte slice using the system's native endianness.
//
// size is the number of bytes data is expected to marshal to. An error is
// returned if the marshaled representation has a different length.
//
// If possible, avoids allocations by directly using the backing memory
// of data. This means that the variable must not be modified for the lifetime
// of the returned [Buffer].
//
// Returns an error if the data can't be turned into a byte slice according to
// the behaviour of [binary.Write].
func Marshal(data any, size int) (Buffer, error) {
	if data == nil {
		return Buffer{}, errors.New("can't marshal a nil value")
	}

	var buf []byte
	var err error
	switch value := data.(type) {
	case encoding.BinaryMarshaler:
		buf, err = value.MarshalBinary()
	case string:
		// Zero-copy: view the string's bytes directly instead of converting.
		buf = unsafe.Slice(unsafe.StringData(value), len(value))
	case []byte:
		buf = value
	case int16:
		buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), uint16(value))
	case uint16:
		buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), value)
	case int32:
		buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), uint32(value))
	case uint32:
		buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), value)
	case int64:
		buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), uint64(value))
	case uint64:
		buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), value)
	default:
		// Use the value's own memory when it already has the requested size,
		// otherwise fall back to encoding via package binary.
		if buf := unsafeBackingMemory(data); len(buf) == size {
			return newBuffer(buf), nil
		}

		buf, err = binary.Append(nil, internal.NativeEndian, value)
	}
	if err != nil {
		return Buffer{}, err
	}

	if len(buf) != size {
		return Buffer{}, fmt.Errorf("%T doesn't marshal to %d bytes", data, size)
	}

	return newBuffer(buf), nil
}

// Unmarshal a byte slice in the system's native endianness into data.
//
// Returns an error if buf can't be unmarshalled according to the behaviour
// of [binary.Decode].
func Unmarshal(data interface{}, buf []byte) error { switch value := data.(type) { case encoding.BinaryUnmarshaler: return value.UnmarshalBinary(buf) case *string: *value = string(buf) return nil case *[]byte: // Backwards compat: unmarshaling into a slice replaces the whole slice. *value = slices.Clone(buf) return nil default: if dataBuf := unsafeBackingMemory(data); len(dataBuf) == len(buf) { copy(dataBuf, buf) return nil } n, err := binary.Decode(buf, internal.NativeEndian, value) if err != nil { return err } if n != len(buf) { return fmt.Errorf("unmarshaling %T doesn't consume all data", data) } return nil } } // unsafeBackingMemory returns the backing memory of data if it can be used // instead of calling into package binary. // // Returns nil if the value is not a pointer or a slice, or if it contains // padding or unexported fields. func unsafeBackingMemory(data any) []byte { if data == nil { return nil } value := reflect.ValueOf(data) var valueSize int switch value.Kind() { case reflect.Pointer: if value.IsNil() { return nil } if elemType := value.Type().Elem(); elemType.Kind() != reflect.Slice { valueSize = int(elemType.Size()) break } // We're dealing with a pointer to a slice. Dereference and // handle it like a regular slice. value = value.Elem() fallthrough case reflect.Slice: valueSize = int(value.Type().Elem().Size()) * value.Len() default: // Prevent Value.UnsafePointer from panicking. return nil } // Some nil pointer types currently crash binary.Size. Call it after our own // code so that the panic isn't reachable. // See https://github.com/golang/go/issues/60892 if size := binary.Size(data); size == -1 || size != valueSize { // The type contains padding or unsupported types. return nil } if hasUnexportedFields(reflect.TypeOf(data)) { return nil } // Reinterpret the pointer as a byte slice. This violates the unsafe.Pointer // rules because it's very unlikely that the source data has "an equivalent // memory layout". 
However, we can make it safe-ish because of the // following reasons: // - There is no alignment mismatch since we cast to a type with an // alignment of 1. // - There are no pointers in the source type so we don't upset the GC. // - The length is verified at runtime. return unsafe.Slice((*byte)(value.UnsafePointer()), valueSize) } ================================================ FILE: internal/sysenc/marshal_test.go ================================================ package sysenc import ( "bytes" "encoding/binary" "fmt" "math" "reflect" "testing" "github.com/go-quicktest/qt" "github.com/google/go-cmp/cmp/cmpopts" "github.com/cilium/ebpf/internal" ) type testcase struct { new func() any zeroAllocs bool // marshaling does not allocate } type struc struct { A uint64 B uint32 } type explicitPad struct { _ uint32 } func testcases() []testcase { return []testcase{ {func() any { return new([1]uint64) }, true}, {func() any { return new(int16) }, true}, {func() any { return new(uint16) }, true}, {func() any { return new(int32) }, true}, {func() any { return new(uint32) }, true}, {func() any { return new(int64) }, true}, {func() any { return new(uint64) }, true}, {func() any { return make([]byte, 9) }, true}, {func() any { return new(explicitPad) }, true}, {func() any { return make([]explicitPad, 0) }, false}, {func() any { return make([]explicitPad, 1) }, false}, {func() any { return make([]explicitPad, 2) }, false}, {func() any { return new(struc) }, false}, {func() any { return make([]struc, 0) }, false}, {func() any { return make([]struc, 1) }, false}, {func() any { return make([]struc, 2) }, false}, {func() any { return int16(math.MaxInt16) }, false}, {func() any { return uint16(math.MaxUint16) }, false}, {func() any { return int32(math.MaxInt32) }, false}, {func() any { return uint32(math.MaxUint32) }, false}, {func() any { return int64(math.MaxInt64) }, false}, {func() any { return uint64(math.MaxUint64) }, false}, {func() any { return struc{math.MaxUint64, 
math.MaxUint32} }, false}, } } func TestMarshal(t *testing.T) { for _, test := range testcases() { value := test.new() t.Run(fmt.Sprintf("%T", value), func(t *testing.T) { want, err := binary.Append(nil, internal.NativeEndian, value) qt.Assert(t, qt.IsNil(err)) have := make([]byte, len(want)) buf, err := Marshal(value, binary.Size(value)) if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(buf.CopyTo(have), len(want))) qt.Assert(t, qt.CmpEquals(have, want, cmpopts.EquateEmpty())) }) } } func TestMarshalAllocations(t *testing.T) { allocationsPerMarshal := func(t *testing.T, data any) float64 { size := binary.Size(data) return testing.AllocsPerRun(5, func() { _, err := Marshal(data, size) if err != nil { t.Fatal(err) } }) } for _, test := range testcases() { if !test.zeroAllocs { continue } value := test.new() t.Run(fmt.Sprintf("%T", value), func(t *testing.T) { qt.Assert(t, qt.Equals(allocationsPerMarshal(t, value), 0)) }) } } func TestUnmarshal(t *testing.T) { for _, test := range testcases() { value := test.new() if !canUnmarshalInto(value) { continue } t.Run(fmt.Sprintf("%T", value), func(t *testing.T) { want := test.new() buf := randomiseValue(t, want) qt.Assert(t, qt.IsNil(Unmarshal(value, buf))) qt.Assert(t, qt.DeepEquals(value, want)) }) } } func TestUnmarshalAllocations(t *testing.T) { allocationsPerUnmarshal := func(t *testing.T, data any, buf []byte) float64 { return testing.AllocsPerRun(5, func() { err := Unmarshal(data, buf) if err != nil { t.Fatal(err) } }) } for _, test := range testcases() { value := test.new() if !canUnmarshalInto(value) { continue } t.Run(fmt.Sprintf("%T", value), func(t *testing.T) { buf := make([]byte, binary.Size(value)) qt.Assert(t, qt.Equals(allocationsPerUnmarshal(t, value, buf), 0)) }) } } func TestUnsafeBackingMemory(t *testing.T) { marshalNative := func(t *testing.T, data any) []byte { t.Helper() buf, err := binary.Append(nil, internal.NativeEndian, data) qt.Assert(t, qt.IsNil(err)) return buf } for _, test := range 
[]struct { name string value any }{ { "slice", []uint32{1, 2}, }, { "pointer to slice", &[]uint32{2}, }, { "pointer to array", &[2]uint64{}, }, { "pointer to int64", new(int64), }, { "pointer to struct", &struct { A, B uint16 C uint32 }{}, }, { "struct with explicit padding", &struct{ _ uint64 }{}, }, } { t.Run("valid: "+test.name, func(t *testing.T) { want := marshalNative(t, test.value) have := unsafeBackingMemory(test.value) qt.Assert(t, qt.DeepEquals(have, want)) }) } for _, test := range []struct { name string value any }{ { "nil", nil, }, { "nil slice", ([]byte)(nil), }, { "nil pointer", (*uint64)(nil), }, { "nil pointer to slice", (*[]uint32)(nil), }, { "nil pointer to array", (*[2]uint64)(nil), }, { "unexported field", &struct{ a uint64 }{}, }, { "struct containing pointer", &struct{ A *uint64 }{}, }, { "struct with trailing padding", &struc{}, }, { "struct with interspersed padding", &struct { B uint32 A uint64 }{}, }, { "padding between slice entries", &[]struc{{}}, }, { "padding between array entries", &[2]struc{}, }, } { t.Run("invalid: "+test.name, func(t *testing.T) { qt.Assert(t, qt.IsNil(unsafeBackingMemory(test.value))) }) } } func BenchmarkMarshal(b *testing.B) { for _, test := range testcases() { value := test.new() b.Run(fmt.Sprintf("%T", value), func(b *testing.B) { size := binary.Size(value) b.ReportAllocs() for b.Loop() { _, _ = Marshal(value, size) } }) } } func BenchmarkUnmarshal(b *testing.B) { for _, test := range testcases() { value := test.new() if !canUnmarshalInto(value) { continue } b.Run(fmt.Sprintf("%T", value), func(b *testing.B) { size := binary.Size(value) buf := make([]byte, size) b.ReportAllocs() for b.Loop() { _ = Unmarshal(value, buf) } }) } } func randomiseValue(tb testing.TB, value any) []byte { tb.Helper() size := binary.Size(value) if size == -1 { tb.Fatalf("Can't unmarshal into %T", value) } buf := make([]byte, size) for i := range buf { buf[i] = byte(i) } err := binary.Read(bytes.NewReader(buf), internal.NativeEndian, 
value) qt.Assert(tb, qt.IsNil(err)) return buf } func canUnmarshalInto(data any) bool { kind := reflect.TypeOf(data).Kind() return kind == reflect.Slice || kind == reflect.Pointer } ================================================ FILE: internal/testutils/bpffs_other.go ================================================ //go:build !windows package testutils import ( "os" "testing" ) // TempBPFFS creates a temporary directory on a BPF FS. // // The directory is automatically cleaned up at the end of the test run. func TempBPFFS(tb testing.TB) string { tb.Helper() tmp, err := os.MkdirTemp("/sys/fs/bpf", "ebpf-test") if err != nil { tb.Fatal("Create temporary directory on BPFFS:", err) } tb.Cleanup(func() { os.RemoveAll(tmp) }) return tmp } ================================================ FILE: internal/testutils/bpffs_windows.go ================================================ package testutils import ( "errors" "math/rand" "path/filepath" "strconv" "strings" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/efw" ) // TempBPFFS creates a random prefix to use when pinning on Windows. 
func TempBPFFS(tb testing.TB) string { tb.Helper() path := filepath.Join("ebpf-go-test", strconv.Itoa(rand.Int())) path, err := efw.EbpfCanonicalizePinPath(path) qt.Assert(tb, qt.IsNil(err)) tb.Cleanup(func() { tb.Helper() cursor := path for { next, _, err := efw.EbpfGetNextPinnedObjectPath(cursor, efw.EBPF_OBJECT_UNKNOWN) if errors.Is(err, efw.EBPF_NO_MORE_KEYS) { break } qt.Assert(tb, qt.IsNil(err)) if !strings.HasPrefix(next, path) { break } if err := efw.EbpfObjectUnpin(next); err != nil { tb.Errorf("Failed to unpin %s: %s", next, err) } cursor = next } }) return path } ================================================ FILE: internal/testutils/bpffs_windows_test.go ================================================ package testutils_test import ( "bytes" "os" "path/filepath" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" ) func TestTempBPFFS(t *testing.T) { var progPath, mapPath string t.Run("pin", func(t *testing.T) { tmp := testutils.TempBPFFS(t) progPath = filepath.Join(tmp, "prog") mapPath = filepath.Join(tmp, "map") var buffer bytes.Buffer insns := asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), } err := insns.Marshal(&buffer, internal.NativeEndian) qt.Assert(t, qt.IsNil(err)) progFd, err := sys.ProgLoad(&sys.ProgLoadAttr{ ProgType: 999, // SAMPLE License: sys.NewStringPointer(""), InsnCnt: uint32(buffer.Len() / asm.InstructionSize), Insns: sys.SlicePointer(buffer.Bytes()), }) qt.Assert(t, qt.IsNil(err)) defer progFd.Close() err = sys.ObjPin(&sys.ObjPinAttr{ BpfFd: progFd.Uint(), Pathname: sys.NewStringPointer(progPath), }) qt.Assert(t, qt.IsNil(err)) mapFd, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: 2, // ARRAY KeySize: 4, ValueSize: 4, MaxEntries: 1, }) qt.Assert(t, qt.IsNil(err)) defer mapFd.Close() err = sys.ObjPin(&sys.ObjPinAttr{ BpfFd: progFd.Uint(), Pathname: sys.NewStringPointer(mapPath), 
}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(mapFd.Close())) }) _, err := sys.ObjGet(&sys.ObjGetAttr{ Pathname: sys.NewStringPointer(progPath), }) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist)) _, err = sys.ObjGet(&sys.ObjGetAttr{ Pathname: sys.NewStringPointer(mapPath), }) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist)) } ================================================ FILE: internal/testutils/cap.go ================================================ package testutils import ( "runtime" "testing" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/unix" ) type Capability int // Mirrors of constants from x/sys/unix const ( CAP_NET_ADMIN Capability = 12 CAP_SYS_ADMIN Capability = 21 CAP_SYS_RESOURCE Capability = 24 CAP_PERFMON Capability = 38 CAP_BPF Capability = 39 ) // WithCapabilities runs `f` with only the given capabilities // in the effective set. This allows us to assert that certain operations // only require specific capabilities. // // The code in `f` and any code called by `f` must NOT call [runtime.LockOSThread], // as this could leave the current goroutine permanently pinned to an OS thread. // It must also not create any goroutines of its own, as that will result in a new // OS thread being created that may or may not inherit the new capabilities of its // parent, and will later be released into the schedulable pool of threads available // for goroutine scheduling. // // Warning: on non-linux platforms, this function calls through to `f` without // side effects. 
func WithCapabilities(tb testing.TB, caps []Capability, f func()) { tb.Helper() if !platform.IsLinux { f() return } runtime.LockOSThread() defer runtime.UnlockOSThread() orig, err := capget() if err != nil { tb.Fatal("Can't get capabilities:", err) } var set capUserData for _, cap := range caps { set.Effective |= 1 << uint(cap) } set.Permitted = orig.Permitted if err := capset(set); err != nil { tb.Fatal("Can't set capabilities:", err) } f() if err := capset(orig); err != nil { tb.Fatal("Can't restore capabilities:", err) } } type capUserData struct { Effective uint64 Permitted uint64 Inheritable uint64 } func capget() (capUserData, error) { var hdr = &unix.CapUserHeader{ Version: unix.LINUX_CAPABILITY_VERSION_3, } var data [2]unix.CapUserData err := unix.Capget(hdr, &data[0]) if err != nil { return capUserData{}, err } return capUserData{ Effective: uint64(data[0].Effective) | uint64(data[1].Effective)<<32, Permitted: uint64(data[0].Permitted) | uint64(data[1].Permitted)<<32, Inheritable: uint64(data[0].Inheritable) | uint64(data[1].Inheritable)<<32, }, err } func capset(data capUserData) error { var hdr = &unix.CapUserHeader{ Version: unix.LINUX_CAPABILITY_VERSION_3, } var linuxData [2]unix.CapUserData linuxData[0].Effective = uint32(data.Effective & 0xFFFFFFFF) linuxData[0].Permitted = uint32(data.Permitted & 0xFFFFFFFF) linuxData[0].Inheritable = uint32(data.Inheritable & 0xFFFFFFFF) linuxData[1].Effective = uint32(data.Effective >> 32) linuxData[1].Permitted = uint32(data.Permitted >> 32) linuxData[1].Inheritable = uint32(data.Inheritable >> 32) return unix.Capset(hdr, &linuxData[0]) } ================================================ FILE: internal/testutils/cgroup.go ================================================ package testutils import ( "errors" "os" "strings" "sync" "testing" "github.com/cilium/ebpf/internal/unix" ) var cgroup2Path = sync.OnceValues(func() (string, error) { mounts, err := os.ReadFile("/proc/mounts") if err != nil { return "", err } for 
_, line := range strings.Split(string(mounts), "\n") { mount := strings.SplitN(line, " ", 3) if mount[0] == "cgroup2" { return mount[1], nil } continue } return "", errors.New("cgroup2 not mounted") }) func CreateCgroup(tb testing.TB) *os.File { tb.Helper() cg2, err := cgroup2Path() if err != nil { tb.Fatal("Can't locate cgroup2 mount:", err) } cgdir, err := os.MkdirTemp(cg2, "ebpf-link") if err != nil { tb.Fatal("Can't create cgroupv2:", err) } cgroup, err := os.Open(cgdir) if err != nil { os.Remove(cgdir) tb.Fatal(err) } tb.Cleanup(func() { cgroup.Close() os.Remove(cgdir) }) return cgroup } func GetCgroupIno(t *testing.T, cgroup *os.File) uint64 { cgroupStat := unix.Stat_t{} err := unix.Fstat(int(cgroup.Fd()), &cgroupStat) if err != nil { t.Fatal(err) } return cgroupStat.Ino } ================================================ FILE: internal/testutils/chan.go ================================================ package testutils import ( "testing" "time" ) // WaitChan waits for a value to be sent on a channel, or for a timeout to // occur. If the timeout is reached, the test will fail. func WaitChan[T any](tb testing.TB, ch <-chan T, timeout time.Duration) { tb.Helper() select { case <-ch: return case <-time.After(timeout): tb.Fatalf("timeout waiting for channel") } } ================================================ FILE: internal/testutils/checkers.go ================================================ package testutils import ( "bytes" "fmt" "reflect" "github.com/go-quicktest/qt" ) // Contains checks if interface value I is of type T. Use with qt.Satisfies: // // qt.Assert(t, qt.Satisfies(p, testutils.Contains[*ebpf.Program])) func Contains[T, I any](i I) bool { _, ok := any(i).(T) return ok } // IsDeepCopy checks that got is a deep copy of want. // // All primitive values must be equal, but pointers must be distinct. // This is different from [reflect.DeepEqual] which will accept equal pointer values. 
// That is, reflect.DeepEqual(a, a) is true, while IsDeepCopy(a, a) is false. func IsDeepCopy[T any](got, want T) qt.Checker { return &deepCopyChecker[T]{got, want, make(map[pair]struct{})} } type pair struct { got, want reflect.Value } type deepCopyChecker[T any] struct { got, want T visited map[pair]struct{} } func (dcc *deepCopyChecker[T]) Check(_ func(key string, value any)) error { return dcc.check(reflect.ValueOf(dcc.got), reflect.ValueOf(dcc.want)) } func (dcc *deepCopyChecker[T]) check(got, want reflect.Value) error { switch want.Kind() { case reflect.Interface: return dcc.check(got.Elem(), want.Elem()) case reflect.Pointer: if got.IsNil() && want.IsNil() { return nil } if got.IsNil() { return fmt.Errorf("expected non-nil pointer") } if want.IsNil() { return fmt.Errorf("expected nil pointer") } if got.UnsafePointer() == want.UnsafePointer() { return fmt.Errorf("equal pointer values") } switch want.Type() { case reflect.TypeOf((*bytes.Reader)(nil)): // bytes.Reader doesn't allow modifying it's contents, so we // allow a shallow copy. return nil } if _, ok := dcc.visited[pair{got, want}]; ok { // Deal with recursive types. 
return nil } dcc.visited[pair{got, want}] = struct{}{} return dcc.check(got.Elem(), want.Elem()) case reflect.Slice: if got.IsNil() && want.IsNil() { return nil } if got.IsNil() { return fmt.Errorf("expected non-nil slice") } if want.IsNil() { return fmt.Errorf("expected nil slice") } if got.Len() != want.Len() { return fmt.Errorf("expected %d elements, got %d", want.Len(), got.Len()) } if want.Len() == 0 { return nil } if got.UnsafePointer() == want.UnsafePointer() { return fmt.Errorf("equal backing memory") } fallthrough case reflect.Array: for i := 0; i < want.Len(); i++ { if err := dcc.check(got.Index(i), want.Index(i)); err != nil { return fmt.Errorf("index %d: %w", i, err) } } return nil case reflect.Struct: for i := 0; i < want.NumField(); i++ { if err := dcc.check(got.Field(i), want.Field(i)); err != nil { return fmt.Errorf("%q: %w", want.Type().Field(i).Name, err) } } return nil case reflect.Map: if got.Len() != want.Len() { return fmt.Errorf("expected %d items, got %d", want.Len(), got.Len()) } if got.UnsafePointer() == want.UnsafePointer() { return fmt.Errorf("maps are equal") } iter := want.MapRange() for iter.Next() { key := iter.Key() got := got.MapIndex(iter.Key()) if !got.IsValid() { return fmt.Errorf("key %v is missing", key) } want := iter.Value() if err := dcc.check(got, want); err != nil { return fmt.Errorf("key %v: %w", key, err) } } return nil case reflect.Chan, reflect.UnsafePointer: return fmt.Errorf("%s is not supported", want.Type()) default: // Compare by value as usual. 
if !got.Equal(want) { return fmt.Errorf("%#v is not equal to %#v", got, want) } return nil } } func (dcc *deepCopyChecker[T]) Args() []qt.Arg { return []qt.Arg{ {Name: "got", Value: dcc.got}, {Name: "want", Value: dcc.want}, } } ================================================ FILE: internal/testutils/checkers_test.go ================================================ package testutils import ( "testing" "github.com/go-quicktest/qt" ) func TestIsDeepCopy(t *testing.T) { type s struct { basic int array [1]*int array0 [0]int ptr *int slice []*int ifc any m map[*int]*int rec *s } key := 1 copy := func() *s { v := &s{ 0, [...]*int{new(int)}, [...]int{}, new(int), []*int{new(int)}, new(int), map[*int]*int{&key: new(int)}, nil, } v.rec = v return v } a, b := copy(), copy() qt.Check(t, qt.IsNil(IsDeepCopy(a, b).Check(nil))) a.basic++ qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"basic": .*`)) a = copy() (*a.array[0])++ qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"array": index 0: .*`)) a = copy() a.array[0] = nil qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"array": index 0: .*`)) a = copy() a.array = b.array qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"array": index 0: .*`)) a = copy() (*a.ptr)++ qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"ptr": .*`)) a = copy() a.ptr = b.ptr qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"ptr": .*`)) a = copy() (*a.slice[0])++ qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"slice": .*`)) a = copy() a.slice[0] = nil qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"slice": .*`)) a = copy() a.slice = nil qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"slice": .*`)) a = copy() a.slice = b.slice qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"slice": .*`)) a = copy() *(a.ifc.(*int))++ qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"ifc": .*`)) a = copy() a.ifc = b.ifc qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, 
b).Check(nil), `"ifc": .*`)) a = copy() a.rec = b.rec qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"rec": .*`)) a = copy() a.m = b.m qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"m": .*`)) a = copy() (*a.m[&key])++ qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"m": .*`)) a = copy() a.m[new(int)] = new(int) qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"m": .*`)) a = copy() delete(a.m, &key) qt.Check(t, qt.ErrorMatches(IsDeepCopy(a, b).Check(nil), `"m": .*`)) } ================================================ FILE: internal/testutils/cpu.go ================================================ package testutils import ( "runtime" "testing" "github.com/cilium/ebpf/internal/unix" "github.com/go-quicktest/qt" ) // LockOSThreadToSingleCPU force the current goroutine to run on a single CPU. func LockOSThreadToSingleCPU(tb testing.TB) { tb.Helper() runtime.LockOSThread() tb.Cleanup(runtime.UnlockOSThread) var old unix.CPUSet err := unix.SchedGetaffinity(0, &old) qt.Assert(tb, qt.IsNil(err)) // Schedule test to run on only CPU 0 var first unix.CPUSet first.Set(0) err = unix.SchedSetaffinity(0, &first) qt.Assert(tb, qt.IsNil(err)) tb.Cleanup(func() { _ = unix.SchedSetaffinity(0, &old) }) } ================================================ FILE: internal/testutils/fd_other.go ================================================ //go:build !windows package testutils import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/unix" ) func DupFD(tb testing.TB, fd int) int { tb.Helper() dup, err := unix.FcntlInt(uintptr(fd), unix.F_DUPFD_CLOEXEC, 1) qt.Assert(tb, qt.IsNil(err)) return dup } ================================================ FILE: internal/testutils/fd_windows.go ================================================ package testutils import ( "testing" "github.com/cilium/ebpf/internal/efw" "github.com/go-quicktest/qt" ) func DupFD(tb testing.TB, fd int) int { tb.Helper() dup, err := efw.EbpfDuplicateFd(fd) 
qt.Assert(tb, qt.IsNil(err)) return dup } ================================================ FILE: internal/testutils/feature.go ================================================ package testutils import ( "encoding/binary" "errors" "os" "runtime" "strings" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" ) const ( ignoreVersionEnvVar = "EBPF_TEST_IGNORE_VERSION" ) func CheckFeatureTest(t *testing.T, fn func() error) { t.Helper() checkFeatureTestError(t, fn()) } func checkFeatureTestError(t *testing.T, err error) { t.Helper() if err == nil { return } if errors.Is(err, internal.ErrNotSupportedOnOS) { t.Skip(err) } var ufe *internal.UnsupportedFeatureError if errors.As(err, &ufe) { checkVersion(t, ufe) } else { t.Error("Feature test failed:", err) } } func CheckFeatureMatrix[K comparable](t *testing.T, fm internal.FeatureMatrix[K]) { t.Helper() for key, ft := range fm { t.Run(ft.Name, func(t *testing.T) { checkFeatureTestError(t, fm.Result(key)) }) } } func SkipIfNotSupported(tb testing.TB, err error) { tb.Helper() if err == internal.ErrNotSupported { tb.Fatal("Unwrapped ErrNotSupported") } var ufe *internal.UnsupportedFeatureError if errors.As(err, &ufe) { checkVersion(tb, ufe) tb.Skip(ufe.Error()) } if errors.Is(err, internal.ErrNotSupported) { tb.Skip(err.Error()) } } func SkipIfNotSupportedOnOS(tb testing.TB, err error) { tb.Helper() if err == internal.ErrNotSupportedOnOS { tb.Fatal("Unwrapped ErrNotSupportedOnOS") } if errors.Is(err, internal.ErrNotSupportedOnOS) { tb.Skip(err.Error()) } } func checkVersion(tb testing.TB, ufe *internal.UnsupportedFeatureError) { if ufe.MinimumVersion.Unspecified() { return } tb.Helper() if ignoreVersionCheck(tb.Name()) { tb.Logf("Ignoring error due to %s: %s", ignoreVersionEnvVar, ufe.Error()) return } if !isPlatformVersionLessThan(tb, ufe.MinimumVersion, platformVersion(tb)) { tb.Fatalf("Feature '%s' isn't supported even though kernel is newer than %s", ufe.Name, 
ufe.MinimumVersion) } } // Skip a test based on the Linux version we are running on. // // Warning: this function does not have an effect on platforms other than Linux. func SkipOnOldKernel(tb testing.TB, minVersion, feature string) { tb.Helper() if !platform.IsLinux { tb.Logf("Ignoring version constraint %s for %s on %s", minVersion, feature, runtime.GOOS) return } if IsVersionLessThan(tb, minVersion) { tb.Skipf("Test requires at least kernel %s (due to missing %s)", minVersion, feature) } } // Check whether the current runtime version is less than some minimum. func IsVersionLessThan(tb testing.TB, minVersions ...string) bool { tb.Helper() version, err := platform.SelectVersion(minVersions) qt.Assert(tb, qt.IsNil(err)) if version == "" { // No matching version means that the platform // doesn't support whatever feature. return true } minv, err := internal.NewVersion(version) if err != nil { tb.Fatalf("Invalid version %s: %s", version, err) } return isPlatformVersionLessThan(tb, minv, platformVersion(tb)) } func isPlatformVersionLessThan(tb testing.TB, minv, runv internal.Version) bool { tb.Helper() key := "CI_MAX_KERNEL_VERSION" if platform.IsWindows { key = "CI_MAX_EFW_VERSION" } if max := os.Getenv(key); max != "" { maxv, err := internal.NewVersion(max) if err != nil { tb.Fatalf("Invalid version %q in %s: %s", max, key, err) } if maxv.Less(minv) { tb.Fatalf("Test for %s will never execute on CI since %s is the most recent runtime", minv, maxv) } } return runv.Less(minv) } // ignoreVersionCheck checks whether to omit the version check for a test. // // It reads a comma separated list of test names from an environment variable. // // For example: // // EBPF_TEST_IGNORE_VERSION=TestABC,TestXYZ go test ... 
func ignoreVersionCheck(tName string) bool {
	// An unset or empty variable means no test is exempt.
	tNames := os.Getenv(ignoreVersionEnvVar)
	if tNames == "" {
		return false
	}

	// Entries are compared after trimming surrounding whitespace, so
	// "TestABC, TestXYZ" matches both names.
	ignored := strings.Split(tNames, ",")
	for _, n := range ignored {
		if strings.TrimSpace(n) == tName {
			return true
		}
	}
	return false
}

// SkipNonNativeEndian skips the test or benchmark if bo doesn't match the
// host's native endianness.
func SkipNonNativeEndian(tb testing.TB, bo binary.ByteOrder) {
	tb.Helper()

	if bo != internal.NativeEndian {
		tb.Skip("Skipping due to non-native endianness")
	}
}

================================================
FILE: internal/testutils/feature_other.go
================================================
//go:build !windows

package testutils

import (
	"testing"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/linux"
)

// platformVersion returns the version reported by linux.KernelVersion.
//
// The test is failed immediately if the version can't be determined.
func platformVersion(tb testing.TB) internal.Version {
	tb.Helper()

	v, err := linux.KernelVersion()
	if err != nil {
		tb.Fatal(err)
	}
	return v
}

================================================
FILE: internal/testutils/feature_test.go
================================================
package testutils

import (
	"testing"

	"github.com/go-quicktest/qt"
)

// TestIgnoreKernelVersionCheckWhenEnvVarIsSet checks that ignoreVersionCheck
// honours the comma separated list in the ignore environment variable.
func TestIgnoreKernelVersionCheckWhenEnvVarIsSet(t *testing.T) {
	tests := []struct {
		name                     string
		toIgnoreNamesEnvValue    string
		testName                 string
		ignoreKernelVersionCheck bool
	}{
		{
			name:                     "should NOT ignore kernel version check if environment var set to empty string",
			toIgnoreNamesEnvValue:    "",
			testName:                 "TestABC",
			ignoreKernelVersionCheck: false,
		},
		{
			name:                     "should ignore kernel version check if environment var set to skip test name with single value",
			toIgnoreNamesEnvValue:    "TestABC",
			testName:                 "TestABC",
			ignoreKernelVersionCheck: true,
		},
		{
			name:                     "should match test name when multiple comma separated names list is provided",
			toIgnoreNamesEnvValue:    "TestABC,TestXYZ",
			testName:                 "TestXYZ",
			ignoreKernelVersionCheck: true,
		},
		{
			name:                     "should NOT match test name when multiple comma separated names list is provided but name is not present in list",
			toIgnoreNamesEnvValue:    "TestABC,TestXYZ",
			testName:                 "TestPQR",
			ignoreKernelVersionCheck: false,
		},
		{
			name:                     "should match test name if names list has leading/trailing spaces",
			toIgnoreNamesEnvValue:    "TestABC, TestXYZ , TestPQR",
			testName:                 "TestXYZ",
			ignoreKernelVersionCheck: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// t.Setenv restores the previous value when the subtest ends.
			t.Setenv(ignoreVersionEnvVar, tt.toIgnoreNamesEnvValue)

			if got := ignoreVersionCheck(tt.testName); got != tt.ignoreKernelVersionCheck {
				t.Errorf("ignoreKernelVersionCheck() = %v, want %v", got, tt.ignoreKernelVersionCheck)
			}
		})
	}
}

// TestPlatformVersion checks that platformVersion yields a specified version.
func TestPlatformVersion(t *testing.T) {
	qt.Assert(t, qt.IsFalse(platformVersion(t).Unspecified()))
}

================================================
FILE: internal/testutils/feature_windows.go
================================================
package testutils

import (
	"os"
	"testing"

	"github.com/cilium/ebpf/internal"
	"github.com/go-quicktest/qt"
)

// platformVersion parses the version found in the CI_EFW_VERSION environment
// variable.
//
// The test is failed if the variable is missing or can't be parsed.
func platformVersion(tb testing.TB) internal.Version {
	tb.Helper()

	versionStr, ok := os.LookupEnv("CI_EFW_VERSION")
	qt.Assert(tb, qt.IsTrue(ok), qt.Commentf("Missing CI_EFW_VERSION environment variable"))

	version, err := internal.NewVersion(versionStr)
	qt.Assert(tb, qt.IsNil(err), qt.Commentf("Parse eBPF for Windows version"))
	return version
}

================================================
FILE: internal/testutils/glob.go
================================================
package testutils

import (
	"fmt"
	"path/filepath"
	"strings"
	"testing"

	"golang.org/x/sys/cpu"
)

// Files calls fn for each given file, in a subtest named after the file's
// base name.
//
// The test is failed if files is empty.
func Files(t *testing.T, files []string, fn func(*testing.T, string)) {
	t.Helper()

	if len(files) == 0 {
		t.Fatalf("No files given")
	}

	for _, f := range files {
		file := f // force copy
		name := filepath.Base(file)
		t.Run(name, func(t *testing.T) {
			fn(t, file)
		})
	}
}

// Glob finds files matching a pattern.
//
// The pattern may include a full path.
// Excludes use the same syntax as pattern, but are only applied to the
// basename instead of the full path.
func Glob(tb testing.TB, pattern string, excludes ...string) []string {
	tb.Helper()

	files, err := filepath.Glob(pattern)
	if err != nil {
		tb.Fatal("Can't glob files:", err)
	}

	// Without excludes the glob result is returned as is.
	if len(excludes) == 0 {
		return files
	}

	var filtered []string
nextFile:
	for _, file := range files {
		base := filepath.Base(file)
		for _, exclude := range excludes {
			if matched, err := filepath.Match(exclude, base); err != nil {
				// A malformed exclude pattern fails the test.
				tb.Fatal(err)
			} else if matched {
				continue nextFile
			}
		}
		filtered = append(filtered, file)
	}

	return filtered
}

// NativeFile substitutes %s with an abbreviation of the host endianness.
//
// The abbreviation is "eb" on big endian hosts and "el" otherwise. The test
// is failed if path doesn't contain %s.
func NativeFile(tb testing.TB, path string) string {
	tb.Helper()

	if !strings.Contains(path, "%s") {
		tb.Fatalf("File %q doesn't contain %%s", path)
	}

	if cpu.IsBigEndian {
		return fmt.Sprintf(path, "eb")
	}

	return fmt.Sprintf(path, "el")
}

================================================
FILE: internal/testutils/netns_linux.go
================================================
//go:build linux

// The netns implementation in this file was taken from cilium/cilium.

package testutils

import (
	"fmt"
	"os"
	"runtime"
	"testing"

	"golang.org/x/sync/errgroup"

	"github.com/cilium/ebpf/internal/unix"
)

// NetNS is a handle to a network namespace, held as an open file.
type NetNS struct {
	f *os.File
}

// NewNetNS returns a new network namespace.
//
// The handle is closed via tb.Cleanup. The test is failed if the namespace
// can't be created.
func NewNetNS(tb testing.TB) *NetNS {
	tb.Helper()

	ns, err := newNetNS()
	if err != nil {
		tb.Fatal(err)
	}

	tb.Cleanup(func() {
		ns.close()
	})

	return ns
}

// Do runs the provided func in the netns without changing the calling thread's
// netns.
//
// The code in f and any code called by f must NOT call [runtime.LockOSThread],
// as this could leave the goroutine created by Do permanently pinned to an OS
// thread.
//
// Returns the error from f, unless entering or leaving the namespace failed,
// in which case that error is returned instead.
func (h *NetNS) Do(f func() error) error {
	// Start the func in a new goroutine and lock it to an exclusive thread. This
	// ensures that if execution of the goroutine fails unexpectedly before we
	// call UnlockOSThread, the go runtime will ensure the underlying OS thread is
	// disposed of, rather than reused in a potentially undefined state.
	//
	// See also: https://pkg.go.dev/runtime#UnlockOSThread
	var g errgroup.Group
	g.Go(func() error {
		// Lock the newly-created goroutine to the OS thread it's running on so we
		// can safely move it into another network namespace. (per-thread state)
		restoreUnlock, err := lockOSThread()
		if err != nil {
			return err
		}

		if err := set(h.f); err != nil {
			return fmt.Errorf("set netns: %w (terminating OS thread)", err)
		}

		ferr := f()

		// Attempt to restore the underlying OS thread to its original network
		// namespace and unlock the running goroutine from its OS thread. Any
		// failures during this process will leave the goroutine locked, making the
		// underlying OS thread terminate when this function returns.
		if err := restoreUnlock(); err != nil {
			return fmt.Errorf("restore original netns: %w (terminating OS thread)", err)
		}
		return ferr
	})

	return g.Wait()
}

// newNetNS creates a new network namespace and returns a handle to it.
//
// A finalizer closes the handle once it is garbage collected.
func newNetNS() (*NetNS, error) {
	var f *os.File

	// Perform network namespace creation in a new goroutine to give us the
	// possibility of terminating the underlying OS thread (by terminating the
	// goroutine) if something goes wrong.
	var g errgroup.Group
	g.Go(func() error {
		restoreUnlock, err := lockOSThread()
		if err != nil {
			return fmt.Errorf("lock OS thread: %w", err)
		}

		// Move the underlying OS thread to a new network namespace. This can be
		// undone by calling restoreUnlock().
		if err := unshare(); err != nil {
			return fmt.Errorf("create new netns: %w", err)
		}

		// Take out a reference to the new netns.
		f, err = getCurrent()
		if err != nil {
			return fmt.Errorf("get current netns: %w (terminating OS thread)", err)
		}

		// Restore the OS thread to its original network namespace or implicitly
		// terminate it if something went wrong.
		if err := restoreUnlock(); err != nil {
			return fmt.Errorf("restore current netns: %w (terminating OS thread)", err)
		}

		return nil
	})

	if err := g.Wait(); err != nil {
		return nil, err
	}

	ns := &NetNS{f: f}

	// Prevent resource leaks by eventually closing the underlying file descriptor
	// after ns is garbage collected.
	runtime.SetFinalizer(ns, (*NetNS).close)

	return ns, nil
}

// close releases the handle to the network namespace.
//
// Calling close again after a successful close is a no-op.
func (h *NetNS) close() error {
	if h.f == nil {
		return nil
	}

	// Close closes the handle to the network namespace. This does not necessarily
	// mean destroying the network namespace itself, which only happens when all
	// references to it are gone and all of its processes have been terminated.
	if err := h.f.Close(); err != nil {
		return err
	}

	h.f = nil

	return nil
}

// lockOSThread locks the calling goroutine to its OS thread and returns a
// function which restores the thread's original network namespace and unlocks
// the goroutine again.
//
// If obtaining the current namespace fails the goroutine is unlocked before
// returning the error.
func lockOSThread() (func() error, error) {
	runtime.LockOSThread()

	orig, err := getCurrent()
	if err != nil {
		runtime.UnlockOSThread()
		return nil, fmt.Errorf("get current namespace: %w", err)
	}

	return func() error {
		// The reference to the original namespace is only needed for set().
		defer orig.Close()

		if err := set(orig); err != nil {
			// We didn't manage to restore the OS thread to its original namespace.
			// Don't unlock the current goroutine from its thread, so the thread will
			// terminate when the current goroutine does.
			return err
		}

		// Original netns was restored, release the OS thread back into the
		// schedulable pool.
		runtime.UnlockOSThread()

		return nil
	}, nil
}

// unshare moves the calling OS thread of the calling goroutine to a new network
// namespace. Must only be called after a prior call to lockOSThread().
func unshare() error {
	if err := unix.Unshare(unix.CLONE_NEWNET); err != nil {
		return err
	}

	return nil
}

// set sets the underlying OS thread of the calling goroutine to the netns
// pointed at by f.
func set(f *os.File) error {
	return unix.Setns(int(f.Fd()), unix.CLONE_NEWNET)
}

// getCurrent gets a file descriptor to the current thread network namespace.
func getCurrent() (*os.File, error) { return getFromThread(os.Getpid(), unix.Gettid()) } // getFromPath gets a file descriptor to the network namespace pinned at path. func getFromPath(path string) (*os.File, error) { return os.OpenFile(path, unix.O_RDONLY|unix.O_CLOEXEC, 0) } // getFromThread gets a file descriptor to the network namespace of a given pid // and tid. func getFromThread(pid, tid int) (*os.File, error) { return getFromPath(fmt.Sprintf("/proc/%d/task/%d/ns/net", pid, tid)) } ================================================ FILE: internal/testutils/netns_other.go ================================================ //go:build !linux // This file is a stub to allow netns to be compiled on non-Linux platforms. package testutils import ( "testing" "github.com/cilium/ebpf/internal" ) type NetNS struct { } func NewNetNS(tb testing.TB) *NetNS { return nil } func (h *NetNS) Do(f func() error) error { return internal.ErrNotSupportedOnOS } ================================================ FILE: internal/testutils/programs.go ================================================ package testutils import ( "fmt" "os" "testing" ) func ClangBin(tb testing.TB) string { tb.Helper() if testing.Short() { tb.Skip("Not compiling with -short") } // Use a floating clang version for local development, but allow CI to run // against oldest supported clang. clang := "clang" if minVersion := os.Getenv("CI_MIN_CLANG_VERSION"); minVersion != "" { clang = fmt.Sprintf("clang-%s", minVersion) } tb.Log("Testing against", clang) return clang } ================================================ FILE: internal/testutils/rlimit.go ================================================ package testutils import ( "fmt" "os" "github.com/cilium/ebpf/rlimit" ) func init() { // Increase the memlock for all tests unconditionally. It's a great source of // weird bugs, since different distros have different default limits. 
if err := rlimit.RemoveMemlock(); err != nil { fmt.Fprintln(os.Stderr, "WARNING: Failed to adjust rlimit, tests may fail") } } ================================================ FILE: internal/testutils/seed.go ================================================ package testutils import ( "math/rand" "os" "strconv" "sync" "testing" "time" ) var randSeed struct { value int64 once sync.Once } func Rand(tb testing.TB) *rand.Rand { randSeed.once.Do(func() { randSeed.value = time.Now().UnixMicro() }) seed := randSeed.value if seedStr, ok := os.LookupEnv("TEST_SEED"); ok { var err error seed, err = strconv.ParseInt(seedStr, 0, 64) if err != nil { tb.Fatal("Parse TEST_SEED environment variable:", err) } } tb.Logf("TEST_SEED=%d\n", seed) return rand.New(rand.NewSource(seed)) } ================================================ FILE: internal/testutils/testmain/fd_trace.go ================================================ package testmain import ( "bytes" "fmt" "os" "runtime" "sync" "sync/atomic" ) // foundLeak is atomic since the GC may collect objects in parallel. var foundLeak atomic.Bool func onLeakFD(fs *runtime.Frames) { foundLeak.Store(true) fmt.Fprintln(os.Stderr, "leaked fd created at:") fmt.Fprintln(os.Stderr, formatFrames(fs)) } // fds is a registry of all file descriptors wrapped into sys.fds that were // created while an fd tracer was active. var fds *sync.Map // map[int]*runtime.Frames // TraceFD associates raw with the current execution stack. // // skip controls how many entries of the stack the function should skip. func TraceFD(raw int, skip int) { if fds == nil { return } // Attempt to store the caller's stack for the given fd value. // Panic if fds contains an existing stack for the fd. old, exist := fds.LoadOrStore(raw, callersFrames(skip)) if exist { f := old.(*runtime.Frames) panic(fmt.Sprintf("found existing stack for fd %d:\n%s", raw, formatFrames(f))) } } // ForgetFD removes any existing association for raw. 
func ForgetFD(raw int) { if fds != nil { fds.Delete(raw) } } // LeakFD indicates that raw was leaked. // // Calling the function with a value that was not passed to [TraceFD] before // is undefined. func LeakFD(raw int) { if fds == nil { return } // Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback // is invoked at most once for one sys.FD allocation, runtime.Frames can only // be unwound once. f, ok := fds.LoadAndDelete(raw) if ok { onLeakFD(f.(*runtime.Frames)) } } // flushFrames removes all elements from fds and returns them as a slice. This // deals with the fact that a runtime.Frames can only be unwound once using // Next(). func flushFrames() []*runtime.Frames { var frames []*runtime.Frames fds.Range(func(key, value any) bool { frames = append(frames, value.(*runtime.Frames)) fds.Delete(key) return true }) return frames } func callersFrames(skip int) *runtime.Frames { c := make([]uintptr, 32) // Skip runtime.Callers and this function. i := runtime.Callers(skip+2, c) if i == 0 { return nil } return runtime.CallersFrames(c) } // formatFrames formats a runtime.Frames as a human-readable string. func formatFrames(fs *runtime.Frames) string { var b bytes.Buffer for { f, more := fs.Next() b.WriteString(fmt.Sprintf("\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line)) if !more { break } } return b.String() } ================================================ FILE: internal/testutils/testmain/main.go ================================================ package testmain import ( "flag" "fmt" "os" "sync" "github.com/cilium/ebpf/internal/platform" ) type testingM interface { Run() int } // Run m with various debug aids enabled. // // The function calls [os.Exit] and does not return. 
func Run(m testingM) { const traceLogFlag = "trace-log" var ts *traceSession if platform.IsWindows { traceLog := flag.Bool(traceLogFlag, false, "Output a trace of eBPF runtime activity") flag.Parse() if *traceLog { var err error ts, err = newTraceSession() if err != nil { fmt.Fprintln(os.Stderr, "Disabling trace logging:", err) } } } defer ts.Close() fds = new(sync.Map) ret := m.Run() for _, f := range flushFrames() { onLeakFD(f) } if foundLeak.Load() { ret = 99 } if err := ts.Dump(os.Stderr); err != nil { fmt.Fprintln(os.Stderr, "Error while dumping trace log:", err) ret = 99 } if platform.IsWindows && ret != 0 && ts == nil { fmt.Fprintf(os.Stderr, "Consider enabling trace logging with -%s\n", traceLogFlag) } os.Exit(ret) } ================================================ FILE: internal/testutils/testmain/windows.go ================================================ package testmain import ( "encoding/xml" "fmt" "io" "os" "os/exec" "path/filepath" "slices" "strconv" "strings" "text/tabwriter" ) type tracelogKeywords uint64 // Know tracelog keywords. // // See https://github.com/microsoft/ebpf-for-windows/blob/main/libs/shared/ebpf_tracelog.h var allKeywords = []string{ "entry-exit", "base", "error", "epoch", "core", "link", "map", "program", "api", "printk", "native", } func (kw *tracelogKeywords) UnmarshalText(text []byte) error { decoded, err := strconv.ParseUint(string(text), 0, 64) if err != nil { return fmt.Errorf("foo: %w", err) } *kw = tracelogKeywords(decoded) return nil } func (kw tracelogKeywords) decode() []string { var keywords []string for _, keyword := range allKeywords { if kw&1 > 0 { keywords = append(keywords, keyword) } kw >>= 1 } if kw > 0 { keywords = append(keywords, fmt.Sprintf("0x%x", kw)) } return keywords } type traceSession struct { session string } // newTraceSession starts a trace log for eBPF for Windows related events. 
//
// See also:
//
//   - https://github.com/microsoft/ebpf-for-windows/blob/main/docs/GettingStarted.md#using-tracing
//   - https://devblogs.microsoft.com/performance-diagnostics/controlling-the-event-session-name-with-the-instance-name/
func newTraceSession() (*traceSession, error) {
	// The recording profile is looked up below %ProgramFiles%, bail out if it
	// isn't there.
	def := filepath.Join(os.Getenv("ProgramFiles"), "ebpf-for-windows\\ebpfforwindows.wprp")
	if _, err := os.Stat(def); err != nil {
		return nil, err
	}

	// Derive the instance name from the pid.
	session := fmt.Sprintf("epbf-go-%d", os.Getpid())
	wpr := exec.Command("wpr.exe", "-start", def, "-filemode", "-instancename", session)
	wpr.Stderr = os.Stderr
	if err := wpr.Run(); err != nil {
		return nil, err
	}

	return &traceSession{session}, nil
}

// Close stops the trace session, writing the trace to os.DevNull.
//
// It is safe to call on a nil session and after the session has been stopped.
func (ts *traceSession) Close() error {
	if ts == nil {
		return nil
	}

	return ts.stop(os.DevNull)
}

// stop ends the trace session and writes the collected trace to file.
//
// Does nothing if the session has already been stopped.
func (ts *traceSession) stop(file string) error {
	if ts.session == "" {
		return nil
	}

	wpr := exec.Command("wpr.exe", "-stop", file, "-instancename", ts.session)
	if err := wpr.Run(); err != nil {
		return err
	}

	// Mark the session as stopped so that later calls are no-ops.
	ts.session = ""
	return nil
}

// Dump stops the trace session and writes a summary of the trace to w.
//
// It is safe to call on a nil session, in which case nothing is written.
func (ts *traceSession) Dump(w io.Writer) error {
	if ts == nil {
		return nil
	}

	path, err := os.MkdirTemp("", "ebpf-go-trace")
	if err != nil {
		return err
	}
	defer os.RemoveAll(path)

	trace := filepath.Join(path, "trace.etl")
	if err := ts.stop(trace); err != nil {
		return fmt.Errorf("write trace: %w", err)
	}

	// Convert the trace to XML; the result is read from trace.xml in the same
	// directory below.
	netsh := exec.Command("netsh.exe", "trace", "convert", trace, "dump=XML")
	if err := netsh.Run(); err != nil {
		return err
	}

	f, err := os.Open(filepath.Join(path, "trace.xml"))
	if err != nil {
		return err
	}
	defer f.Close()

	return summariseWPRTrace(f, w)
}

// summariseWPRTrace reads an XML trace from r and writes one line per event
// to w.
//
// Only events whose provider name contains "Ebpf" are included. Each line
// consists of a flag ("!" for levels 1 to 3), the decoded keywords, the value
// of the first of Entry, Message or ErrorMessage that is present, and the
// remaining data as name=value pairs sorted by name.
func summariseWPRTrace(r io.Reader, w io.Writer) error {
	type nameValue struct {
		Name  string `xml:"Name,attr"`
		Value string `xml:",chardata"`
	}

	type event struct {
		XMLName xml.Name `xml:"Event"`
		System  struct {
			Provider struct {
				Name string `xml:"Name,attr"`
			} `xml:"Provider"`
			TimeCreated struct {
				SystemTime string `xml:"SystemTime,attr"`
			} `xml:"TimeCreated"`
			Keywords tracelogKeywords `xml:"Keywords"`
			Level    uint64           `xml:"Level"`
		} `xml:"System"`
		EventData struct {
			Data []nameValue `xml:"Data"`
		} `xml:"EventData"`
		RenderingInfo struct {
			Task string `xml:"Task"`
		} `xml:"RenderingInfo"`
	}

	var events struct {
		Events []event `xml:"Event"`
	}

	err := xml.NewDecoder(r).Decode(&events)
	if err != nil {
		return fmt.Errorf("unmarshal trace XML: %w", err)
	}

	tw := tabwriter.NewWriter(w, 0, 0, 1, ' ', 0)
	for _, event := range events.Events {
		if !strings.Contains(event.System.Provider.Name, "Ebpf") {
			continue
		}

		flag := " "
		// See https://learn.microsoft.com/en-us/windows/win32/api/traceloggingprovider/nf-traceloggingprovider-tracelogginglevel#remarks
		if event.System.Level > 0 && event.System.Level <= 3 {
			flag = "!"
		}

		kw := event.System.Keywords.decode()
		fmt.Fprintf(tw, "%s\t%s\t", flag, strings.Join(kw, ","))

		data := event.EventData.Data
		slices.SortFunc(data, func(a, b nameValue) int {
			return strings.Compare(a.Name, b.Name)
		})

		// Pull the most descriptive field to the front of the line, in order
		// of preference.
		var first string
		for _, name := range []string{
			"Entry",
			"Message",
			"ErrorMessage",
		} {
			i := slices.IndexFunc(data, func(kv nameValue) bool {
				return kv.Name == name
			})

			if i == -1 {
				continue
			}

			first = data[i].Value
			data = slices.Delete(data, i, i+1)
			break
		}

		// NB: This may be empty.
		fmt.Fprintf(tw, "%s\t", first)

		for _, data := range data {
			fmt.Fprintf(tw, "%s=%s\t", data.Name, data.Value)
		}

		fmt.Fprintln(tw)
	}

	return tw.Flush()
}

================================================
FILE: internal/testutils/testmain/windows_test.go
================================================
package testmain

import (
	"bytes"
	"compress/gzip"
	"os"
	"testing"

	"github.com/go-quicktest/qt"
)

// TestSummariseWPRTrace checks that the gzipped sample trace in testdata can
// be summarised without error and logs the result.
func TestSummariseWPRTrace(t *testing.T) {
	f, err := os.Open("testdata/trace.xml.gz")
	qt.Assert(t, qt.IsNil(err))
	defer f.Close()

	trace, err := gzip.NewReader(f)
	qt.Assert(t, qt.IsNil(err))

	var buf bytes.Buffer
	qt.Assert(t, qt.IsNil(summariseWPRTrace(trace, &buf)))
	t.Log("\n", buf.String())
}

================================================
FILE: internal/tracefs/kprobe.go
================================================
package tracefs

import (
	"crypto/rand"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"syscall"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/linux"
	"github.com/cilium/ebpf/internal/platform"
	"github.com/cilium/ebpf/internal/unix"
)

var (
	// ErrInvalidInput is wrapped by errors caused by invalid arguments.
	ErrInvalidInput = errors.New("invalid input")

	// ErrInvalidMaxActive is returned when maxactive is requested for
	// anything other than a kretprobe.
	ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes")
)

// ProbeType is the kind of probe a trace event is created for.
//
// The line comments on the constants are used by stringer to generate the
// String method.
//
//go:generate go tool stringer -type=ProbeType -linecomment
type ProbeType uint8

const (
	Kprobe ProbeType = iota // kprobe
	Uprobe                  // uprobe
)

// eventsFile opens the tracefs "<type>_events" file of pt for appending.
func (pt ProbeType) eventsFile() (*os.File, error) {
	path, err := sanitizeTracefsPath(fmt.Sprintf("%s_events", pt.String()))
	if err != nil {
		return nil, err
	}

	return os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0666)
}

// ProbeArgs describes the probe a trace event should be created for.
type ProbeArgs struct {
	Type                         ProbeType
	Symbol, Group, Path          string
	Offset, RefCtrOffset, Cookie uint64
	Pid, RetprobeMaxActive       int
	Ret                          bool
}

// RandomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by validIdentifier.
func RandomGroup(prefix string) (string, error) { if !validIdentifier(prefix) { return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, ErrInvalidInput) } b := make([]byte, 8) if _, err := rand.Read(b); err != nil { return "", fmt.Errorf("reading random bytes: %w", err) } group := fmt.Sprintf("%s_%x", prefix, b) if len(group) > 63 { return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, ErrInvalidInput) } return group, nil } // validIdentifier implements the equivalent of a regex match // against "^[a-zA-Z_][0-9a-zA-Z_-]*$". // // Trace event groups, names and kernel symbols must adhere to this set of // characters. Non-empty, first character must not be a number or hyphen, all // characters must be alphanumeric, underscore or hyphen. func validIdentifier(s string) bool { if len(s) < 1 { return false } for i, c := range []byte(s) { switch { case c >= 'a' && c <= 'z': case c >= 'A' && c <= 'Z': case c == '_': case i > 0 && (c == '-' || c >= '0' && c <= '9'): default: return false } } return true } func sanitizeTracefsPath(path ...string) (string, error) { base, err := getTracefsPath() if err != nil { return "", err } l := filepath.Join(path...) p := filepath.Join(base, l) if !strings.HasPrefix(p, base) { return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, ErrInvalidInput) } return p, nil } // getTracefsPath will return a correct path to the tracefs mount point. // Since kernel 4.1 tracefs should be mounted by default at /sys/kernel/tracing, // but may be also be available at /sys/kernel/debug/tracing if debugfs is mounted. // The available tracefs paths will depends on distribution choices. 
var getTracefsPath = sync.OnceValues(func() (string, error) {
	if !platform.IsLinux {
		return "", fmt.Errorf("tracefs: %w", internal.ErrNotSupportedOnOS)
	}

	// Candidates are probed in order, the first path with the expected
	// filesystem type wins.
	for _, p := range []struct {
		path   string
		fsType int64
	}{
		{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
		{"/sys/kernel/debug/tracing", unix.TRACEFS_MAGIC},
		// RHEL/CentOS
		{"/sys/kernel/debug/tracing", unix.DEBUGFS_MAGIC},
	} {
		if fsType, err := linux.FSType(p.path); err == nil && fsType == p.fsType {
			return p.path, nil
		}
	}

	return "", errors.New("neither debugfs nor tracefs are mounted")
})

// sanitizeIdentifier replaces every invalid character for the tracefs api with an underscore.
//
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeIdentifier(s string) string {
	// skip is true while inside a run of invalid characters, of which only
	// the first one is turned into an underscore.
	var skip bool
	return strings.Map(func(c rune) rune {
		switch {
		case c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9':
			skip = false
			return c

		case skip:
			// A negative value makes strings.Map drop the character.
			return -1

		default:
			skip = true
			return '_'
		}
	}, s)
}

// EventID reads a trace event's ID from tracefs given its group and name.
// The kernel requires group and name to be alphanumeric or underscore.
//
// An error matching os.ErrNotExist is returned as is, other read errors are
// wrapped with the group and name of the event.
func EventID(group, name string) (uint64, error) {
	if !validIdentifier(group) {
		return 0, fmt.Errorf("invalid tracefs group: %q", group)
	}

	if !validIdentifier(name) {
		return 0, fmt.Errorf("invalid tracefs name: %q", name)
	}

	path, err := sanitizeTracefsPath("events", group, name, "id")
	if err != nil {
		return 0, err
	}
	tid, err := internal.ReadUint64FromFile("%d\n", path)
	if errors.Is(err, os.ErrNotExist) {
		return 0, err
	}
	if err != nil {
		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
	}

	return tid, nil
}

// probePrefix returns the leading token of a probe definition: "p" for a
// regular probe, "r" for a return probe, and "r<maxActive>" for a return
// probe with a positive maxActive.
func probePrefix(ret bool, maxActive int) string {
	if ret {
		if maxActive > 0 {
			return fmt.Sprintf("r%d", maxActive)
		}
		return "r"
	}
	return "p"
}

// Event represents an entry in a tracefs probe events file.
type Event struct {
	typ         ProbeType
	group, name string
	// event id allocated by the kernel. 0 if the event has already been removed.
	id uint64

	// cleanup removes the event from tracefs; it is registered by NewEvent
	// and stopped by Close.
	cleanup runtime.Cleanup
}

// NewEvent creates a new ephemeral trace event.
//
// Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists. Returns an error if
// args.RetprobeMaxActive is used on non kprobe types. Returns ErrNotSupported if
// the kernel is too old to support kretprobe maxactive.
func NewEvent(args ProbeArgs) (*Event, error) {
	// Before attempting to create a trace event through tracefs,
	// check if an event with the same group and name already exists.
	// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
	// entry, so we need to rely on reads for detecting uniqueness.
	eventName := sanitizeIdentifier(args.Symbol)
	_, err := EventID(args.Group, eventName)
	if err == nil {
		return nil, fmt.Errorf("trace event %s/%s: %w", args.Group, eventName, os.ErrExist)
	}
	if errors.Is(err, unix.EINVAL) {
		return nil, fmt.Errorf("trace event %s/%s: %w (unknown symbol?)", args.Group, eventName, err)
	}
	// Only "does not exist" means that the event can be created.
	if !errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("checking trace event %s/%s: %w", args.Group, eventName, err)
	}

	// Open the kprobe_events file in tracefs.
	f, err := args.Type.eventsFile()
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var pe, token string
	switch args.Type {
	case Kprobe:
		// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
		// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
		// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
		// -:[GRP/]EVENT                                        : Clear a probe
		//
		// Some examples:
		// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
		// p:ebpf_5678/p_my_kprobe __x64_sys_execve
		//
		// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
		// kernel default to NR_CPUS. This is desired in most eBPF cases since
		// subsampling or rate limiting logic can be more accurately implemented in
		// the eBPF program itself.
		// See Documentation/kprobes.txt for more details.
		if args.RetprobeMaxActive != 0 && !args.Ret {
			return nil, ErrInvalidMaxActive
		}
		token = KprobeToken(args)
		pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, args.RetprobeMaxActive), args.Group, eventName, token)
	case Uprobe:
		// The uprobe_events syntax is as follows:
		// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
		// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
		// -:[GRP/]EVENT                           : Clear a probe
		//
		// Some examples:
		// r:ebpf_1234/readline /bin/bash:0x12345
		// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
		//
		// See Documentation/trace/uprobetracer.txt for more details.
		if args.RetprobeMaxActive != 0 {
			return nil, ErrInvalidMaxActive
		}
		token = UprobeToken(args)
		pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, 0), args.Group, eventName, token)
	}
	_, err = f.WriteString(pe)

	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
	// when trying to create a retprobe for a missing symbol.
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("token %s: not found: %w", token, err)
	}
	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
	// to an invalid insn boundary. The exact conditions that trigger this error are
	// arch specific however.
	if errors.Is(err, syscall.EILSEQ) {
		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
	}
	// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
	// be resolved.
	if errors.Is(err, syscall.ERANGE) {
		return nil, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
	}

	if err != nil {
		return nil, fmt.Errorf("token %s: writing '%s': %w", token, pe, err)
	}

	// Get the newly-created trace event's id.
	tid, err := EventID(args.Group, eventName)
	if args.RetprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) {
		// Kernels < 4.12 don't support maxactive and therefore auto generate
		// group and event names from the symbol and offset. The symbol is used
		// without any sanitization.
		// See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712
		event := fmt.Sprintf("kprobes/r_%s_%d", args.Symbol, args.Offset)
		if err := removeEvent(args.Type, event); err != nil {
			return nil, fmt.Errorf("failed to remove spurious maxactive event: %s", err)
		}

		return nil, &internal.UnsupportedFeatureError{
			MinimumVersion: internal.Version{4, 12},
			Name:           "trace event with non-default maxactive",
		}
	}
	if err != nil {
		return nil, fmt.Errorf("get trace event id: %w", err)
	}

	evt := &Event{typ: args.Type, group: args.Group, name: eventName, id: tid}
	// Remove the event from tracefs if evt is garbage collected without
	// having been closed. The closure must not reference evt itself.
	evt.cleanup = runtime.AddCleanup(evt, func(*byte) {
		_ = removeEvent(args.Type, fmt.Sprintf("%s/%s", args.Group, eventName))
	}, nil)

	return evt, nil
}

// Close removes the event from tracefs.
//
// Returns os.ErrClosed if the event has already been closed before.
func (evt *Event) Close() error {
	if evt.id == 0 {
		return os.ErrClosed
	}

	// Mark the event as closed and cancel the pending cleanup before removing
	// the event explicitly.
	evt.id = 0
	evt.cleanup.Stop()
	pe := fmt.Sprintf("%s/%s", evt.group, evt.name)
	return removeEvent(evt.typ, pe)
}

// removeEvent removes the event pe, given as "group/name", from the events
// file of typ.
func removeEvent(typ ProbeType, pe string) error {
	f, err := typ.eventsFile()
	if err != nil {
		return err
	}
	defer f.Close()

	// See [k,u]probe_events syntax above. The probe type does not need to be specified
	// for removals.
	if _, err = f.WriteString("-:" + pe); err != nil {
		return fmt.Errorf("remove event %q from %s: %w", pe, f.Name(), err)
	}

	return nil
}

// ID returns the tracefs ID associated with the event.
//
// The ID is 0 once the event has been closed.
func (evt *Event) ID() uint64 {
	return evt.id
}

// Group returns the tracefs group used by the event.
func (evt *Event) Group() string {
	return evt.group
}

// KprobeToken creates the SYM[+offs] token for the tracefs api.
func KprobeToken(args ProbeArgs) string { po := args.Symbol if args.Offset != 0 { po += fmt.Sprintf("+%#x", args.Offset) } return po } ================================================ FILE: internal/tracefs/kprobe_test.go ================================================ package tracefs import ( "fmt" "os" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/testutils" ) // Global symbol, present on all tested kernels. const ksym = "vprintk" func TestKprobeTraceFSGroup(t *testing.T) { // Expect _<16 random hex chars>. g, err := RandomGroup("ebpftest") qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Matches(g, `ebpftest_[a-f0-9]{16}`)) // Expect error when the generator's output exceeds 63 characters. p := make([]byte, 47) // 63 - 17 (length of the random suffix and underscore) + 1 for i := range p { p[i] = byte('a') } _, err = RandomGroup(string(p)) qt.Assert(t, qt.Not(qt.IsNil(err))) // Reject non-alphanumeric characters. _, err = RandomGroup("/") qt.Assert(t, qt.Not(qt.IsNil(err))) } func TestKprobeToken(t *testing.T) { tests := []struct { args ProbeArgs expected string }{ {ProbeArgs{Symbol: "symbol"}, "symbol"}, {ProbeArgs{Symbol: "symbol", Offset: 1}, "symbol+0x1"}, {ProbeArgs{Symbol: "symbol", Offset: 65535}, "symbol+0xffff"}, {ProbeArgs{Symbol: "symbol", Offset: 65536}, "symbol+0x10000"}, } for i, tt := range tests { t.Run(fmt.Sprint(i), func(t *testing.T) { po := KprobeToken(tt.args) if tt.expected != po { t.Errorf("Expected symbol+offset to be '%s', got '%s'", tt.expected, po) } }) } } func TestNewEvent(t *testing.T) { for _, args := range []ProbeArgs{ {Type: Kprobe, Symbol: ksym}, {Type: Kprobe, Symbol: ksym, Ret: true}, {Type: Uprobe, Path: "/bin/bash", Symbol: "main"}, {Type: Uprobe, Path: "/bin/bash", Symbol: "main", Ret: true}, } { name := fmt.Sprintf("%s ret=%v", args.Type, args.Ret) t.Run(name, func(t *testing.T) { args.Group, _ = RandomGroup("ebpftest") evt, err := NewEvent(args) testutils.SkipIfNotSupportedOnOS(t, err) qt.Assert(t, 
qt.IsNil(err)) defer evt.Close() _, err = NewEvent(args) qt.Assert(t, qt.ErrorIs(err, os.ErrExist), qt.Commentf("expected consecutive event creation to contain os.ErrExist")) qt.Assert(t, qt.IsNil(evt.Close())) qt.Assert(t, qt.ErrorIs(evt.Close(), os.ErrClosed)) }) } } ================================================ FILE: internal/tracefs/perf_event_test.go ================================================ package tracefs import ( "errors" "fmt" "os" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/testutils" ) func TestEventID(t *testing.T) { eid, err := EventID("syscalls", "sys_enter_mmap") testutils.SkipIfNotSupportedOnOS(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Not(qt.Equals(eid, 0))) } func TestSanitizePath(t *testing.T) { _, err := sanitizeTracefsPath("../escaped") testutils.SkipIfNotSupportedOnOS(t, err) if !errors.Is(err, ErrInvalidInput) { t.Errorf("expected error %s, got: %s", ErrInvalidInput, err) } _, err = sanitizeTracefsPath("./not/escaped") if err != nil { t.Errorf("expected no error, got: %s", err) } } func TestValidIdentifier(t *testing.T) { tests := []struct { name string in string fail bool }{ {"empty string", "", true}, {"leading number", "1test", true}, {"underscore first", "__x64_syscall", false}, {"contains number", "bpf_trace_run1", false}, {"underscore", "_", false}, {"leading dash", "-EINVAL", true}, {"contains number", "all0wed", false}, {"contains dash", "trace-group", false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { exp := "pass" if tt.fail { exp = "fail" } if validIdentifier(tt.in) == tt.fail { t.Errorf("expected string '%s' to %s valid ID check", tt.in, exp) } }) } } func TestSanitizeIdentifier(t *testing.T) { tests := []struct { symbol string expected string }{ {"readline", "readline"}, {"main.Func123", "main_Func123"}, {"a.....a", "a_a"}, {"./;'{}[]a", "_a"}, {"***xx**xx###", "_xx_xx_"}, {`@P#r$i%v^3*+t)i&k++--`, "_P_r_i_v_3_t_i_k_"}, } for i, tt := range tests { 
t.Run(fmt.Sprint(i), func(t *testing.T) { sanitized := sanitizeIdentifier(tt.symbol) if tt.expected != sanitized { t.Errorf("Expected sanitized symbol to be '%s', got '%s'", tt.expected, sanitized) } }) } } func TestGetTracefsPath(t *testing.T) { path, err := getTracefsPath() testutils.SkipIfNotSupportedOnOS(t, err) qt.Assert(t, qt.IsNil(err)) _, err = os.Stat(path) qt.Assert(t, qt.IsNil(err)) } ================================================ FILE: internal/tracefs/probetype_string.go ================================================ // Code generated by "stringer -type=ProbeType -linecomment"; DO NOT EDIT. package tracefs import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[Kprobe-0] _ = x[Uprobe-1] } const _ProbeType_name = "kprobeuprobe" var _ProbeType_index = [...]uint8{0, 6, 12} func (i ProbeType) String() string { idx := int(i) - 0 if i < 0 || idx >= len(_ProbeType_index)-1 { return "ProbeType(" + strconv.FormatInt(int64(i), 10) + ")" } return _ProbeType_name[_ProbeType_index[idx]:_ProbeType_index[idx+1]] } ================================================ FILE: internal/tracefs/uprobe.go ================================================ package tracefs import "fmt" // UprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api. func UprobeToken(args ProbeArgs) string { po := fmt.Sprintf("%s:%#x", args.Path, args.Offset) if args.RefCtrOffset != 0 { // This is not documented in Documentation/trace/uprobetracer.txt. 
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564 po += fmt.Sprintf("(%#x)", args.RefCtrOffset) } return po } ================================================ FILE: internal/tracefs/uprobe_test.go ================================================ package tracefs import ( "fmt" "testing" ) func TestUprobeToken(t *testing.T) { tests := []struct { args ProbeArgs expected string }{ {ProbeArgs{Path: "/bin/bash"}, "/bin/bash:0x0"}, {ProbeArgs{Path: "/bin/bash", Offset: 1}, "/bin/bash:0x1"}, {ProbeArgs{Path: "/bin/bash", Offset: 65535}, "/bin/bash:0xffff"}, {ProbeArgs{Path: "/bin/bash", Offset: 65536}, "/bin/bash:0x10000"}, {ProbeArgs{Path: "/bin/bash", Offset: 1, RefCtrOffset: 1}, "/bin/bash:0x1(0x1)"}, {ProbeArgs{Path: "/bin/bash", Offset: 1, RefCtrOffset: 65535}, "/bin/bash:0x1(0xffff)"}, } for i, tt := range tests { t.Run(fmt.Sprint(i), func(t *testing.T) { po := UprobeToken(tt.args) if tt.expected != po { t.Errorf("Expected path:offset to be '%s', got '%s'", tt.expected, po) } }) } } ================================================ FILE: internal/unix/doc.go ================================================ // Package unix re-exports Linux specific parts of golang.org/x/sys/unix. // // It avoids breaking compilation on other OS by providing stubs as follows: // - Invoking a function always returns an error. // - Errnos have distinct, non-zero values. // - Constants have distinct but meaningless values. // - Types use the same names for members, but may or may not follow the // Linux layout. package unix // Note: please don't add any custom API to this package. Use internal/sys instead. 
// Errno is an alias of syscall.Errno, so values of the two types are
// interchangeable.
type Errno = syscall.Errno

// Error numbers re-exported from golang.org/x/sys/unix (imported as linux).
//
// NOTE(review): EPOLLIN is declared in this block, while the non-Linux stubs
// list it with the general constants in types_other.go rather than with the
// errnos. Confirm the placement is intentional.
const (
	E2BIG      = linux.E2BIG
	EACCES     = linux.EACCES
	EAGAIN     = linux.EAGAIN
	EBADF      = linux.EBADF
	EEXIST     = linux.EEXIST
	EFAULT     = linux.EFAULT
	EILSEQ     = linux.EILSEQ
	EINTR      = linux.EINTR
	EINVAL     = linux.EINVAL
	ENODEV     = linux.ENODEV
	ENOENT     = linux.ENOENT
	ENOSPC     = linux.ENOSPC
	EOPNOTSUPP = linux.EOPNOTSUPP
	EPERM      = linux.EPERM
	EPOLLIN    = linux.EPOLLIN
	ESRCH      = linux.ESRCH
	ESTALE     = linux.ESTALE
)
// TestErrno asserts, via qt.ErrorIs, that this package's ENOENT matches
// os.ErrNotExist.
func TestErrno(t *testing.T) {
	qt.Assert(t, qt.ErrorIs(ENOENT, os.ErrNotExist))
}
// // See https://learn.microsoft.com/en-us/cpp/c-runtime-library/errno-constants?view=msvc-170 const ( EPERM Errno = 1 ENOENT Errno = 2 ESRCH Errno = 3 EINTR Errno = 4 E2BIG Errno = 7 EBADF Errno = 9 EAGAIN Errno = 11 EACCES Errno = 13 EFAULT Errno = 14 EEXIST Errno = 17 ENODEV Errno = 19 EINVAL Errno = 22 ENFILE Errno = 23 EMFILE Errno = 24 ENOSPC Errno = 28 ENOSYS Errno = 40 ENOTEMPTY Errno = 41 EILSEQ Errno = 42 ENOTSUP Errno = 129 EOPNOTSUPP Errno = 130 EOTHER Errno = 131 ETIMEDOUT Errno = 138 EWOULDBLOCK Errno = 140 ) // These constants do not exist on Windows and therefore have a non-zero // dummy value. const ( ENOTSUPP Errno = Errno(syscall.APPLICATION_ERROR) + iota ESTALE ) // Errno is a Windows compatibility shim for Unix errnos. type Errno uintptr func (e Errno) Error() string { return e.String() } func (e Errno) Is(target error) bool { switch target { case os.ErrPermission: return e == EACCES || e == EPERM case os.ErrExist: return e == EEXIST || e == ENOTEMPTY case os.ErrNotExist: return e == ENOENT case errors.ErrUnsupported: return e == ENOSYS || e == ENOTSUP || e == EOPNOTSUPP } return false } func (e Errno) Temporary() bool { return e == EINTR || e == EMFILE || e == ENFILE || e.Timeout() } func (e Errno) Timeout() bool { return e == EAGAIN || e == EWOULDBLOCK || e == ETIMEDOUT } ================================================ FILE: internal/unix/error.go ================================================ package unix import ( "fmt" "runtime" "strings" "github.com/cilium/ebpf/internal" ) // errNonLinux returns an error which wraps [internal.ErrNotSupportedOnOS] and // includes the name of the calling function. 
func errNonLinux() error { name := "unknown" pc, _, _, ok := runtime.Caller(1) if ok { name = runtime.FuncForPC(pc).Name() if pos := strings.LastIndexByte(name, '.'); pos != -1 { name = name[pos+1:] } } return fmt.Errorf("unix: %s: %w", name, internal.ErrNotSupportedOnOS) } ================================================ FILE: internal/unix/error_test.go ================================================ package unix import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal" ) func TestErrNonLinux(t *testing.T) { err := errNonLinux() qt.Assert(t, qt.StringContains(err.Error(), t.Name())) qt.Assert(t, qt.ErrorIs(err, internal.ErrNotSupportedOnOS)) } ================================================ FILE: internal/unix/strings_other.go ================================================ //go:build !linux && !windows package unix func BytePtrFromString(s string) (*byte, error) { return nil, errNonLinux() } func ByteSliceToString(s []byte) string { return "" } func ByteSliceFromString(s string) ([]byte, error) { return nil, errNonLinux() } ================================================ FILE: internal/unix/strings_windows.go ================================================ package unix import ( "syscall" "golang.org/x/sys/windows" ) func BytePtrFromString(s string) (*byte, error) { p, err := windows.BytePtrFromString(s) if err == syscall.EINVAL { err = EINVAL } return p, err } func ByteSliceToString(s []byte) string { return windows.ByteSliceToString(s) } func ByteSliceFromString(s string) ([]byte, error) { return windows.ByteSliceFromString(s) } ================================================ FILE: internal/unix/types_linux.go ================================================ //go:build linux package unix import ( "syscall" "unsafe" linux "golang.org/x/sys/unix" ) const ( BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE BPF_F_RDONLY = linux.BPF_F_RDONLY BPF_F_WRONLY = linux.BPF_F_WRONLY BPF_F_RDONLY_PROG = 
linux.BPF_F_RDONLY_PROG BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE BPF_F_XDP_HAS_FRAGS = linux.BPF_F_XDP_HAS_FRAGS BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN BPF_F_UPROBE_MULTI_RETURN = linux.BPF_F_UPROBE_MULTI_RETURN BPF_F_LOCK = linux.BPF_F_LOCK BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN BPF_TAG_SIZE = linux.BPF_TAG_SIZE BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ SYS_BPF = linux.SYS_BPF F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC O_RDONLY = linux.O_RDONLY O_CLOEXEC = linux.O_CLOEXEC O_NONBLOCK = linux.O_NONBLOCK PROT_NONE = linux.PROT_NONE PROT_READ = linux.PROT_READ PROT_WRITE = linux.PROT_WRITE MAP_ANON = linux.MAP_ANON MAP_SHARED = linux.MAP_SHARED MAP_FIXED = linux.MAP_FIXED MAP_PRIVATE = linux.MAP_PRIVATE PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1 PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF PerfBitWatermark = linux.PerfBitWatermark PerfBitWriteBackward = linux.PerfBitWriteBackward PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC RLIM_INFINITY = linux.RLIM_INFINITY RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME PERF_RECORD_LOST = linux.PERF_RECORD_LOST PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE AT_FDCWD = linux.AT_FDCWD RENAME_NOREPLACE = linux.RENAME_NOREPLACE SO_ATTACH_BPF = linux.SO_ATTACH_BPF SO_DETACH_BPF = linux.SO_DETACH_BPF SOL_SOCKET = linux.SOL_SOCKET SIGPROF = linux.SIGPROF SIGUSR1 = 
// Syscall forwards its arguments to linux.Syscall (golang.org/x/sys/unix) and
// returns that function's results unchanged.
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
	return linux.Syscall(trap, a1, a2, a3)
}
// PerfEventOpen forwards its arguments to linux.PerfEventOpen
// (golang.org/x/sys/unix) and returns the resulting file descriptor and error
// unchanged.
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
	return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
}
// Capget forwards hdr and data to linux.Capget (golang.org/x/sys/unix) and
// returns its error unchanged.
func Capget(hdr *CapUserHeader, data *CapUserData) (err error) {
	return linux.Capget(hdr, data)
}

// Capset forwards hdr and data to linux.Capset (golang.org/x/sys/unix) and
// returns its error unchanged.
func Capset(hdr *CapUserHeader, data *CapUserData) (err error) {
	return linux.Capset(hdr, data)
}
// Syscall is the stub used by non-Linux builds: it performs no system call and
// always returns zero results together with ENOTSUP.
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
	return 0, 0, ENOTSUP
}
// CPUSet is an empty placeholder type used by non-Linux builds.
type CPUSet struct{}

// Set is a no-op: the method body is empty and the argument is ignored.
// NOTE(review): presumably it exists so code that fills in a CPUSet still
// compiles on other platforms; confirm against callers.
func (*CPUSet) Set(int) {}
type Version [3]uint16

// NewVersion parses a string of the form "Major.Minor.Patch" into a Version.
//
// Major and Minor are required; Patch may be omitted and then stays zero.
func NewVersion(ver string) (Version, error) {
	var parsed Version

	// Only the number of successfully scanned fields matters: a missing or
	// unparsable Patch leaves the third element at zero, so the scan error
	// itself is deliberately ignored.
	fields, _ := fmt.Sscanf(ver, "%d.%d.%d", &parsed[0], &parsed[1], &parsed[2])
	if fields >= 2 {
		return parsed, nil
	}

	return Version{}, fmt.Errorf("invalid version: %s", ver)
}

// NewVersionFromCode decodes a LINUX_VERSION_CODE into a Version, taking one
// byte per component.
func NewVersionFromCode(code uint32) Version {
	const byteMask = 0xff

	return Version{
		uint16((code >> 16) & byteMask),
		uint16((code >> 8) & byteMask),
		uint16(code & byteMask),
	}
}

// String formats the version as "vMajor.Minor", appending ".Patch" only when
// the patch level is non-zero.
func (v Version) String() string {
	if v[2] != 0 {
		return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2])
	}
	return fmt.Sprintf("v%d.%d", v[0], v[1])
}

// Less returns true if the version is less than another version.
//
// Components are compared from Major to Patch; the first difference decides.
func (v Version) Less(other Version) bool {
	for i := range v {
		if v[i] != other[i] {
			return v[i] < other[i]
		}
	}
	return false
}

// Unspecified returns true if the version is all zero.
func (v Version) Unspecified() bool {
	return v == (Version{})
}
return uint32(uint8(v[0]))<<16 | uint32(uint8(v[1]))<<8 | uint32(uint8(s)) } ================================================ FILE: internal/version_test.go ================================================ package internal import ( "testing" ) func TestVersion(t *testing.T) { a, err := NewVersion("1.2") if err != nil { t.Fatal(err) } b, err := NewVersion("2.2.1") if err != nil { t.Fatal(err) } if !a.Less(b) { t.Error("A should be less than B") } if b.Less(a) { t.Error("B shouldn't be less than A") } v200 := Version{2, 0, 0} if !a.Less(v200) { t.Error("1.2.1 should not be less than 2.0.0") } if v200.Less(a) { t.Error("2.0.0 should not be less than 1.2.1") } } func TestKernelVersion(t *testing.T) { // Kernels 4.4 and 4.9 have a SUBLEVEL of over 255 and clamp it to 255. // In our implementation, the other version segments are truncated. if v, want := (Version{256, 256, 256}), uint32(255); v.Kernel() != want { t.Errorf("256.256.256 should result in a kernel version of %d, got: %d", want, v.Kernel()) } // Known good version. if v, want := (Version{4, 9, 128}), uint32(264576); v.Kernel() != want { t.Errorf("4.9.1 should result in a kernel version of %d, got: %d", want, v.Kernel()) } } func TestVersionFromCode(t *testing.T) { var tests = []struct { name string code uint32 v Version }{ {"0.0.0", 0, Version{0, 0, 0}}, {"1.0.0", 0x10000, Version{1, 0, 0}}, {"4.4.255", 0x404ff, Version{4, 4, 255}}, {"255.255.255", 0xffffff, Version{255, 255, 255}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { v := NewVersionFromCode(tt.code) if v != tt.v { t.Errorf("unexpected version for code '%d'. 
// Anchor is a reference to a link or program.
//
// It is used to describe where an attachment or detachment should take place
// for link types which support multiple attachment.
type Anchor interface {
	// anchor returns an fd or ID and a set of flags.
	//
	// By default fdOrID is taken to reference a program, but BPF_F_LINK_MPROG
	// changes this to refer to a link instead.
	//
	// BPF_F_BEFORE, BPF_F_AFTER, BPF_F_REPLACE modify where a link or program
	// is attached. The default behaviour if none of these flags is specified
	// matches BPF_F_AFTER.
	anchor() (fdOrID, flags uint32, _ error)
}

// firstAnchor is the Anchor returned by Head.
type firstAnchor struct{}

// anchor returns no target (fdOrID 0) together with BPF_F_BEFORE.
func (firstAnchor) anchor() (fdOrID, flags uint32, _ error) {
	return 0, sys.BPF_F_BEFORE, nil
}

// Head is the position before all other programs or links.
func Head() Anchor {
	return firstAnchor{}
}

// lastAnchor is the Anchor returned by Tail.
type lastAnchor struct{}

// anchor returns no target (fdOrID 0) together with BPF_F_AFTER.
func (lastAnchor) anchor() (fdOrID, flags uint32, _ error) {
	return 0, sys.BPF_F_AFTER, nil
}

// Tail is the position after all other programs or links.
func Tail() Anchor {
	return lastAnchor{}
}

// BeforeLink is the position just in front of the target link.
func BeforeLink(target Link) Anchor {
	return anchor{target, sys.BPF_F_BEFORE}
}

// AfterLink is the position just after the target link.
func AfterLink(target Link) Anchor {
	return anchor{target, sys.BPF_F_AFTER}
}

// BeforeLinkByID is the position just in front of the link identified by
// target.
func BeforeLinkByID(target ID) Anchor {
	return anchor{target, sys.BPF_F_BEFORE}
}

// AfterLinkByID is the position just after the link identified by target.
func AfterLinkByID(target ID) Anchor {
	return anchor{target, sys.BPF_F_AFTER}
}

// BeforeProgram is the position just in front of the target program.
func BeforeProgram(target *ebpf.Program) Anchor {
	return anchor{target, sys.BPF_F_BEFORE}
}

// AfterProgram is the position just after the target program.
func AfterProgram(target *ebpf.Program) Anchor {
	return anchor{target, sys.BPF_F_AFTER}
}

// ReplaceProgram replaces the target program itself.
func ReplaceProgram(target *ebpf.Program) Anchor {
	return anchor{target, sys.BPF_F_REPLACE}
}

// BeforeProgramByID is the position just in front of the program identified
// by target.
func BeforeProgramByID(target ebpf.ProgramID) Anchor {
	return anchor{target, sys.BPF_F_BEFORE}
}

// AfterProgramByID is the position just after the program identified by
// target.
func AfterProgramByID(target ebpf.ProgramID) Anchor {
	return anchor{target, sys.BPF_F_AFTER}
}

// ReplaceProgramByID replaces the program identified by target.
func ReplaceProgramByID(target ebpf.ProgramID) Anchor {
	return anchor{target, sys.BPF_F_REPLACE}
}

// anchor pairs a target with a position flag. It backs every positional
// constructor in this file other than Head and Tail.
type anchor struct {
	// target is a *ebpf.Program, an ebpf.ProgramID, a value with an FD() int
	// method, or a link ID; anything else is rejected by the anchor method.
	target any
	// position is one of BPF_F_BEFORE, BPF_F_AFTER or BPF_F_REPLACE, as set
	// by the constructors above.
	position uint32
}

// anchor resolves the target into an fd or ID and combines the position with
// the flag describing what kind of reference fdOrID is.
//
// It returns sys.ErrClosedFd when the target reports a negative fd, and an
// "invalid target" error for unsupported target types.
func (ap anchor) anchor() (fdOrID, flags uint32, _ error) {
	var typeFlag uint32
	// The case order matters: *ebpf.Program also has an FD() method, so it
	// has to be matched before the generic interface{ FD() int } case.
	switch target := ap.target.(type) {
	case *ebpf.Program:
		fd := target.FD()
		if fd < 0 {
			return 0, 0, sys.ErrClosedFd
		}
		fdOrID = uint32(fd)
		typeFlag = 0
	case ebpf.ProgramID:
		fdOrID = uint32(target)
		typeFlag = sys.BPF_F_ID
	case interface{ FD() int }:
		// Any other fd-backed target is treated as a link.
		// NOTE(review): presumably this is where Link values passed to
		// BeforeLink/AfterLink end up; confirm that Link exposes FD().
		fd := target.FD()
		if fd < 0 {
			return 0, 0, sys.ErrClosedFd
		}
		fdOrID = uint32(fd)
		typeFlag = sys.BPF_F_LINK_MPROG
	case ID:
		fdOrID = uint32(target)
		typeFlag = sys.BPF_F_LINK_MPROG | sys.BPF_F_ID
	default:
		return 0, 0, fmt.Errorf("invalid target %T", ap.target)
	}

	return fdOrID, ap.position | typeFlag, nil
}
Only works if the cgroup // has zero or more programs attached using the Multi flag. Implies override. flagAllowMulti // Set automatically by progAttachCgroup.Update(). Used for updating a // specific given program attached in multi-mode. flagReplace ) type CgroupOptions struct { // Path to a cgroupv2 folder. Path string // One of the AttachCgroup* constants Attach ebpf.AttachType // Program must be of type CGroup*, and the attach type must match Attach. Program *ebpf.Program } // AttachCgroup links a BPF program to a cgroup. // // If the running kernel doesn't support bpf_link, attempts to emulate its // semantics using the legacy PROG_ATTACH mechanism. If bpf_link is not // available, the returned [Link] will not support pinning to bpffs. // // If you need more control over attachment flags or the attachment mechanism // used, look at [RawAttachProgram] and [AttachRawLink] instead. func AttachCgroup(opts CgroupOptions) (cg Link, err error) { cgroup, err := os.Open(opts.Path) if err != nil { return nil, fmt.Errorf("can't open cgroup: %s", err) } defer func() { if _, ok := cg.(*progAttachCgroup); ok { // Skip closing the cgroup handle if we return a valid progAttachCgroup, // where the handle is retained to implement Update(). return } cgroup.Close() }() cg, err = newLinkCgroup(cgroup, opts.Attach, opts.Program) if err == nil { return cg, nil } if errors.Is(err, ErrNotSupported) { cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowMulti) } if errors.Is(err, ErrNotSupported) { cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowOverride) } if err != nil { return nil, err } return cg, nil } type progAttachCgroup struct { cgroup *os.File current *ebpf.Program attachType ebpf.AttachType flags cgroupAttachFlags } var _ Link = (*progAttachCgroup)(nil) func (cg *progAttachCgroup) isLink() {} // newProgAttachCgroup attaches prog to cgroup using BPF_PROG_ATTACH. // cgroup and prog are retained by [progAttachCgroup]. 
func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) { if flags&flagAllowMulti > 0 { if err := haveProgAttachReplace(); err != nil { return nil, fmt.Errorf("can't support multiple programs: %w", err) } } // Use a program handle that cannot be closed by the caller. clone, err := prog.Clone() if err != nil { return nil, err } err = RawAttachProgram(RawAttachProgramOptions{ Target: int(cgroup.Fd()), Program: clone, Flags: uint32(flags), Attach: attach, }) if err != nil { clone.Close() return nil, fmt.Errorf("cgroup: %w", err) } return &progAttachCgroup{cgroup, clone, attach, flags}, nil } func (cg *progAttachCgroup) Close() error { defer cg.cgroup.Close() defer cg.current.Close() err := RawDetachProgram(RawDetachProgramOptions{ Target: int(cg.cgroup.Fd()), Program: cg.current, Attach: cg.attachType, }) if err != nil { return fmt.Errorf("close cgroup: %s", err) } return nil } func (cg *progAttachCgroup) Update(prog *ebpf.Program) error { new, err := prog.Clone() if err != nil { return err } args := RawAttachProgramOptions{ Target: int(cg.cgroup.Fd()), Program: prog, Attach: cg.attachType, Flags: uint32(cg.flags), } if cg.flags&flagAllowMulti > 0 { // Atomically replacing multiple programs requires at least // 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf // program in MULTI mode") args.Anchor = ReplaceProgram(cg.current) } if err := RawAttachProgram(args); err != nil { new.Close() return fmt.Errorf("can't update cgroup: %s", err) } cg.current.Close() cg.current = new return nil } func (cg *progAttachCgroup) Pin(string) error { return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported) } func (cg *progAttachCgroup) Unpin() error { return fmt.Errorf("can't unpin cgroup: %w", ErrNotSupported) } func (cg *progAttachCgroup) Detach() error { return fmt.Errorf("can't detach cgroup: %w", ErrNotSupported) } func (cg *progAttachCgroup) Info() (*Info, error) { return nil, 
fmt.Errorf("can't get cgroup info: %w", ErrNotSupported) } type linkCgroup struct { RawLink } var _ Link = (*linkCgroup)(nil) // newLinkCgroup attaches prog to cgroup using BPF_LINK_CREATE. func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) { link, err := AttachRawLink(RawLinkOptions{ Target: int(cgroup.Fd()), Program: prog, Attach: attach, }) if err != nil { return nil, err } return &linkCgroup{*link}, err } func (cg *linkCgroup) Info() (*Info, error) { var info sys.CgroupLinkInfo if err := sys.ObjInfo(cg.fd, &info); err != nil { return nil, fmt.Errorf("cgroup link info: %s", err) } extra := &CgroupInfo{ CgroupId: info.CgroupId, AttachType: info.AttachType, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } ================================================ FILE: link/cgroup_test.go ================================================ //go:build !windows package link import ( "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" ) func TestAttachCgroup(t *testing.T) { cgroup, prog := mustCgroupFixtures(t) link, err := AttachCgroup(CgroupOptions{ Path: cgroup.Name(), Attach: ebpf.AttachCGroupInetEgress, Program: prog, }) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer link.Close() if haveBPFLink() == nil { if _, ok := link.(*linkCgroup); !ok { t.Fatalf("Have support for bpf_link, but got %T instead of linkCgroup", link) } } else { if _, ok := link.(*progAttachCgroup); !ok { t.Fatalf("Expected progAttachCgroup, got %T instead", link) } } } func TestProgAttachCgroup(t *testing.T) { cgroup, prog := mustCgroupFixtures(t) link, err := newProgAttachCgroup(cgroup, ebpf.AttachCGroupInetEgress, prog, 0) if err != nil { t.Fatal("Can't create link:", err) } testLink(t, link, prog) } func TestProgAttachCgroupAllowMulti(t *testing.T) { cgroup, prog := mustCgroupFixtures(t) link, err := newProgAttachCgroup(cgroup, ebpf.AttachCGroupInetEgress, prog, 
flagAllowMulti) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create link:", err) } // It's currently not possible for a program to replace // itself. prog2 := mustLoadProgram(t, ebpf.CGroupSKB, ebpf.AttachCGroupInetEgress, "") testLink(t, link, prog2) } func TestLinkCgroup(t *testing.T) { cgroup, prog := mustCgroupFixtures(t) link, err := newLinkCgroup(cgroup, ebpf.AttachCGroupInetEgress, prog) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create link:", err) } testLink(t, link, prog) } ================================================ FILE: link/doc.go ================================================ // Package link allows attaching eBPF programs to various kernel hooks. package link ================================================ FILE: link/helpers_windows_test.go ================================================ package link import ( "errors" "os" "testing" "golang.org/x/sys/windows" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" ) // windowsProgramTypeForGUID resolves a GUID to a ProgramType. func windowsProgramTypeForGUID(tb testing.TB, guid windows.GUID) ebpf.ProgramType { programType, err := ebpf.WindowsProgramTypeForGUID(guid.String()) if errors.Is(err, os.ErrNotExist) { tb.Skipf("Attach type not found for GUID %v", guid) } qt.Assert(tb, qt.IsNil(err)) return programType } // windowsAttachTypeForGUID resolves a GUID to an AttachType. 
func windowsAttachTypeForGUID(tb testing.TB, guid windows.GUID) ebpf.AttachType { attachType, err := ebpf.WindowsAttachTypeForGUID(guid.String()) if errors.Is(err, os.ErrNotExist) { tb.Skipf("Attach type not found for GUID %v", guid) } qt.Assert(tb, qt.IsNil(err)) return attachType } ================================================ FILE: link/iter.go ================================================ //go:build !windows package link import ( "fmt" "io" "unsafe" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) type IterOptions struct { // Program must be of type Tracing with attach type // AttachTraceIter. The kind of iterator to attach to is // determined at load time via the AttachTo field. // // AttachTo requires the kernel to include BTF of itself, // and it to be compiled with a recent pahole (>= 1.16). Program *ebpf.Program // Map specifies the target map for bpf_map_elem and sockmap iterators. // It may be nil. Map *ebpf.Map } // AttachIter attaches a BPF seq_file iterator. func AttachIter(opts IterOptions) (*Iter, error) { progFd := opts.Program.FD() if progFd < 0 { return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) } var info bpfIterLinkInfoMap if opts.Map != nil { mapFd := opts.Map.FD() if mapFd < 0 { return nil, fmt.Errorf("invalid map: %w", sys.ErrClosedFd) } info.map_fd = uint32(mapFd) } attr := sys.LinkCreateIterAttr{ ProgFd: uint32(progFd), AttachType: sys.AttachType(ebpf.AttachTraceIter), IterInfo: sys.UnsafePointer(unsafe.Pointer(&info)), IterInfoLen: uint32(unsafe.Sizeof(info)), } fd, err := sys.LinkCreateIter(&attr) if err != nil { if haveFeatErr := haveBPFLink(); haveFeatErr != nil { return nil, haveFeatErr } return nil, fmt.Errorf("can't link iterator: %w", err) } return &Iter{RawLink{fd, ""}}, err } // Iter represents an attached bpf_iter. type Iter struct { RawLink } // Open creates a new instance of the iterator. // // Reading from the returned reader triggers the BPF program. 
func (it *Iter) Open() (io.ReadCloser, error) { attr := &sys.IterCreateAttr{ LinkFd: it.fd.Uint(), } fd, err := sys.IterCreate(attr) if err != nil { return nil, fmt.Errorf("can't create iterator: %w", err) } return fd.File("bpf_iter") } // union bpf_iter_link_info.map type bpfIterLinkInfoMap struct { map_fd uint32 } ================================================ FILE: link/iter_test.go ================================================ //go:build !windows package link import ( "io" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" ) func TestIter(t *testing.T) { testutils.SkipOnOldKernel(t, "5.9", "bpf_map iter") prog := mustLoadProgram(t, ebpf.Tracing, ebpf.AttachTraceIter, "bpf_map") it, err := AttachIter(IterOptions{ Program: prog, }) if err != nil { t.Fatal("Can't create iter:", err) } file, err := it.Open() if err != nil { t.Fatal("Can't open iter instance:", err) } defer file.Close() contents, err := io.ReadAll(file) if err != nil { t.Fatal(err) } if len(contents) != 0 { t.Error("Non-empty output from no-op iterator:", string(contents)) } testLink(t, it, prog) } func TestIterMapElements(t *testing.T) { testutils.SkipOnOldKernel(t, "5.9", "bpf_map_elem iter") prog := mustLoadProgram(t, ebpf.Tracing, ebpf.AttachTraceIter, "bpf_map_elem") arr, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.Array, KeySize: 4, ValueSize: 4, MaxEntries: 3, }) if err != nil { t.Fatal(err) } defer arr.Close() it, err := AttachIter(IterOptions{ Program: prog, Map: arr, }) if err != nil { t.Fatal("Can't create iter:", err) } defer it.Close() file, err := it.Open() if err != nil { t.Fatal("Can't open iter instance:", err) } defer file.Close() contents, err := io.ReadAll(file) if err != nil { t.Fatal(err) } if len(contents) != 0 { t.Error("Non-empty output from no-op iterator:", string(contents)) } } func TestUDPIter(t *testing.T) { // Introduced by 5788b3a07fc5 ("net: bpf: Implement bpf iterator for udp") testutils.SkipOnOldKernel(t, "5.9", "udp iter") prog := 
// KprobeOptions defines additional parameters that will be used
// when loading Kprobes.
type KprobeOptions struct {
	// Arbitrary value that can be fetched from an eBPF program
	// via `bpf_get_attach_cookie()`.
	//
	// Needs kernel 5.15+.
	Cookie uint64
	// Offset of the kprobe relative to the traced symbol.
	// Can be used to insert kprobes at arbitrary offsets in kernel functions,
	// e.g. in places where functions have been inlined.
	Offset uint64
	// Increase the maximum number of concurrent invocations of a kretprobe.
	// Required when tracing some long running functions in the kernel.
	//
	// Warning: this setting forces the use of an outdated kernel API and is
	// not portable across kernel versions. On supported kernels, consider using
	// fexit programs instead, as they don't have this MaxActive limitation.
	RetprobeMaxActive int
	// Prefix used for the event name if the kprobe must be attached using tracefs.
	// The group name will be formatted as `<prefix>_<randomstr>`.
	// The default empty string is equivalent to "ebpf" as the prefix.
	TraceFSPrefix string
}
// isValidKprobeSymbol reports whether s matches the pattern
// "^[a-zA-Z_][0-9a-zA-Z_.]*$" without using the regexp package.
//
// Dots are accepted after the first character because GCC-compiled kernels
// may rename symbols with a `.isra.$n` suffix, like `udp_send_skb.isra.52`.
// See: https://gcc.gnu.org/gcc-10/changes.html
func isValidKprobeSymbol(s string) bool {
	if s == "" {
		return false
	}

	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]

		// Letters and underscores are valid at any position.
		if ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_' {
			continue
		}

		// Digits and dots are valid everywhere except the first position.
		if pos > 0 && (('0' <= ch && ch <= '9') || ch == '.') {
			continue
		}

		return false
	}

	return true
}
func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) { if symbol == "" { return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) } if prog == nil { return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) } if !isValidKprobeSymbol(symbol) { return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput) } if prog.Type() != ebpf.Kprobe { return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) } args := tracefs.ProbeArgs{ Type: tracefs.Kprobe, Pid: perfAllThreads, Symbol: symbol, Ret: ret, } if opts != nil { args.RetprobeMaxActive = opts.RetprobeMaxActive args.Cookie = opts.Cookie args.Offset = opts.Offset args.Group = opts.TraceFSPrefix } // Use kprobe PMU if the kernel has it available. tp, err := pmuProbe(args) if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { if prefix := linux.PlatformPrefix(); prefix != "" { args.Symbol = prefix + symbol tp, err = pmuProbe(args) } } if err == nil { return tp, nil } if !errors.Is(err, ErrNotSupported) { return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err) } // Use tracefs if kprobe PMU is missing. args.Symbol = symbol tp, err = tracefsProbe(args) if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { if prefix := linux.PlatformPrefix(); prefix != "" { args.Symbol = prefix + symbol tp, err = tracefsProbe(args) } } if err != nil { return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err) } return tp, nil } // pmuProbe opens a perf event based on a Performance Monitoring Unit. // // Requires at least a 4.17 kernel. 
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// args.Type selects between the kprobe and uprobe PMU. For kprobes the event
// is placed on args.Symbol, for uprobes on args.Path, in both cases at
// args.Offset.
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU,
// if args.RetprobeMaxActive is set, or if the kernel rejects a symbol
// containing a dot with EINVAL. EILSEQ from the kernel is reported as
// os.ErrNotExist.
func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
	// Getting the PMU type will fail if the kernel doesn't support
	// the perf_[k,u]probe PMU.
	eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type")
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported)
	}
	if err != nil {
		return nil, err
	}

	// Use tracefs if we want to set kretprobe's retprobeMaxActive.
	if args.RetprobeMaxActive != 0 {
		return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
	}

	var config uint64
	if args.Ret {
		// The sysfs format file reports which bit of config requests a
		// return probe.
		bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe")
		if err != nil {
			return nil, err
		}
		config |= 1 << bit
	}

	var (
		attr  unix.PerfEventAttr
		sp    unsafe.Pointer
		token string
	)
	switch args.Type {
	case tracefs.Kprobe:
		// Create a pointer to a NUL-terminated string for the kernel.
		sp, err = unsafeStringPtr(args.Symbol)
		if err != nil {
			return nil, err
		}

		// token is only used to identify the probe in error messages below.
		token = tracefs.KprobeToken(args)

		attr = unix.PerfEventAttr{
			// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
			// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(eventType),   // PMU event type read from sysfs
			Ext1:   uint64(uintptr(sp)), // Kernel symbol to trace
			Ext2:   args.Offset,         // Kernel symbol offset
			Config: config,              // Retprobe flag
		}
	case tracefs.Uprobe:
		// As for kprobes, the kernel receives the path as a raw string pointer.
		sp, err = unsafeStringPtr(args.Path)
		if err != nil {
			return nil, err
		}

		if args.RefCtrOffset != 0 {
			config |= args.RefCtrOffset << uprobeRefCtrOffsetShift
		}

		// token is only used to identify the probe in error messages below.
		token = tracefs.UprobeToken(args)

		attr = unix.PerfEventAttr{
			// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
			// since it added the config2 (Ext2) field. The Size field controls the
			// size of the internal buffer the kernel allocates for reading the
			// perf_event_attr argument from userspace.
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(eventType),   // PMU event type read from sysfs
			Ext1:   uint64(uintptr(sp)), // Uprobe path
			Ext2:   args.Offset,         // Uprobe offset
			Config: config,              // RefCtrOffset, Retprobe flag
		}
	}

	// When probing all threads the event is opened on CPU 0; when a specific
	// pid is given, -1 is passed as the cpu argument instead.
	cpu := 0
	if args.Pid != perfAllThreads {
		cpu = -1
	}
	rawFd, err := unix.PerfEventOpen(&attr, args.Pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)

	// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
	// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
	// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
	if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") {
		return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)
	}
	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
	// when trying to create a retprobe for a missing symbol.
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("token %s: not found: %w", token, err)
	}
	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
	// to an invalid insn boundary. The exact conditions that trigger this error are
	// arch specific however.
	if errors.Is(err, unix.EILSEQ) {
		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
	}
	// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
	// when attempting to set a uprobe on a trap instruction.
	if errors.Is(err, sys.ENOTSUPP) {
		return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err)
	}

	if err != nil {
		return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
	}

	// Ensure the string pointer is not collected before PerfEventOpen returns.
	runtime.KeepAlive(sp)

	fd, err := sys.NewFD(rawFd)
	if err != nil {
		return nil, err
	}

	// Kernel has perf_[k,u]probe PMU available, initialize perf event.
	return newPerfEvent(fd, nil), nil
}
fd, err := openTracepointPerfEvent(evt.ID(), args.Pid) if err != nil { // Make sure we clean up the created tracefs event when we return error. // If a livepatch handler is already active on the symbol, the write to // tracefs will succeed, a trace event will show up, but creating the // perf event will fail with EBUSY. _ = evt.Close() return nil, err } return newPerfEvent(fd, evt), nil } ================================================ FILE: link/kprobe_multi.go ================================================ //go:build !windows package link import ( "errors" "fmt" "os" "github.com/cilium/ebpf" "github.com/cilium/ebpf/features" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // KprobeMultiOptions defines additional parameters that will be used // when opening a KprobeMulti Link. type KprobeMultiOptions struct { // Symbols takes a list of kernel symbol names to attach an ebpf program to. // // Mutually exclusive with Addresses. Symbols []string // Addresses takes a list of kernel symbol addresses in case they can not // be referred to by name. // // Note that only start addresses can be specified, since the fprobe API // limits the attach point to the function entry or return. // // Mutually exclusive with Symbols. Addresses []uintptr // Cookies specifies arbitrary values that can be fetched from an eBPF // program via `bpf_get_attach_cookie()`. // // If set, its length should be equal to the length of Symbols or Addresses. // Each Cookie is assigned to the Symbol or Address specified at the // corresponding slice index. Cookies []uint64 // Session must be true when attaching Programs with the // [ebpf.AttachTraceKprobeSession] attach type. // // This makes a Kprobe execute on both function entry and return. The entry // program can share a cookie value with the return program and can decide // whether the return program gets executed. 
Session bool } // KprobeMulti attaches the given eBPF program to the entry point of a given set // of kernel symbols. // // The difference with Kprobe() is that multi-kprobe accomplishes this in a // single system call, making it significantly faster than attaching many // probes one at a time. // // Requires at least Linux 5.18. func KprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) { return kprobeMulti(prog, opts, 0) } // KretprobeMulti attaches the given eBPF program to the return point of a given // set of kernel symbols. // // The difference with Kretprobe() is that multi-kprobe accomplishes this in a // single system call, making it significantly faster than attaching many // probes one at a time. // // Requires at least Linux 5.18. func KretprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) { return kprobeMulti(prog, opts, sys.BPF_F_KPROBE_MULTI_RETURN) } func kprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions, flags uint32) (Link, error) { if prog == nil { return nil, errors.New("cannot attach a nil program") } syms := uint32(len(opts.Symbols)) addrs := uint32(len(opts.Addresses)) cookies := uint32(len(opts.Cookies)) if syms == 0 && addrs == 0 { return nil, fmt.Errorf("one of Symbols or Addresses is required: %w", errInvalidInput) } if syms != 0 && addrs != 0 { return nil, fmt.Errorf("fields Symbols and Addresses are mutually exclusive: %w", errInvalidInput) } if cookies > 0 && cookies != syms && cookies != addrs { return nil, fmt.Errorf("field Cookies must be exactly Symbols or Addresses in length: %w", errInvalidInput) } attachType := sys.BPF_TRACE_KPROBE_MULTI if opts.Session { attachType = sys.BPF_TRACE_KPROBE_SESSION } attr := &sys.LinkCreateKprobeMultiAttr{ ProgFd: uint32(prog.FD()), AttachType: attachType, KprobeMultiFlags: flags, } switch { case syms != 0: attr.Count = syms attr.Syms = sys.NewStringSlicePointer(opts.Symbols) case addrs != 0: attr.Count = addrs attr.Addrs = sys.SlicePointer(opts.Addresses) 
} if cookies != 0 { attr.Cookies = sys.SlicePointer(opts.Cookies) } fd, err := sys.LinkCreateKprobeMulti(attr) if err == nil { return &kprobeMultiLink{RawLink{fd, ""}}, nil } if errors.Is(err, unix.ESRCH) { return nil, fmt.Errorf("couldn't find one or more symbols: %w", os.ErrNotExist) } if opts.Session { if haveFeatErr := features.HaveBPFLinkKprobeSession(); haveFeatErr != nil { return nil, haveFeatErr } } else { if haveFeatErr := features.HaveBPFLinkKprobeMulti(); haveFeatErr != nil { return nil, haveFeatErr } } // Check EINVAL after running feature probes, since it's also returned when // the kernel doesn't support the multi/session attach types. if errors.Is(err, unix.EINVAL) { return nil, fmt.Errorf("%w (missing kernel symbol or prog's AttachType not %s?)", err, ebpf.AttachType(attachType)) } return nil, err } type kprobeMultiLink struct { RawLink } var _ Link = (*kprobeMultiLink)(nil) func (kml *kprobeMultiLink) Update(_ *ebpf.Program) error { return fmt.Errorf("update kprobe_multi: %w", ErrNotSupported) } func (kml *kprobeMultiLink) Info() (*Info, error) { var info sys.KprobeMultiLinkInfo if err := sys.ObjInfo(kml.fd, &info); err != nil { return nil, fmt.Errorf("kprobe multi link info: %w", err) } var addrs = make([]uint64, info.Count) var cookies = make([]uint64, info.Count) info = sys.KprobeMultiLinkInfo{ Addrs: sys.SlicePointer(addrs), Cookies: sys.SlicePointer(cookies), Count: uint32(len(addrs)), } if err := sys.ObjInfo(kml.fd, &info); err != nil { return nil, fmt.Errorf("kprobe multi link info: %w", err) } if info.Addrs.IsNil() { addrs = nil } if info.Cookies.IsNil() { cookies = nil } extra := &KprobeMultiInfo{ Count: info.Count, Flags: info.Flags, Missed: info.Missed, addrs: addrs, cookies: cookies, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } ================================================ FILE: link/kprobe_multi_test.go ================================================ //go:build !windows package link import ( 
"errors" "os" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/features" "github.com/cilium/ebpf/internal/linux" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/unix" ) var kprobeMultiSyms = []string{"vprintk", "inet6_release"} func TestKprobeMulti(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeMulti()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceKprobeMulti, "") km, err := KprobeMulti(prog, KprobeMultiOptions{Symbols: kprobeMultiSyms}) if err != nil { t.Fatal(err) } defer km.Close() testLink(t, km, prog) } func TestKprobeMultiInfo(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeMulti()) testutils.SkipOnOldKernel(t, "6.6", "bpf_link_info_kprobe_multi") prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceKprobeMulti, "") km, err := KprobeMulti(prog, KprobeMultiOptions{Symbols: kprobeMultiSyms}) if err != nil { t.Fatal(err) } defer km.Close() info, err := km.Info() if err != nil { t.Fatal(err) } kmInfo := info.KprobeMulti() addresses, ok := kmInfo.Addresses() qt.Assert(t, qt.IsTrue(ok)) // kprobe_multi only returns addresses, no symbols, so we can't verify that the addresses are correct qt.Assert(t, qt.HasLen(addresses, len(kprobeMultiSyms))) } func TestKprobeMultiInput(t *testing.T) { // Program type that loads on all kernels. Not expected to link successfully. prog := mustLoadProgram(t, ebpf.SocketFilter, 0, "") // One of Symbols or Addresses must be given. _, err := KprobeMulti(prog, KprobeMultiOptions{}) if !errors.Is(err, errInvalidInput) { t.Fatalf("expected errInvalidInput, got: %v", err) } // Symbols and Addresses are mutually exclusive. _, err = KprobeMulti(prog, KprobeMultiOptions{ Symbols: []string{"foo"}, Addresses: []uintptr{1}, }) if !errors.Is(err, errInvalidInput) { t.Fatalf("expected errInvalidInput, got: %v", err) } // One Symbol, two cookies.. 
_, err = KprobeMulti(prog, KprobeMultiOptions{ Symbols: []string{"one"}, Cookies: []uint64{2, 3}, }) if !errors.Is(err, errInvalidInput) { t.Fatalf("expected errInvalidInput, got: %v", err) } } func TestKprobeMultiErrors(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeMulti()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceKprobeMulti, "") // Nonexistent kernel symbol. _, err := KprobeMulti(prog, KprobeMultiOptions{Symbols: []string{"bogus"}}) if !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.EINVAL) { t.Fatalf("expected ErrNotExist or EINVAL, got: %s", err) } // Only have a negative test for addresses as it would be hard to maintain a // proper one. _, err = KprobeMulti(prog, KprobeMultiOptions{ Addresses: []uintptr{^uintptr(0)}, }) if !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.EINVAL) { t.Fatalf("expected ErrNotExist or EINVAL, got: %s", err) } } func TestKprobeMultiCookie(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeMulti()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceKprobeMulti, "") km, err := KprobeMulti(prog, KprobeMultiOptions{ Symbols: kprobeMultiSyms, Cookies: []uint64{0, 1}, }) if err != nil { t.Fatal(err) } _ = km.Close() } func TestKprobeMultiProgramCall(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeMulti()) m, p := newUpdaterMapProg(t, ebpf.Kprobe, ebpf.AttachTraceKprobeMulti) // Use actual syscall names with platform prefix. // For simplicity, just assert the increment happens with any symbol in the array. prefix := linux.PlatformPrefix() opts := KprobeMultiOptions{ Symbols: []string{prefix + "sys_getpid", prefix + "sys_gettid"}, } km, err := KprobeMulti(p, opts) if err != nil { t.Fatal(err) } // Trigger ebpf program call. unix.Getpid() unix.Gettid() // Assert that the value got incremented to at least 2, while allowing // for bigger values, because we could race with other getpid/gettid // callers. 
assertMapValueGE(t, m, 0, 2) // Close the link. if err := km.Close(); err != nil { t.Fatal(err) } // Reset map value to 0 at index 0. if err := m.Update(uint32(0), uint32(0), ebpf.UpdateExist); err != nil { t.Fatal(err) } // Retrigger the ebpf program call. unix.Getpid() unix.Gettid() // Assert that this time the value has not been updated. assertMapValue(t, m, 0, 0) } func TestKprobeSession(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeSession()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceKprobeSession, "") km, err := KprobeMulti(prog, KprobeMultiOptions{Symbols: kprobeMultiSyms, Session: true}) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) defer km.Close() testLink(t, km, prog) } ================================================ FILE: link/kprobe_test.go ================================================ //go:build !windows package link import ( "errors" "os" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) // Global symbol, present on all tested kernels. var ksym = "vprintk" // Collection of various symbols present in all tested kernels. // Compiler optimizations result in different names for these symbols. 
var symTests = []string{ "echo_char.isra.0", // function optimized by -fipa-sra "proc_get_long.constprop.0", // optimized function with constant operands "unregister_kprobes.part.0", // function body that was split and partially inlined } func TestKprobe(t *testing.T) { prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") for _, tt := range symTests { t.Run(tt, func(t *testing.T) { k, err := Kprobe(tt, prog, nil) if err != nil { t.Fatal(err) } defer k.Close() }) } k, err := Kprobe("bogus", prog, nil) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist), qt.Commentf("got error: %s", err)) if k != nil { k.Close() } k, err = Kprobe(ksym, prog, nil) qt.Assert(t, qt.IsNil(err)) defer k.Close() testLink(t, k, prog) } func TestKprobeInfo(t *testing.T) { testutils.SkipOnOldKernel(t, "6.6", "bpf_link_info_perf_event") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") k, err := Kprobe(ksym, prog, nil) qt.Assert(t, qt.IsNil(err)) defer k.Close() info, err := k.Info() qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(info.PerfEvent().Kprobe().Function, ksym)) qt.Assert(t, qt.Equals(info.PerfEvent().Kprobe().Offset, 0)) } func TestKprobeOffset(t *testing.T) { prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") // The layout of a function is compiler and arch dependent, so we try to // find a valid attach target in the first few bytes of the function. 
for i := uint64(1); i < 16; i++ { k, err := Kprobe("inet6_release", prog, &KprobeOptions{Offset: i}) if err != nil { continue } k.Close() return } t.Fatal("Can't attach with non-zero offset") } func TestKretprobeMaxActive(t *testing.T) { prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") defer prog.Close() _, err := Kprobe("do_sys_open", prog, &KprobeOptions{RetprobeMaxActive: 4096}) if !errors.Is(err, tracefs.ErrInvalidMaxActive) { t.Fatal("Expected ErrInvalidMaxActive, got", err) } k, err := Kretprobe("__put_task_struct", prog, &KprobeOptions{RetprobeMaxActive: 4096}) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Kretprobe with maxactive returned an error:", err) } if err := k.Close(); err != nil { t.Fatal("Closing kretprobe:", err) } } func TestKretprobe(t *testing.T) { prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") for _, tt := range symTests { t.Run(tt, func(t *testing.T) { k, err := Kretprobe(tt, prog, nil) if err != nil { t.Fatal(err) } defer k.Close() }) } k, err := Kretprobe("bogus", prog, nil) if !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.EINVAL) { t.Fatal(err) } if k != nil { k.Close() } k, err = Kretprobe(ksym, prog, nil) qt.Assert(t, qt.IsNil(err)) defer k.Close() testLink(t, k, prog) } func TestKprobeErrors(t *testing.T) { // Invalid Kprobe incantations. Kretprobe uses the same code paths // with a different ret flag. _, err := Kprobe("", nil, nil) // empty symbol qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = Kprobe("_", nil, nil) // empty prog qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = Kprobe(".", &ebpf.Program{}, nil) // illegal chars in symbol qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = Kprobe("foo", &ebpf.Program{}, nil) // wrong prog type qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) } // Test k(ret)probe creation using perf_kprobe PMU. 
func TestKprobeCreatePMU(t *testing.T) { // Requires at least 4.17 (e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU") testutils.SkipOnOldKernel(t, "4.17", "perf_kprobe PMU") // kprobe happy path. printk is always present. pk, err := pmuProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym}) qt.Assert(t, qt.IsNil(err)) defer pk.Close() // kretprobe happy path. pr, err := pmuProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym, Ret: true}) qt.Assert(t, qt.IsNil(err)) defer pr.Close() // Expect os.ErrNotExist when specifying a non-existent kernel symbol // on kernels 4.17 and up. _, err = pmuProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: "bogus"}) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist), qt.Commentf("got error: %s", err)) // A kernel bug was fixed in 97c753e62e6c where EINVAL was returned instead // of ENOENT, but only for kretprobes. _, err = pmuProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: "bogus", Ret: true}) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist), qt.Commentf("got error: %s", err)) } // Test fallback behaviour on kernels without perf_kprobe PMU available. func TestKprobePMUUnavailable(t *testing.T) { pk, err := pmuProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym}) if err == nil { pk.Close() t.Skipf("Kernel supports perf_kprobe PMU, not asserting error.") } // Only allow a PMU creation with a valid kernel symbol to fail with ErrNotSupported. qt.Assert(t, qt.ErrorIs(err, ErrNotSupported), qt.Commentf("got error: %s", err)) } func BenchmarkKprobeCreatePMU(b *testing.B) { for b.Loop() { pr, err := pmuProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym}) if err != nil { b.Error("error creating perf_kprobe PMU:", err) } if err := pr.Close(); err != nil { b.Error("error closing perf_kprobe PMU:", err) } } } // Test tracefs k(ret)probe creation on all kernel versions. func TestKprobeTraceFS(t *testing.T) { // Open and close tracefs k(ret)probes, checking all errors. 
kp, err := tracefsProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(kp.Close())) kp, err = tracefsProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym, Ret: true}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(kp.Close())) // Create two identical trace events, ensure their IDs differ. k1, err := tracefsProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym}) qt.Assert(t, qt.IsNil(err)) defer k1.Close() qt.Assert(t, qt.IsNotNil(k1.tracefsEvent)) k2, err := tracefsProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym}) qt.Assert(t, qt.IsNil(err)) defer k2.Close() qt.Assert(t, qt.IsNotNil(k2.tracefsEvent)) // Compare the kprobes' tracefs IDs. qt.Assert(t, qt.Not(qt.Equals(k1.tracefsEvent.ID(), k2.tracefsEvent.ID()))) // Expect an error when supplying an invalid custom group name _, err = tracefsProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym, Group: "/"}) qt.Assert(t, qt.Not(qt.IsNil(err))) cg := "customgroup" k3, err := tracefsProbe(tracefs.ProbeArgs{Type: tracefs.Kprobe, Symbol: ksym, Group: cg}) qt.Assert(t, qt.IsNil(err)) defer k3.Close() qt.Assert(t, qt.Matches(k3.tracefsEvent.Group(), `customgroup_[a-f0-9]{16}`)) // Prepare probe args. args := tracefs.ProbeArgs{Type: tracefs.Kprobe, Group: "testgroup", Symbol: "symbol"} // Write a k(ret)probe event for a non-existing symbol. _, err = tracefs.NewEvent(args) // A kernel bug was introduced in 9d8616034f16 that causes EINVAL to be returned // instead of ENOENT when trying to attach kprobes to non-existing symbols. qt.Assert(t, qt.IsTrue(errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL)), qt.Commentf("got error: %s", err)) // A kernel bug was fixed in 97c753e62e6c where EINVAL was returned instead // of ENOENT, but only for kretprobes. 
args.Ret = true _, err = tracefs.NewEvent(args) qt.Assert(t, qt.IsTrue(errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL)), qt.Commentf("got error: %s", err)) } func BenchmarkKprobeCreateTraceFS(b *testing.B) { for b.Loop() { // Include /kprobe_events operations in the benchmark loop // because we create one per perf event. pr, err := tracefsProbe(tracefs.ProbeArgs{Symbol: ksym}) if err != nil { b.Error("error creating tracefs perf event:", err) } if err := pr.Close(); err != nil { b.Error("error closing tracefs perf event:", err) } } } func TestKprobeProgramCall(t *testing.T) { m, p := newUpdaterMapProg(t, ebpf.Kprobe, 0) // Open Kprobe on `sys_getpid` and attach it // to the ebpf program created above. k, err := Kprobe("sys_getpid", p, nil) if err != nil { t.Fatal(err) } // Trigger ebpf program call. unix.Getpid() // Assert that the value got incremented to at least 1, while allowing // for bigger values, because we could race with other getpid callers. assertMapValueGE(t, m, 0, 1) // Detach the Kprobe. if err := k.Close(); err != nil { t.Fatal(err) } // Reset map value to 0 at index 0. if err := m.Update(uint32(0), uint32(0), ebpf.UpdateExist); err != nil { t.Fatal(err) } // Retrigger the ebpf program call. unix.Getpid() // Assert that this time the value has not been updated. assertMapValue(t, m, 0, 0) } func newUpdaterMapProg(t *testing.T, typ ebpf.ProgramType, attach ebpf.AttachType) (*ebpf.Map, *ebpf.Program) { // Create ebpf map. Will contain only one key with initial value 0. m, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }) if err != nil { t.Fatal(err) } // Create ebpf program. When called, will increase the value of key 0 by 1 // in the map created above. 
p, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Type: typ, Instructions: asm.Instructions{ // R1 map asm.LoadMapPtr(asm.R1, m.FD()), // R2 key asm.Mov.Reg(asm.R2, asm.R10), asm.Add.Imm(asm.R2, -4), asm.StoreImm(asm.R2, 0, 0, asm.Word), // Lookup map[0] asm.FnMapLookupElem.Call(), asm.JEq.Imm(asm.R0, 0, "ret"), // u32 val = R0++ asm.LoadMem(asm.R1, asm.R0, 0, asm.Word), asm.Add.Imm(asm.R1, 1), asm.StoreMem(asm.RFP, -8, asm.R1, asm.Word), // u32 key = 0 asm.Mov.Imm(asm.R1, 0), asm.StoreMem(asm.RFP, -4, asm.R1, asm.Word), // bpf_map_update_elem(...) asm.Mov.Reg(asm.R2, asm.RFP), asm.Add.Imm(asm.R2, -4), asm.Mov.Reg(asm.R3, asm.RFP), asm.Add.Imm(asm.R3, -8), asm.LoadMapPtr(asm.R1, m.FD()), asm.Mov.Imm(asm.R4, 0), asm.FnMapUpdateElem.Call(), // exit 0 asm.Mov.Imm(asm.R0, 0), asm.Return().WithSymbol("ret"), }, AttachType: attach, License: "Dual MIT/GPL", }) if err != nil { t.Fatal(err) } // Close the program and map on test teardown. t.Cleanup(func() { m.Close() p.Close() }) return m, p } func assertMapValue(t *testing.T, m *ebpf.Map, k, v uint32) { var val uint32 if err := m.Lookup(k, &val); err != nil { t.Fatal(err) } if val != v { t.Fatalf("unexpected value: want '%d', got '%d'", v, val) } } func assertMapValueGE(t *testing.T, m *ebpf.Map, k, v uint32) { var val uint32 if err := m.Lookup(k, &val); err != nil { t.Fatal(err) } if val < v { t.Fatalf("unexpected value: want >= '%d', got '%d'", v, val) } } func TestKprobeCookie(t *testing.T) { testutils.SkipOnOldKernel(t, "5.15", "bpf_perf_link") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") k, err := Kprobe(ksym, prog, &KprobeOptions{Cookie: 1000}) if err != nil { t.Fatal(err) } k.Close() } ================================================ FILE: link/link.go ================================================ package link import ( "errors" "fmt" "os" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // Type is 
the kind of link. type Type = sys.LinkType var ErrNotSupported = internal.ErrNotSupported // Link represents a Program attached to a BPF hook. type Link interface { // Replace the current program with a new program. // // Passing a nil program is an error. May return an error wrapping ErrNotSupported. Update(*ebpf.Program) error // Persist a link by pinning it into a bpffs. // // May return an error wrapping ErrNotSupported. Pin(string) error // Undo a previous call to Pin. // // May return an error wrapping ErrNotSupported. Unpin() error // Close frees resources. // // The link will be broken unless it has been successfully pinned. // A link may continue past the lifetime of the process if Close is // not called. Close() error // Detach the link from its corresponding attachment point. // // May return an error wrapping ErrNotSupported. Detach() error // Info returns metadata on a link. // // May return an error wrapping ErrNotSupported. Info() (*Info, error) // Prevent external users from implementing this interface. isLink() } // NewFromFD creates a link from a raw fd. // // You should not use fd after calling this function. func NewFromFD(fd int) (Link, error) { sysFD, err := sys.NewFD(fd) if err != nil { return nil, err } return wrapRawLink(&RawLink{fd: sysFD}) } // NewFromID returns the link associated with the given id. // // Returns ErrNotExist if there is no link with the given id. func NewFromID(id ID) (Link, error) { getFdAttr := &sys.LinkGetFdByIdAttr{Id: id} fd, err := sys.LinkGetFdById(getFdAttr) if err != nil { return nil, fmt.Errorf("get link fd from ID %d: %w", id, err) } return wrapRawLink(&RawLink{fd, ""}) } // LoadPinnedLink loads a Link from a pin (file) on the BPF virtual filesystem. // // Requires at least Linux 5.7. func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) { raw, err := loadPinnedRawLink(fileName, opts) if err != nil { return nil, err } return wrapRawLink(raw) } // ID uniquely identifies a BPF link. 
type ID = sys.LinkID // RawLinkOptions control the creation of a raw link. type RawLinkOptions struct { // File descriptor to attach to. This differs for each attach type. Target int // Program to attach. Program *ebpf.Program // Attach must match the attach type of Program. Attach ebpf.AttachType // BTF is the BTF of the attachment target. BTF btf.TypeID // Flags control the attach behaviour. Flags uint32 } // Info contains metadata on a link. type Info struct { Type Type ID ID Program ebpf.ProgramID extra interface{} } // RawLink is the low-level API to bpf_link. // // You should consider using the higher level interfaces in this // package instead. type RawLink struct { fd *sys.FD pinnedPath string } func loadPinnedRawLink(fileName string, opts *ebpf.LoadPinOptions) (*RawLink, error) { fd, typ, err := sys.ObjGetTyped(&sys.ObjGetAttr{ Pathname: sys.NewStringPointer(fileName), FileFlags: opts.Marshal(), }) if err != nil { return nil, fmt.Errorf("load pinned link: %w", err) } if typ != sys.BPF_TYPE_LINK { _ = fd.Close() return nil, fmt.Errorf("%s is not a Link", fileName) } return &RawLink{fd, fileName}, nil } func (l *RawLink) isLink() {} // FD returns the raw file descriptor. func (l *RawLink) FD() int { return l.fd.Int() } // Close breaks the link. // // Use Pin if you want to make the link persistent. func (l *RawLink) Close() error { return l.fd.Close() } // Pin persists a link past the lifetime of the process. // // Calling Close on a pinned Link will not break the link // until the pin is removed. func (l *RawLink) Pin(fileName string) error { if err := sys.Pin(l.pinnedPath, fileName, l.fd); err != nil { return err } l.pinnedPath = fileName return nil } // Unpin implements the Link interface. func (l *RawLink) Unpin() error { if err := sys.Unpin(l.pinnedPath); err != nil { return err } l.pinnedPath = "" return nil } // IsPinned returns true if the Link has a non-empty pinned path. 
// Info returns metadata about the link.
//
// Linktype specific metadata is not included and can be retrieved
// via the linktype specific Info() method.
//
// A failure of the underlying object info query is wrapped, so callers can
// match the cause with errors.Is or errors.As.
func (l *RawLink) Info() (*Info, error) {
	var info sys.LinkInfo

	if err := sys.ObjInfo(l.fd, &info); err != nil {
		// Wrap with %w rather than %s to keep the error chain intact.
		return nil, fmt.Errorf("link info: %w", err)
	}

	return &Info{
		info.Type,
		info.Id,
		ebpf.ProgramID(info.ProgId),
		nil,
	}, nil
}
// // Returns true if another link was found. Call [Iterator.Err] after the function returns false. func (it *Iterator) Next() bool { id := it.ID for { getIdAttr := &sys.LinkGetNextIdAttr{Id: id} err := sys.LinkGetNextId(getIdAttr) if errors.Is(err, os.ErrNotExist) { // There are no more links. break } else if err != nil { it.err = fmt.Errorf("get next link ID: %w", err) break } id = getIdAttr.NextId l, err := NewFromID(id) if errors.Is(err, os.ErrNotExist) { // Couldn't load the link fast enough. Try next ID. continue } else if err != nil { it.err = fmt.Errorf("get link for ID %d: %w", id, err) break } if it.Link != nil { it.Link.Close() } it.ID, it.Link = id, l return true } // No more links or we encountered an error. if it.Link != nil { it.Link.Close() } it.Link = nil return false } // Take the ownership of the current link. // // It's the callers responsibility to close the link. func (it *Iterator) Take() Link { l := it.Link it.Link = nil return l } // Err returns an error if iteration failed for some reason. func (it *Iterator) Err() error { return it.err } func (it *Iterator) Close() { if it.Link != nil { it.Link.Close() } } ================================================ FILE: link/link_other.go ================================================ //go:build !windows package link import ( "fmt" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" ) // Valid link types. 
const ( UnspecifiedType = sys.BPF_LINK_TYPE_UNSPEC RawTracepointType = sys.BPF_LINK_TYPE_RAW_TRACEPOINT TracingType = sys.BPF_LINK_TYPE_TRACING CgroupType = sys.BPF_LINK_TYPE_CGROUP IterType = sys.BPF_LINK_TYPE_ITER NetNsType = sys.BPF_LINK_TYPE_NETNS XDPType = sys.BPF_LINK_TYPE_XDP PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT KprobeMultiType = sys.BPF_LINK_TYPE_KPROBE_MULTI TCXType = sys.BPF_LINK_TYPE_TCX UprobeMultiType = sys.BPF_LINK_TYPE_UPROBE_MULTI NetfilterType = sys.BPF_LINK_TYPE_NETFILTER NetkitType = sys.BPF_LINK_TYPE_NETKIT StructOpsType = sys.BPF_LINK_TYPE_STRUCT_OPS ) // AttachRawLink creates a raw link. func AttachRawLink(opts RawLinkOptions) (*RawLink, error) { if err := haveBPFLink(); err != nil { return nil, err } if opts.Target < 0 { return nil, fmt.Errorf("invalid target: %s", sys.ErrClosedFd) } progFd := opts.Program.FD() if progFd < 0 { return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) } p, attachType := platform.DecodeConstant(opts.Attach) if p != platform.Linux { return nil, fmt.Errorf("attach type %s: %w", opts.Attach, internal.ErrNotSupportedOnOS) } attr := sys.LinkCreateAttr{ TargetFd: uint32(opts.Target), ProgFd: uint32(progFd), AttachType: sys.AttachType(attachType), TargetBtfId: opts.BTF, Flags: opts.Flags, } fd, err := sys.LinkCreate(&attr) if err != nil { return nil, fmt.Errorf("create link: %w", err) } return &RawLink{fd, ""}, nil } // wrap a RawLink in a more specific type if possible. // // The function takes ownership of raw and closes it on error. 
func wrapRawLink(raw *RawLink) (_ Link, err error) { defer func() { if err != nil { raw.Close() } }() info, err := raw.Info() if err != nil { return nil, err } switch info.Type { case RawTracepointType: return &rawTracepoint{*raw}, nil case TracingType: return &tracing{*raw}, nil case CgroupType: return &linkCgroup{*raw}, nil case IterType: return &Iter{*raw}, nil case NetNsType: return &NetNsLink{*raw}, nil case KprobeMultiType: return &kprobeMultiLink{*raw}, nil case UprobeMultiType: return &uprobeMultiLink{*raw}, nil case PerfEventType: return &perfEventLink{*raw, nil}, nil case TCXType: return &tcxLink{*raw}, nil case NetfilterType: return &netfilterLink{*raw}, nil case NetkitType: return &netkitLink{*raw}, nil case XDPType: return &xdpLink{*raw}, nil case StructOpsType: return &structOpsLink{*raw}, nil default: return raw, nil } } type TracingInfo struct { AttachType sys.AttachType TargetObjectId uint32 TargetBtfId sys.TypeID } type CgroupInfo struct { CgroupId uint64 AttachType sys.AttachType _ [4]byte } type NetNsInfo struct { NetnsInode uint32 AttachType sys.AttachType } type TCXInfo struct { Ifindex uint32 AttachType sys.AttachType } type XDPInfo struct { Ifindex uint32 } type NetfilterInfo struct { ProtocolFamily NetfilterProtocolFamily Hook NetfilterInetHook Priority int32 Flags uint32 } type NetkitInfo struct { Ifindex uint32 AttachType sys.AttachType } type RawTracepointInfo struct { Name string } type KprobeMultiInfo struct { // Count is the number of addresses hooked by the kprobe. Count uint32 Flags uint32 Missed uint64 addrs []uint64 cookies []uint64 } type KprobeMultiAddress struct { Address uint64 Cookie uint64 } // Addresses are the addresses hooked by the kprobe. 
func (kpm *KprobeMultiInfo) Addresses() ([]KprobeMultiAddress, bool) { if kpm.addrs == nil || len(kpm.addrs) != len(kpm.cookies) { return nil, false } addrs := make([]KprobeMultiAddress, len(kpm.addrs)) for i := range kpm.addrs { addrs[i] = KprobeMultiAddress{ Address: kpm.addrs[i], Cookie: kpm.cookies[i], } } return addrs, true } type UprobeMultiInfo struct { Count uint32 Flags uint32 Missed uint64 offsets []uint64 cookies []uint64 refCtrOffsets []uint64 // File is the path that the file the uprobe was attached to // had at creation time. // // However, due to various circumstances (differing mount namespaces, // file replacement, ...), this path may not point to the same binary // the uprobe was originally attached to. File string pid uint32 } type UprobeMultiOffset struct { Offset uint64 Cookie uint64 ReferenceCount uint64 } // Offsets returns the offsets that the uprobe was attached to along with the related cookies and ref counters. func (umi *UprobeMultiInfo) Offsets() ([]UprobeMultiOffset, bool) { if umi.offsets == nil || len(umi.cookies) != len(umi.offsets) || len(umi.refCtrOffsets) != len(umi.offsets) { return nil, false } var adresses = make([]UprobeMultiOffset, len(umi.offsets)) for i := range umi.offsets { adresses[i] = UprobeMultiOffset{ Offset: umi.offsets[i], Cookie: umi.cookies[i], ReferenceCount: umi.refCtrOffsets[i], } } return adresses, true } // Pid returns the process ID that this uprobe is attached to. // // If it does not exist, the uprobe will trigger for all processes. 
func (umi *UprobeMultiInfo) Pid() (uint32, bool) { return umi.pid, umi.pid > 0 } const ( PerfEventUnspecified = sys.BPF_PERF_EVENT_UNSPEC PerfEventUprobe = sys.BPF_PERF_EVENT_UPROBE PerfEventUretprobe = sys.BPF_PERF_EVENT_URETPROBE PerfEventKprobe = sys.BPF_PERF_EVENT_KPROBE PerfEventKretprobe = sys.BPF_PERF_EVENT_KRETPROBE PerfEventTracepoint = sys.BPF_PERF_EVENT_TRACEPOINT PerfEventEvent = sys.BPF_PERF_EVENT_EVENT ) type PerfEventInfo struct { Type sys.PerfEventType extra interface{} } func (r *PerfEventInfo) Kprobe() *KprobeInfo { e, _ := r.extra.(*KprobeInfo) return e } func (r *PerfEventInfo) Uprobe() *UprobeInfo { e, _ := r.extra.(*UprobeInfo) return e } func (r *PerfEventInfo) Tracepoint() *TracepointInfo { e, _ := r.extra.(*TracepointInfo) return e } func (r *PerfEventInfo) Event() *EventInfo { e, _ := r.extra.(*EventInfo) return e } type KprobeInfo struct { Address uint64 Missed uint64 Function string Offset uint32 } type UprobeInfo struct { // File is the path that the file the uprobe was attached to // had at creation time. // // However, due to various circumstances (differing mount namespaces, // file replacement, ...), this path may not point to the same binary // the uprobe was originally attached to. File string Offset uint32 Cookie uint64 OffsetReferenceCount uint64 } type TracepointInfo struct { Tracepoint string Cookie uint64 } type EventInfo struct { Config uint64 Type uint32 Cookie uint64 } // Tracing returns tracing type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) Tracing() *TracingInfo { e, _ := r.extra.(*TracingInfo) return e } // Cgroup returns cgroup type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) Cgroup() *CgroupInfo { e, _ := r.extra.(*CgroupInfo) return e } // NetNs returns netns type-specific link info. // // Returns nil if the type-specific link info isn't available. 
func (r Info) NetNs() *NetNsInfo { e, _ := r.extra.(*NetNsInfo) return e } // XDP returns XDP type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) XDP() *XDPInfo { e, _ := r.extra.(*XDPInfo) return e } // TCX returns TCX type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) TCX() *TCXInfo { e, _ := r.extra.(*TCXInfo) return e } // Netfilter returns netfilter type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) Netfilter() *NetfilterInfo { e, _ := r.extra.(*NetfilterInfo) return e } // Netkit returns netkit type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) Netkit() *NetkitInfo { e, _ := r.extra.(*NetkitInfo) return e } // KprobeMulti returns kprobe-multi type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) KprobeMulti() *KprobeMultiInfo { e, _ := r.extra.(*KprobeMultiInfo) return e } // UprobeMulti returns uprobe-multi type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) UprobeMulti() *UprobeMultiInfo { e, _ := r.extra.(*UprobeMultiInfo) return e } // PerfEvent returns perf-event type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) PerfEvent() *PerfEventInfo { e, _ := r.extra.(*PerfEventInfo) return e } // RawTracepoint returns raw-tracepoint type-specific link info. // // Returns nil if the type-specific link info isn't available. 
func (r Info) RawTracepoint() *RawTracepointInfo {
	e, _ := r.extra.(*RawTracepointInfo)
	return e
}

================================================
FILE: link/link_other_test.go
================================================
//go:build !windows

package link

import (
	"os"
	"testing"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/testutils"
)

// testLinkArch runs the "link/info" subtest for non-Windows platforms.
//
// It requires Info to succeed with a non-zero Type and then checks the
// type-specific extra info according to the concrete type of link.
func testLinkArch(t *testing.T, link Link) {
	t.Run("link/info", func(t *testing.T) {
		info, err := link.Info()
		testutils.SkipIfNotSupported(t, err)
		if err != nil {
			t.Fatal("Link info returns an error:", err)
		}

		if info.Type == 0 {
			t.Fatal("Failed to get link info type")
		}

		switch link.(type) {
		case *tracing:
			if info.Tracing() == nil {
				t.Fatalf("Failed to get link tracing extra info")
			}
		case *linkCgroup:
			cg := info.Cgroup()
			if cg.CgroupId == 0 {
				t.Fatalf("Failed to get link Cgroup extra info")
			}
		case *NetNsLink:
			netns := info.NetNs()
			if netns.AttachType == 0 {
				t.Fatalf("Failed to get link NetNs extra info")
			}
		case *xdpLink:
			xdp := info.XDP()
			if xdp.Ifindex == 0 {
				t.Fatalf("Failed to get link XDP extra info")
			}
		case *tcxLink:
			tcx := info.TCX()
			if tcx.Ifindex == 0 {
				t.Fatalf("Failed to get link TCX extra info")
			}
		case *netfilterLink:
			nf := info.Netfilter()
			if nf.Priority == 0 {
				t.Fatalf("Failed to get link Netfilter extra info")
			}
		case *kprobeMultiLink:
			// test default Info data
			kmulti := info.KprobeMulti()
			// kprobe multi link info is supported since kernel 6.6
			testutils.SkipOnOldKernel(t, "6.6", "bpf_kprobe_multi_link_fill_link_info")
			qt.Assert(t, qt.Not(qt.Equals(kmulti.Count, 0)))
			// NB: We don't check that missed is actually correct
			// since it's not easy to trigger from tests.
		case *perfEventLink:
			// test default Info data
			pevent := info.PerfEvent()
			switch pevent.Type {
			case sys.BPF_PERF_EVENT_KPROBE, sys.BPF_PERF_EVENT_KRETPROBE:
				_ = pevent.Kprobe()
				// NB: We don't check that missed is actually correct
				// since it's not easy to trigger from tests.
				// Nor do we check the address (since we don't know it here).
			}
		}
	})
}

// newRawLink attaches a cgroup SKB program to a fresh cgroup via AttachRawLink.
//
// The test is skipped if raw links are not supported. The link is closed when
// the test finishes.
func newRawLink(t *testing.T) (*RawLink, *ebpf.Program) {
	t.Helper()

	cgroup, prog := mustCgroupFixtures(t)

	link, err := AttachRawLink(RawLinkOptions{
		Target:  int(cgroup.Fd()),
		Program: prog,
		Attach:  ebpf.AttachCGroupInetEgress,
	})
	testutils.SkipIfNotSupported(t, err)
	if err != nil {
		t.Fatal("Can't create raw link:", err)
	}

	t.Cleanup(func() { link.Close() })

	return link, prog
}

// mustCgroupFixtures returns a test cgroup and a loaded CGroupSKB program.
//
// The test is skipped if program attachment is not supported.
func mustCgroupFixtures(t *testing.T) (*os.File, *ebpf.Program) {
	t.Helper()

	testutils.SkipIfNotSupported(t, haveProgAttach())

	return testutils.CreateCgroup(t), mustLoadProgram(t, ebpf.CGroupSKB, 0, "")
}

// mustLoadProgram loads a minimal "return 0" program of the given type.
//
// RawTracepoint and LSM programs are loaded with a GPL license, everything
// else with MIT. The program is closed when the test finishes; load failures
// are fatal.
func mustLoadProgram(tb testing.TB, typ ebpf.ProgramType, attachType ebpf.AttachType, attachTo string) *ebpf.Program {
	tb.Helper()

	license := "MIT"
	switch typ {
	case ebpf.RawTracepoint, ebpf.LSM:
		license = "GPL"
	}

	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:       typ,
		AttachType: attachType,
		AttachTo:   attachTo,
		License:    license,
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
	})
	if err != nil {
		tb.Fatal(err)
	}

	tb.Cleanup(func() {
		prog.Close()
	})

	return prog
}

// TestDetachLinkFail checks that Detach on a uprobe link reports
// ErrNotSupported.
func TestDetachLinkFail(t *testing.T) {
	prog := mustLoadProgram(t, ebpf.Kprobe, 0, "")
	defer prog.Close()

	uprobeLink, err := bashEx.Uprobe(bashSym, prog, nil)
	qt.Assert(t, qt.IsNil(err))
	defer uprobeLink.Close()

	err = uprobeLink.Detach()
	qt.Assert(t, qt.ErrorIs(err, ErrNotSupported), qt.Commentf("got error: %s", err))
}

================================================
FILE: link/link_test.go
================================================
package link

import (
	"errors"
	"math"
	"path/filepath"
	"reflect"
	"testing"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/testutils"
	"github.com/cilium/ebpf/internal/testutils/testmain"
	"github.com/cilium/ebpf/internal/unix"
)

func TestMain(m *testing.M) {
	testmain.Run(m)
}

// TestRawLink checks that a raw link reports the ID of the attached program
// and then runs the generic link tests.
func TestRawLink(t *testing.T) {
	link, prog := newRawLink(t)

	info, err := link.Info()
	if err != nil {
		t.Fatal("Can't get link info:", err)
	}

	pi, err := prog.Info()
	if err != nil {
		t.Fatal("Can't get program info:", err)
	}

	progID, ok := pi.ID()
	if !ok {
		t.Fatal("Program ID not available in program info")
	}

	if info.Program != progID {
		t.Error("Link program ID doesn't match program ID")
	}

	testLink(t, link, prog)
}

// TestUnpinRawLink checks that IsPinned flips to false after Unpin.
func TestUnpinRawLink(t *testing.T) {
	link, _ := newPinnedRawLink(t)

	qt.Assert(t, qt.IsTrue(link.IsPinned()))

	if err := link.Unpin(); err != nil {
		t.Fatal(err)
	}

	qt.Assert(t, qt.IsFalse(link.IsPinned()))
}

// TestDetachRawLink checks that a raw link can be detached.
func TestDetachRawLink(t *testing.T) {
	link, _ := newRawLink(t)
	if err := link.Detach(); err != nil {
		t.Fatal(err)
	}
}

// TestRawLinkLoadPinnedWithOptions checks that invalid load flags are rejected
// with EINVAL.
func TestRawLinkLoadPinnedWithOptions(t *testing.T) {
	link, path := newPinnedRawLink(t)
	defer link.Close()

	qt.Assert(t, qt.IsTrue(link.IsPinned()))

	// It seems like the kernel ignores BPF_F_RDONLY when updating a link,
	// so we can't test this.
	_, err := loadPinnedRawLink(path, &ebpf.LoadPinOptions{
		Flags: math.MaxUint32,
	})
	if !errors.Is(err, unix.EINVAL) {
		t.Fatal("Invalid flags don't trigger an error:", err)
	}
}

// TestIterator walks all loaded links and checks that the iterator advances,
// cleans up after the last element and finds the link created by this test.
func TestIterator(t *testing.T) {
	tLink, _ := newPinnedRawLink(t)
	tLinkInfo, err := tLink.Info()
	testutils.SkipIfNotSupported(t, err)
	if err != nil {
		t.Fatal("Can't get original link info:", err)
	}

	it := new(Iterator)
	defer it.Close()

	prev := it.ID
	var foundLink Link
	for it.Next() {
		// Iterate all loaded links.
		if it.Link == nil {
			t.Fatal("Next doesn't assign link")
		}
		if it.ID == prev {
			t.Fatal("Iterator doesn't advance ID")
		}
		prev = it.ID
		if it.ID == tLinkInfo.ID {
			// Take ownership so the link outlives the iteration.
			foundLink = it.Take()
		}
	}
	if err := it.Err(); err != nil {
		t.Fatal("Iteration returned an error:", err)
	}
	if it.Link != nil {
		t.Fatal("Next doesn't clean up link on last iteration")
	}
	if prev != it.ID {
		t.Fatal("Next changes ID on last iteration")
	}
	if foundLink == nil {
		t.Fatal("Original link not found")
	}
	defer foundLink.Close()
	// Confirm that we found the original link.
	info, err := foundLink.Info()
	if err != nil {
		t.Fatal("Can't get link info:", err)
	}
	if info.ID != tLinkInfo.ID {
		t.Fatal("Found link has wrong ID")
	}
}

// newPinnedRawLink returns a raw link pinned to a temporary bpffs, together
// with the pin path. The test is skipped if pinning is not supported.
func newPinnedRawLink(t *testing.T) (*RawLink, string) {
	t.Helper()

	link, _ := newRawLink(t)

	path := filepath.Join(testutils.TempBPFFS(t), "link")
	err := link.Pin(path)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	return link, path
}

// testLink runs the generic link test suite against link: pinning, update,
// platform-specific info checks and re-creation from a file descriptor.
//
// link is closed at the end of the suite.
func testLink(t *testing.T, link Link, prog *ebpf.Program) {
	t.Helper()

	tmp := testutils.TempBPFFS(t)

	_, isRawLink := link.(*RawLink)

	t.Run("link/pinning", func(t *testing.T) {
		path := filepath.Join(tmp, "link")
		err := link.Pin(path)
		testutils.SkipIfNotSupported(t, err)
		if err != nil {
			t.Fatalf("Can't pin %T: %s", link, err)
		}

		link2, err := LoadPinnedLink(path, nil)
		if err != nil {
			t.Fatalf("Can't load pinned %T: %s", link, err)
		}
		link2.Close()

		// Raw links are exempt from the concrete type comparison.
		if !isRawLink && reflect.TypeOf(link) != reflect.TypeOf(link2) {
			t.Errorf("Loading a pinned %T returns a %T", link, link2)
		}

		_, err = LoadPinnedLink(path, &ebpf.LoadPinOptions{
			Flags: math.MaxUint32,
		})
		if !errors.Is(err, unix.EINVAL) {
			t.Errorf("Loading a pinned %T doesn't respect flags", link)
		}
	})

	t.Run("link/update", func(t *testing.T) {
		err := link.Update(prog)
		testutils.SkipIfNotSupported(t, err)
		if err != nil {
			t.Fatal("Update returns an error:", err)
		}

		func() {
			// Panicking is OK
			defer func() {
				_ = recover()
			}()

			if err := link.Update(nil); err == nil {
				t.Fatalf("%T.Update accepts nil program", link)
			}
		}()
	})

	testLinkArch(t, link)

	type FDer interface {
		FD() int
	}

	t.Run("from fd", func(t *testing.T) {
		fder, ok := link.(FDer)
		if !ok {
			t.Skip("Link doesn't allow retrieving FD")
		}

		// We need to dup the FD since NewFromFD takes
		// ownership.
		dupFD := testutils.DupFD(t, fder.FD())

		newLink, err := NewFromFD(dupFD)
		testutils.SkipIfNotSupported(t, err)
		if err != nil {
			t.Fatal("Can't create new link from dup link FD:", err)
		}
		defer newLink.Close()

		if !isRawLink && reflect.TypeOf(newLink) != reflect.TypeOf(link) {
			t.Fatalf("Expected type %T, got %T", link, newLink)
		}
	})

	if err := link.Close(); err != nil {
		t.Fatalf("%T.Close returns an error: %s", link, err)
	}
}

// TestLoadWrongPin checks that LoadPinnedLink rejects a pinned program and
// accepts a pinned link.
func TestLoadWrongPin(t *testing.T) {
	l, p := newRawLink(t)

	tmp := testutils.TempBPFFS(t)
	ppath := filepath.Join(tmp, "prog")
	lpath := filepath.Join(tmp, "link")

	qt.Assert(t, qt.IsNil(p.Pin(ppath)))
	qt.Assert(t, qt.IsNil(l.Pin(lpath)))

	_, err := LoadPinnedLink(ppath, nil)
	qt.Assert(t, qt.IsNotNil(err))

	ll, err := LoadPinnedLink(lpath, nil)
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsNil(ll.Close()))
}

================================================
FILE: link/link_windows.go
================================================
package link

import (
	"fmt"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/efw"
	"github.com/cilium/ebpf/internal/platform"
	"github.com/cilium/ebpf/internal/sys"
)

// AttachRawLink creates a raw link.
func AttachRawLink(opts RawLinkOptions) (*RawLink, error) { if opts.Target != 0 || opts.BTF != 0 || opts.Flags != 0 { return nil, fmt.Errorf("specified option(s) %w", internal.ErrNotSupportedOnOS) } plat, attachType := platform.DecodeConstant(opts.Attach) if plat != platform.Windows { return nil, fmt.Errorf("attach type %s: %w", opts.Attach, internal.ErrNotSupportedOnOS) } attachTypeGUID, err := efw.EbpfGetEbpfAttachType(attachType) if err != nil { return nil, fmt.Errorf("get attach type: %w", err) } progFd := opts.Program.FD() if progFd < 0 { return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) } raw, err := efw.EbpfProgramAttachFds(progFd, attachTypeGUID, nil, 0) if err != nil { return nil, fmt.Errorf("attach link: %w", err) } fd, err := sys.NewFD(int(raw)) if err != nil { return nil, err } return &RawLink{fd: fd}, nil } func wrapRawLink(raw *RawLink) (Link, error) { return raw, nil } ================================================ FILE: link/link_windows_test.go ================================================ package link import ( "os/exec" "testing" "github.com/go-quicktest/qt" "golang.org/x/sys/windows" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" ) // ntosebpfext has not yet assigned a stable enum value so we can't refer to // it via that (https://github.com/microsoft/ntosebpfext/issues/152). 
// // See https://github.com/microsoft/ntosebpfext/blob/75ceaac38a0254e44f3219852d79a336d10ad9f3/include/ebpf_ntos_program_attach_type_guids.h var ( programTypeProcessGUID = makeGUID(0x22ea7b37, 0x1043, 0x4d0d, [8]byte{0xb6, 0x0d, 0xca, 0xfa, 0x1c, 0x7b, 0x63, 0x8e}) attachTypeProcessGUID = makeGUID(0x66e20687, 0x9805, 0x4458, [8]byte{0xa0, 0xdb, 0x38, 0xe2, 0x20, 0xd3, 0x16, 0x85}) ) func testLinkArch(t *testing.T, link Link) {} func newRawLink(t *testing.T) (*RawLink, *ebpf.Program) { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Type: ebpf.WindowsBind, Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, License: "MIT", }) qt.Assert(t, qt.IsNil(err)) t.Cleanup(func() { prog.Close() }) link, err := AttachRawLink(RawLinkOptions{ Program: prog, Attach: ebpf.AttachWindowsBind, }) qt.Assert(t, qt.IsNil(err)) t.Cleanup(func() { link.Close() }) return link, prog } func TestProcessLink(t *testing.T) { array, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.WindowsArray, Name: "process_state", KeySize: 4, ValueSize: 4, MaxEntries: 1, }) qt.Assert(t, qt.IsNil(err)) defer array.Close() prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Type: windowsProgramTypeForGUID(t, programTypeProcessGUID), Name: "process_test", Instructions: asm.Instructions{ // R1 = map asm.LoadMapPtr(asm.R1, array.FD()), // R2 = key asm.Mov.Reg(asm.R2, asm.R10), asm.Add.Imm(asm.R2, -4), asm.StoreImm(asm.R2, 0, 0, asm.Word), // R3 = value asm.Mov.Reg(asm.R3, asm.R2), asm.Add.Imm(asm.R3, -4), asm.StoreImm(asm.R3, 0, 1, asm.Word), // R4 = flags asm.Mov.Imm(asm.R4, 0), // bpf_map_update_elem(map, key, value, flags) asm.WindowsFnMapUpdateElem.Call(), asm.Mov.Imm(asm.R0, 0), asm.Return(), }, License: "MIT", }) qt.Assert(t, qt.IsNil(err)) defer prog.Close() link, err := AttachRawLink(RawLinkOptions{ Program: prog, Attach: windowsAttachTypeForGUID(t, attachTypeProcessGUID), }) qt.Assert(t, qt.IsNil(err)) defer link.Close() qt.Assert(t, qt.IsNil(exec.Command("cmd.exe", "/c", "exit 0").Run())) 
var value uint32 qt.Assert(t, qt.IsNil(array.Lookup(uint32(0), &value))) qt.Assert(t, qt.Equals(value, 1), qt.Commentf("Executing a binary should trigger the program")) qt.Assert(t, qt.IsNil(link.Close())) } func makeGUID(data1 uint32, data2 uint16, data3 uint16, data4 [8]byte) windows.GUID { return windows.GUID{Data1: data1, Data2: data2, Data3: data3, Data4: data4} } ================================================ FILE: link/netfilter.go ================================================ //go:build !windows package link import ( "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) const NetfilterIPDefrag NetfilterAttachFlags = 0 // Enable IP packet defragmentation type NetfilterAttachFlags uint32 type NetfilterInetHook = sys.NetfilterInetHook const ( NetfilterInetPreRouting = sys.NF_INET_PRE_ROUTING NetfilterInetLocalIn = sys.NF_INET_LOCAL_IN NetfilterInetForward = sys.NF_INET_FORWARD NetfilterInetLocalOut = sys.NF_INET_LOCAL_OUT NetfilterInetPostRouting = sys.NF_INET_POST_ROUTING ) type NetfilterProtocolFamily = sys.NetfilterProtocolFamily const ( NetfilterProtoUnspec = sys.NFPROTO_UNSPEC NetfilterProtoInet = sys.NFPROTO_INET // Inet applies to both IPv4 and IPv6 NetfilterProtoIPv4 = sys.NFPROTO_IPV4 NetfilterProtoARP = sys.NFPROTO_ARP NetfilterProtoNetdev = sys.NFPROTO_NETDEV NetfilterProtoBridge = sys.NFPROTO_BRIDGE NetfilterProtoIPv6 = sys.NFPROTO_IPV6 ) type NetfilterOptions struct { // Program must be a netfilter BPF program. Program *ebpf.Program // The protocol family. ProtocolFamily NetfilterProtocolFamily // The netfilter hook to attach to. Hook NetfilterInetHook // Priority within hook Priority int32 // Extra link flags Flags uint32 // Netfilter flags NetfilterFlags NetfilterAttachFlags } type netfilterLink struct { RawLink } // AttachNetfilter links a netfilter BPF program to a netfilter hook. 
func AttachNetfilter(opts NetfilterOptions) (Link, error) { if opts.Program == nil { return nil, fmt.Errorf("netfilter program is nil") } if t := opts.Program.Type(); t != ebpf.Netfilter { return nil, fmt.Errorf("invalid program type %s, expected netfilter", t) } progFd := opts.Program.FD() if progFd < 0 { return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) } attr := sys.LinkCreateNetfilterAttr{ ProgFd: uint32(opts.Program.FD()), AttachType: sys.BPF_NETFILTER, Flags: opts.Flags, Pf: opts.ProtocolFamily, Hooknum: opts.Hook, Priority: opts.Priority, NetfilterFlags: uint32(opts.NetfilterFlags), } fd, err := sys.LinkCreateNetfilter(&attr) if err != nil { return nil, fmt.Errorf("attach netfilter link: %w", err) } return &netfilterLink{RawLink{fd, ""}}, nil } func (*netfilterLink) Update(_ *ebpf.Program) error { return fmt.Errorf("netfilter update: %w", ErrNotSupported) } func (nf *netfilterLink) Info() (*Info, error) { var info sys.NetfilterLinkInfo if err := sys.ObjInfo(nf.fd, &info); err != nil { return nil, fmt.Errorf("netfilter link info: %s", err) } extra := &NetfilterInfo{ ProtocolFamily: info.Pf, Hook: info.Hooknum, Priority: info.Priority, Flags: info.Flags, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } var _ Link = (*netfilterLink)(nil) ================================================ FILE: link/netfilter_test.go ================================================ //go:build !windows package link import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" ) func TestAttachNetfilter(t *testing.T) { testutils.SkipOnOldKernel(t, "6.4", "BPF_LINK_TYPE_NETFILTER") prog := mustLoadProgram(t, ebpf.Netfilter, ebpf.AttachNetfilter, "") l, err := AttachNetfilter(NetfilterOptions{ Program: prog, ProtocolFamily: NetfilterProtoIPv4, Hook: NetfilterInetLocalOut, Priority: -128, }) if err != nil { t.Fatal(err) } info, err := l.Info() if err != nil { t.Fatal(err) } nfInfo := 
info.Netfilter() qt.Assert(t, qt.Equals(nfInfo.ProtocolFamily, NetfilterProtoIPv4)) qt.Assert(t, qt.Equals(nfInfo.Hook, NetfilterInetLocalOut)) qt.Assert(t, qt.Equals(nfInfo.Priority, -128)) testLink(t, l, prog) } ================================================ FILE: link/netkit.go ================================================ //go:build !windows package link import ( "fmt" "runtime" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) type NetkitOptions struct { // Index of the interface to attach to. Interface int // Program to attach. Program *ebpf.Program // One of the AttachNetkit* constants. Attach ebpf.AttachType // Attach relative to an anchor. Optional. Anchor Anchor // Only attach if the expected revision matches. ExpectedRevision uint64 // Flags control the attach behaviour. Specify an Anchor instead of // F_LINK, F_ID, F_BEFORE, F_AFTER and R_REPLACE. Optional. Flags uint32 } func AttachNetkit(opts NetkitOptions) (Link, error) { if opts.Interface < 0 { return nil, fmt.Errorf("interface %d is out of bounds", opts.Interface) } if opts.Flags&anchorFlags != 0 { return nil, fmt.Errorf("disallowed flags: use Anchor to specify attach target") } attr := sys.LinkCreateNetkitAttr{ ProgFd: uint32(opts.Program.FD()), AttachType: sys.AttachType(opts.Attach), TargetIfindex: uint32(opts.Interface), ExpectedRevision: opts.ExpectedRevision, Flags: opts.Flags, } if opts.Anchor != nil { fdOrID, flags, err := opts.Anchor.anchor() if err != nil { return nil, fmt.Errorf("attach netkit link: %w", err) } attr.RelativeFdOrId = fdOrID attr.Flags |= flags } fd, err := sys.LinkCreateNetkit(&attr) runtime.KeepAlive(opts.Program) runtime.KeepAlive(opts.Anchor) if err != nil { if haveFeatErr := haveNetkit(); haveFeatErr != nil { return nil, haveFeatErr } return nil, fmt.Errorf("attach netkit link: %w", err) } return &netkitLink{RawLink{fd, ""}}, nil } type netkitLink struct { RawLink } var _ Link = (*netkitLink)(nil) func (netkit *netkitLink) Info() (*Info, error) { 
var info sys.NetkitLinkInfo if err := sys.ObjInfo(netkit.fd, &info); err != nil { return nil, fmt.Errorf("netkit link info: %s", err) } extra := &NetkitInfo{ Ifindex: info.Ifindex, AttachType: info.AttachType, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } ================================================ FILE: link/netkit_test.go ================================================ //go:build !windows package link import ( "fmt" "sync/atomic" "testing" "github.com/go-quicktest/qt" "github.com/jsimonetti/rtnetlink/v2" "github.com/jsimonetti/rtnetlink/v2/driver" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/unix" ) func TestAttachNetkit(t *testing.T) { testutils.SkipOnOldKernel(t, "6.7", "Netkit Device") ns := testutils.NewNetNS(t) prog := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNetkitPrimary, "") link, _ := mustAttachNetkit(t, prog, ebpf.AttachNetkitPrimary, ns) testLink(t, link, prog) } func TestNetkitAnchor(t *testing.T) { testutils.SkipOnOldKernel(t, "6.7", "Netkit Device") a := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNetkitPrimary, "") b := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNetkitPrimary, "") ns := testutils.NewNetNS(t) linkA, ifIndex := mustAttachNetkit(t, a, ebpf.AttachNetkitPrimary, ns) programInfo, err := a.Info() qt.Assert(t, qt.IsNil(err)) programID, _ := programInfo.ID() linkInfo, err := linkA.Info() qt.Assert(t, qt.IsNil(err)) linkID := linkInfo.ID for _, anchor := range []Anchor{ Head(), Tail(), BeforeProgram(a), BeforeProgramByID(programID), AfterLink(linkA), AfterLinkByID(linkID), } { t.Run(fmt.Sprintf("%T", anchor), func(t *testing.T) { var linkB Link qt.Assert(t, qt.IsNil(ns.Do(func() (err error) { linkB, err = AttachNetkit(NetkitOptions{ Program: b, Attach: ebpf.AttachNetkitPrimary, Interface: ifIndex, Anchor: anchor, }) return err }))) qt.Assert(t, qt.IsNil(linkB.Close())) }) } } // The last ifindex we created. 
var prevIfindex atomic.Uint32 func init() { prevIfindex.Store(1000 - 1) } func mustAttachNetkit(tb testing.TB, prog *ebpf.Program, attachType ebpf.AttachType, ns *testutils.NetNS) (Link, int) { var conn *rtnetlink.Conn qt.Assert(tb, qt.IsNil(ns.Do(func() (err error) { conn, err = rtnetlink.Dial(nil) return err }))) tb.Cleanup(func() { qt.Assert(tb, qt.IsNil(conn.Close())) }) ifIndex := prevIfindex.Add(1) layer2 := driver.NetkitModeL2 blackhole := driver.NetkitPolicyDrop qt.Assert(tb, qt.IsNil(conn.Link.New(&rtnetlink.LinkMessage{ Family: unix.AF_UNSPEC, Index: ifIndex, Flags: unix.IFF_UP, Change: unix.IFF_UP, Attributes: &rtnetlink.LinkAttributes{ Info: &rtnetlink.LinkInfo{ Kind: "netkit", Data: &driver.Netkit{ Mode: &layer2, PeerPolicy: &blackhole, }, }, }, }))) tb.Cleanup(func() { qt.Assert(tb, qt.IsNil(conn.Link.Delete(uint32(ifIndex)))) }) var link Link qt.Assert(tb, qt.IsNil(ns.Do(func() (err error) { link, err = AttachNetkit(NetkitOptions{ Program: prog, Attach: attachType, Interface: int(ifIndex), }) return err }))) tb.Cleanup(func() { qt.Assert(tb, qt.IsNil(link.Close())) }) return link, int(ifIndex) } ================================================ FILE: link/netns.go ================================================ //go:build !windows package link import ( "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) // NetNsLink is a program attached to a network namespace. type NetNsLink struct { RawLink } // AttachNetNs attaches a program to a network namespace. 
func AttachNetNs(ns int, prog *ebpf.Program) (*NetNsLink, error) { var attach ebpf.AttachType switch t := prog.Type(); t { case ebpf.FlowDissector: attach = ebpf.AttachFlowDissector case ebpf.SkLookup: attach = ebpf.AttachSkLookup default: return nil, fmt.Errorf("can't attach %v to network namespace", t) } link, err := AttachRawLink(RawLinkOptions{ Target: ns, Program: prog, Attach: attach, }) if err != nil { return nil, err } return &NetNsLink{*link}, nil } func (ns *NetNsLink) Info() (*Info, error) { var info sys.NetNsLinkInfo if err := sys.ObjInfo(ns.fd, &info); err != nil { return nil, fmt.Errorf("netns link info: %s", err) } extra := &NetNsInfo{ NetnsInode: info.NetnsIno, AttachType: info.AttachType, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } ================================================ FILE: link/netns_test.go ================================================ //go:build !windows package link import ( "os" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal/testutils" ) func TestSkLookup(t *testing.T) { testutils.SkipOnOldKernel(t, "5.8", "sk_lookup program") prog := mustLoadProgram(t, ebpf.SkLookup, ebpf.AttachSkLookup, "") netns, err := os.Open("/proc/self/ns/net") if err != nil { t.Fatal(err) } defer netns.Close() link, err := AttachNetNs(int(netns.Fd()), prog) if err != nil { t.Fatal("Can't attach link:", err) } testLink(t, link, prog) } func createSkLookupProgram() (*ebpf.Program, error) { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Type: ebpf.SkLookup, AttachType: ebpf.AttachSkLookup, License: "MIT", Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, }) if err != nil { return nil, err } return prog, nil } func ExampleAttachNetNs() { prog, err := createSkLookupProgram() if err != nil { panic(err) } defer prog.Close() // This can be a path to another netns as well. 
netns, err := os.Open("/proc/self/ns/net") if err != nil { panic(err) } defer netns.Close() link, err := AttachNetNs(int(netns.Fd()), prog) if err != nil { panic(err) } // The socket lookup program is now active until Close(). link.Close() } ================================================ FILE: link/perf_event.go ================================================ //go:build !windows package link import ( "errors" "fmt" "os" "unsafe" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) // Getting the terminology right is usually the hardest part. For posterity and // for staying sane during implementation: // // - trace event: Representation of a kernel runtime hook. Filesystem entries // under /events. Can be tracepoints (static), kprobes or uprobes. // Can be instantiated into perf events (see below). // - tracepoint: A predetermined hook point in the kernel. Exposed as trace // events in (sub)directories under /events. Cannot be closed or // removed, they are static. // - k(ret)probe: Ephemeral trace events based on entry or exit points of // exported kernel symbols. kprobe-based (tracefs) trace events can be // created system-wide by writing to the /kprobe_events file, or // they can be scoped to the current process by creating PMU perf events. // - u(ret)probe: Ephemeral trace events based on user provides ELF binaries // and offsets. uprobe-based (tracefs) trace events can be // created system-wide by writing to the /uprobe_events file, or // they can be scoped to the current process by creating PMU perf events. // - perf event: An object instantiated based on an existing trace event or // kernel symbol. Referred to by fd in userspace. // Exactly one eBPF program can be attached to a perf event. Multiple perf // events can be created from a single trace event. 
Closing a perf event // stops any further invocations of the attached eBPF program. var ( errInvalidInput = tracefs.ErrInvalidInput ) const ( perfAllThreads = -1 ) // A perfEvent represents a perf event kernel object. Exactly one eBPF program // can be attached to it. It is created based on a tracefs trace event or a // Performance Monitoring Unit (PMU). type perfEvent struct { // Trace event backing this perfEvent. May be nil. tracefsEvent *tracefs.Event // This is the perf event FD. fd *sys.FD } func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent { pe := &perfEvent{event, fd} return pe } func (pe *perfEvent) Close() error { // We close the perf event before attempting to remove the tracefs event. if err := pe.fd.Close(); err != nil { return fmt.Errorf("closing perf event fd: %w", err) } if pe.tracefsEvent != nil { return pe.tracefsEvent.Close() } return nil } // PerfEvent is implemented by some Link types which use a perf event under // the hood. type PerfEvent interface { // PerfEvent returns a file for the underlying perf event. // // It is the callers responsibility to close the returned file. // // Making changes to the associated perf event lead to // undefined behaviour. PerfEvent() (*os.File, error) } // perfEventLink represents a bpf perf link. 
type perfEventLink struct { RawLink pe *perfEvent } func (pl *perfEventLink) isLink() {} func (pl *perfEventLink) Close() error { if err := pl.fd.Close(); err != nil { return fmt.Errorf("perf link close: %w", err) } // when created from pinned link if pl.pe == nil { return nil } if err := pl.pe.Close(); err != nil { return fmt.Errorf("perf event close: %w", err) } return nil } func (pl *perfEventLink) Update(_ *ebpf.Program) error { return fmt.Errorf("perf event link update: %w", ErrNotSupported) } var _ PerfEvent = (*perfEventLink)(nil) func (pl *perfEventLink) PerfEvent() (*os.File, error) { // when created from pinned link if pl.pe == nil { return nil, ErrNotSupported } fd, err := pl.pe.fd.Dup() if err != nil { return nil, err } return fd.File("perf-event") } // queryInfoWithString queries object info that contains a string field. // // The passed stringField and stringLengthField must point to the string field // and its length field inside the info struct respectively. // // It returns the queried string and fills in the passed info struct. func queryInfoWithString(fd *sys.FD, info sys.Info, stringField *sys.TypedPointer[byte], stringLengthField *uint32) (string, error) { // Query info to get the length if err := sys.ObjInfo(fd, info); err != nil { return "", err } // The stringLengthField pointer points to a field inside info, so it is now populated. var stringData = make([]byte, *stringLengthField) *stringField = sys.SlicePointer(stringData) // Query info again to fill in the string. // Since the stringField pointer points to a field inside info, // the info now contains the pointer to our allocated stringData. 
if err := sys.ObjInfo(fd, info); err != nil { return "", fmt.Errorf("object info with string: %s", err) } return unix.ByteSliceToString(stringData), nil } func (pl *perfEventLink) Info() (*Info, error) { var info sys.PerfEventLinkInfo if err := sys.ObjInfo(pl.fd, &info); err != nil { return nil, fmt.Errorf("perf event link info: %s", err) } var extra2 interface{} switch info.PerfEventType { case PerfEventKprobe, PerfEventKretprobe: var kprobeInfo sys.KprobeLinkInfo funcName, err := queryInfoWithString(pl.fd, &kprobeInfo, &kprobeInfo.FuncName, &kprobeInfo.NameLen) if err != nil { return nil, fmt.Errorf("kprobe link info: %s", err) } extra2 = &KprobeInfo{ Address: kprobeInfo.Addr, Missed: kprobeInfo.Missed, Function: funcName, Offset: kprobeInfo.Offset, } case PerfEventUprobe, PerfEventUretprobe: var uprobeInfo sys.UprobeLinkInfo fileName, err := queryInfoWithString(pl.fd, &uprobeInfo, &uprobeInfo.FileName, &uprobeInfo.NameLen) if err != nil { return nil, fmt.Errorf("uprobe link info: %s", err) } extra2 = &UprobeInfo{ Offset: uprobeInfo.Offset, Cookie: uprobeInfo.Cookie, OffsetReferenceCount: uprobeInfo.RefCtrOffset, File: fileName, } case PerfEventTracepoint: var tracepointInfo sys.TracepointLinkInfo tpName, err := queryInfoWithString(pl.fd, &tracepointInfo, &tracepointInfo.TpName, &tracepointInfo.NameLen) if err != nil { return nil, fmt.Errorf("perf event link info: %w", err) } extra2 = &TracepointInfo{ Tracepoint: tpName, Cookie: tracepointInfo.Cookie, } case PerfEventEvent: var eventInfo sys.EventLinkInfo err := sys.ObjInfo(pl.fd, &eventInfo) if err != nil { return nil, fmt.Errorf("trace point link info: %s", err) } extra2 = &EventInfo{ Config: eventInfo.Config, Type: eventInfo.EventType, Cookie: eventInfo.Cookie, } } extra := &PerfEventInfo{ Type: info.PerfEventType, extra: extra2, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } // perfEventIoctl implements Link and handles the perf event lifecycle // via ioctl(). 
type perfEventIoctl struct { *perfEvent } func (pi *perfEventIoctl) isLink() {} // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"), // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array // owned by the perf event, which means multiple programs can be attached // simultaneously. // // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event // returns EEXIST. // // Detaching a program from a perf event is currently not possible, so a // program replacement mechanism cannot be implemented for perf events. func (pi *perfEventIoctl) Update(_ *ebpf.Program) error { return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported) } func (pi *perfEventIoctl) Pin(string) error { return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported) } func (pi *perfEventIoctl) Unpin() error { return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported) } func (pi *perfEventIoctl) Detach() error { return fmt.Errorf("perf event ioctl detach: %w", ErrNotSupported) } func (pi *perfEventIoctl) Info() (*Info, error) { return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported) } var _ PerfEvent = (*perfEventIoctl)(nil) func (pi *perfEventIoctl) PerfEvent() (*os.File, error) { fd, err := pi.fd.Dup() if err != nil { return nil, err } return fd.File("perf-event") } // attach the given eBPF prog to the perf event stored in pe. // pe must contain a valid perf event fd. // prog's type must match the program type stored in pe. 
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) { if prog == nil { return nil, errors.New("cannot attach a nil program") } if prog.FD() < 0 { return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) } if err := haveBPFLinkPerfEvent(); err == nil { return attachPerfEventLink(pe, prog, cookie) } if cookie != 0 { return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) } return attachPerfEventIoctl(pe, prog) } func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) { // Assign the eBPF program to the perf event. err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) if err != nil { return nil, fmt.Errorf("setting perf event bpf program: %w", err) } // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values. if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { return nil, fmt.Errorf("enable perf event: %s", err) } return &perfEventIoctl{pe}, nil } // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+). // // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) { fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ ProgFd: uint32(prog.FD()), TargetFd: pe.fd.Uint(), AttachType: sys.BPF_PERF_EVENT, BpfCookie: cookie, }) if err != nil { return nil, fmt.Errorf("cannot create bpf perf link: %v", err) } return &perfEventLink{RawLink{fd: fd}, pe}, nil } // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. func unsafeStringPtr(str string) (unsafe.Pointer, error) { p, err := unix.BytePtrFromString(str) if err != nil { return nil, err } return unsafe.Pointer(p), nil } // openTracepointPerfEvent opens a tracepoint-type perf event. 
// System-wide
// [k,u]probes created by writing to /[k,u]probe_events are tracepoints
// behind the scenes, and can be attached to using these perf events.
//
// tid is used as the perf event's Config value. pid is passed through to
// perf_event_open unchanged.
func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
	attr := unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_TRACEPOINT,
		Config:      tid,
		Sample_type: unix.PERF_SAMPLE_RAW,
		Sample:      1,
		Wakeup:      1,
	}

	// CPU 0 is used when pid is perfAllThreads; for any other pid the cpu
	// argument is -1.
	cpu := 0
	if pid != perfAllThreads {
		cpu = -1
	}
	fd, err := unix.PerfEventOpen(&attr, pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
	if err != nil {
		return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
	}

	return sys.NewFD(fd)
}

// Probe BPF perf link.
//
// The probe loads a minimal Kprobe program and tries to create a perf event
// link without specifying a target fd. EINVAL is reported as
// internal.ErrNotSupported, EBADF is treated as "feature present", and any
// other result of the syscall is returned as is.
//
// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", func() error {
	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Name: "probe_bpf_perf_link",
		Type: ebpf.Kprobe,
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
		License: "MIT",
	})
	if err != nil {
		return err
	}
	defer prog.Close()

	_, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
		ProgFd:     uint32(prog.FD()),
		AttachType: sys.BPF_PERF_EVENT,
	})
	if errors.Is(err, unix.EINVAL) {
		return internal.ErrNotSupported
	}
	if errors.Is(err, unix.EBADF) {
		return nil
	}
	return err
}, "5.15")

================================================
FILE: link/perf_event_test.go
================================================
//go:build !windows

package link

import (
	"testing"

	"github.com/cilium/ebpf/internal/testutils"
)

// TestHaveBPFLinkPerfEvent runs the shared feature test check against
// haveBPFLinkPerfEvent.
func TestHaveBPFLinkPerfEvent(t *testing.T) {
	testutils.CheckFeatureTest(t, haveBPFLinkPerfEvent)
}

================================================
FILE: link/program.go
================================================
//go:build !windows

package link

import (
	"fmt"
	"runtime"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/sys"
)

type RawAttachProgramOptions struct {
	// Target to query.
This is usually a file descriptor but may refer to // something else based on the attach type. Target int // Program to attach. Program *ebpf.Program // Attach must match the attach type of Program. Attach ebpf.AttachType // Attach relative to an anchor. Optional. Anchor Anchor // Flags control the attach behaviour. Specify an Anchor instead of // F_LINK, F_ID, F_BEFORE, F_AFTER and F_REPLACE. Optional. Flags uint32 // Only attach if the internal revision matches the given value. ExpectedRevision uint64 } // RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH. // // You should use one of the higher level abstractions available in this // package if possible. func RawAttachProgram(opts RawAttachProgramOptions) error { if opts.Flags&anchorFlags != 0 { return fmt.Errorf("disallowed flags: use Anchor to specify attach target") } attr := sys.ProgAttachAttr{ TargetFdOrIfindex: uint32(opts.Target), AttachBpfFd: uint32(opts.Program.FD()), AttachType: uint32(opts.Attach), AttachFlags: uint32(opts.Flags), ExpectedRevision: opts.ExpectedRevision, } if opts.Anchor != nil { fdOrID, flags, err := opts.Anchor.anchor() if err != nil { return fmt.Errorf("attach program: %w", err) } if flags == sys.BPF_F_REPLACE { // Ensure that replacing a program works on old kernels. attr.ReplaceBpfFd = fdOrID } else { attr.RelativeFdOrId = fdOrID attr.AttachFlags |= flags } } if err := sys.ProgAttach(&attr); err != nil { if haveFeatErr := haveProgAttach(); haveFeatErr != nil { return haveFeatErr } return fmt.Errorf("attach program: %w", err) } runtime.KeepAlive(opts.Program) return nil } type RawDetachProgramOptions RawAttachProgramOptions // RawDetachProgram is a low level wrapper around BPF_PROG_DETACH. // // You should use one of the higher level abstractions available in this // package if possible. 
func RawDetachProgram(opts RawDetachProgramOptions) error { if opts.Flags&anchorFlags != 0 { return fmt.Errorf("disallowed flags: use Anchor to specify attach target") } attr := sys.ProgDetachAttr{ TargetFdOrIfindex: uint32(opts.Target), AttachBpfFd: uint32(opts.Program.FD()), AttachType: uint32(opts.Attach), ExpectedRevision: opts.ExpectedRevision, } if opts.Anchor != nil { fdOrID, flags, err := opts.Anchor.anchor() if err != nil { return fmt.Errorf("detach program: %w", err) } attr.RelativeFdOrId = fdOrID attr.AttachFlags |= flags } if err := sys.ProgDetach(&attr); err != nil { if haveFeatErr := haveProgAttach(); haveFeatErr != nil { return haveFeatErr } return fmt.Errorf("can't detach program: %w", err) } return nil } ================================================ FILE: link/program_test.go ================================================ //go:build !windows package link import ( "fmt" "net" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" "github.com/go-quicktest/qt" ) func TestProgramAlter(t *testing.T) { testutils.SkipOnOldKernel(t, "4.13", "SkSKB type") prog := mustLoadProgram(t, ebpf.SkSKB, 0, "") var sockMap *ebpf.Map sockMap, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.MapType(15), // BPF_MAP_TYPE_SOCKMAP KeySize: 4, ValueSize: 4, MaxEntries: 2, }) if err != nil { t.Fatal(err) } defer sockMap.Close() err = RawAttachProgram(RawAttachProgramOptions{ Target: sockMap.FD(), Program: prog, Attach: ebpf.AttachSkSKBStreamParser, }) if err != nil { t.Fatal(err) } err = RawDetachProgram(RawDetachProgramOptions{ Target: sockMap.FD(), Program: prog, Attach: ebpf.AttachSkSKBStreamParser, }) if err != nil { t.Fatal(err) } } func TestRawAttachProgramAnchor(t *testing.T) { testutils.SkipOnOldKernel(t, "6.6", "attach anchor") iface, err := net.InterfaceByName("lo") qt.Assert(t, qt.IsNil(err)) a := mustLoadProgram(t, ebpf.SchedCLS, 0, "") info, err := a.Info() qt.Assert(t, qt.IsNil(err)) aID, _ := info.ID() err = 
RawAttachProgram(RawAttachProgramOptions{
		Target:  iface.Index,
		Program: a,
		Attach:  ebpf.AttachTCXIngress,
	})
	qt.Assert(t, qt.IsNil(err))
	defer RawDetachProgram(RawDetachProgramOptions{
		Target:  iface.Index,
		Program: a,
		Attach:  ebpf.AttachTCXIngress,
	})

	// A second program attached via a TCX link, used for the link anchors.
	link, err := AttachTCX(TCXOptions{
		Interface: iface.Index,
		Program:   mustLoadProgram(t, ebpf.SchedCLS, 0, ""),
		Attach:    ebpf.AttachTCXIngress,
	})
	qt.Assert(t, qt.IsNil(err))
	defer link.Close()

	linkInfo, err := link.Info()
	qt.Assert(t, qt.IsNil(err))

	b := mustLoadProgram(t, ebpf.SchedCLS, 0, "")

	// Attach and detach b once per anchor kind.
	for _, anchor := range []Anchor{
		Head(),
		Tail(),
		AfterProgram(a),
		AfterProgramByID(aID),
		AfterLink(link),
		AfterLinkByID(linkInfo.ID),
	} {
		t.Run(fmt.Sprintf("%T", anchor), func(t *testing.T) {
			err := RawAttachProgram(RawAttachProgramOptions{
				Target:  iface.Index,
				Program: b,
				Attach:  ebpf.AttachTCXIngress,
				Anchor:  anchor,
			})
			qt.Assert(t, qt.IsNil(err))

			// Detach doesn't allow first or last anchor.
			if _, ok := anchor.(firstAnchor); ok {
				anchor = nil
			} else if _, ok := anchor.(lastAnchor); ok {
				anchor = nil
			}

			err = RawDetachProgram(RawDetachProgramOptions{
				Target:  iface.Index,
				Program: b,
				Attach:  ebpf.AttachTCXIngress,
				Anchor:  anchor,
			})
			qt.Assert(t, qt.IsNil(err))
		})
	}

	// Check that legacy replacement with a program works.
	err = RawAttachProgram(RawAttachProgramOptions{
		Target:  iface.Index,
		Program: b,
		Attach:  ebpf.AttachTCXIngress,
		Anchor:  ReplaceProgram(a),
	})
	qt.Assert(t, qt.IsNil(err))

	err = RawDetachProgram(RawDetachProgramOptions{
		Target:  iface.Index,
		Program: b,
		Attach:  ebpf.AttachTCXIngress,
	})
	qt.Assert(t, qt.IsNil(err))
}

================================================
FILE: link/query.go
================================================
//go:build !windows

package link

import (
	"fmt"
	"slices"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/sys"
)

// QueryOptions defines additional parameters when querying for programs.
type QueryOptions struct {
	// Target to query.
// This is usually a file descriptor but may refer to
	// something else based on the attach type.
	Target int
	// Attach specifies the AttachType of the programs queried for
	Attach ebpf.AttachType
	// QueryFlags are flags for BPF_PROG_QUERY, e.g. BPF_F_QUERY_EFFECTIVE
	QueryFlags uint32
}

// QueryResult describes which programs and links are active.
type QueryResult struct {
	// List of attached programs.
	Programs []AttachedProgram

	// Incremented by one every time the set of attached programs changes.
	// May be zero if not supported by the [ebpf.AttachType].
	Revision uint64
}

// HaveLinkInfo returns true if the kernel supports querying link information
// for a particular [ebpf.AttachType].
//
// It reports true as soon as at least one of the attached programs carries a
// non-zero link ID.
func (qr *QueryResult) HaveLinkInfo() bool {
	return slices.ContainsFunc(qr.Programs,
		func(ap AttachedProgram) bool {
			_, ok := ap.LinkID()
			return ok
		},
	)
}

// AttachedProgram is a single entry of a QueryResult.
type AttachedProgram struct {
	ID     ebpf.ProgramID
	linkID ID // zero when no link ID is known, see LinkID.
}

// LinkID returns the ID associated with the program.
//
// Returns 0, false if the kernel doesn't support retrieving the ID or if the
// program wasn't attached via a link.
func (ap *AttachedProgram) LinkID() (ID, bool) {
	return ap.linkID, ap.linkID != 0
}

// QueryPrograms retrieves a list of programs for the given AttachType.
//
// Returns a slice of attached programs, which may be empty.
// revision counts how many times the set of attached programs has changed and
// may be zero if not supported by the [ebpf.AttachType].
// Returns ErrNotSupported on a kernel without BPF_PROG_QUERY
func QueryPrograms(opts QueryOptions) (*QueryResult, error) {
	// query the number of programs to allocate correct slice size
	attr := sys.ProgQueryAttr{
		TargetFdOrIfindex: uint32(opts.Target),
		AttachType:        sys.AttachType(opts.Attach),
		QueryFlags:        opts.QueryFlags,
	}
	err := sys.ProgQuery(&attr)
	if err != nil {
		// Report the feature probe's error in preference to the raw syscall
		// error when BPF_PROG_QUERY itself is unavailable.
		if haveFeatErr := haveProgQuery(); haveFeatErr != nil {
			return nil, fmt.Errorf("query programs: %w", haveFeatErr)
		}
		return nil, fmt.Errorf("query programs: %w", err)
	}
	// Nothing attached: skip the second syscall.
	if attr.Count == 0 {
		return &QueryResult{Revision: attr.Revision}, nil
	}

	// The minimum bpf_mprog revision is 1, so we can use the field to detect
	// whether the attach type supports link ids.
	haveLinkIDs := attr.Revision != 0

	count := attr.Count
	progIds := make([]ebpf.ProgramID, count)
	// Second query: same parameters, now with buffers for the results.
	attr = sys.ProgQueryAttr{
		TargetFdOrIfindex: uint32(opts.Target),
		AttachType:        sys.AttachType(opts.Attach),
		QueryFlags:        opts.QueryFlags,
		Count:             count,
		ProgIds:           sys.SlicePointer(progIds),
	}

	var linkIds []ID
	if haveLinkIDs {
		linkIds = make([]ID, count)
		attr.LinkIds = sys.SlicePointer(linkIds)
	}

	if err := sys.ProgQuery(&attr); err != nil {
		return nil, fmt.Errorf("query programs: %w", err)
	}

	// NB: attr.Count might have changed between the two syscalls.
var programs []AttachedProgram
	// Only the first attr.Count entries are used.
	for i, id := range progIds[:attr.Count] {
		ap := AttachedProgram{ID: id}
		if haveLinkIDs {
			ap.linkID = linkIds[i]
		}
		programs = append(programs, ap)
	}

	return &QueryResult{programs, attr.Revision}, nil
}

================================================
FILE: link/query_test.go
================================================
//go:build !windows

package link

import (
	"os"
	"slices"
	"testing"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/testutils"

	"github.com/go-quicktest/qt"
)

// TestQueryPrograms runs QueryPrograms against several fixtures. Each fixture
// returns the attached program, a Link (may be nil) and the options to query
// with.
func TestQueryPrograms(t *testing.T) {
	for name, fn := range map[string]func(*testing.T) (*ebpf.Program, Link, QueryOptions){
		"cgroup":      queryCgroupProgAttachFixtures,
		"cgroup link": queryCgroupLinkFixtures,
		"netns":       queryNetNSFixtures,
		"tcx":         queryTCXFixtures,
	} {
		t.Run(name, func(t *testing.T) {
			prog, link, opts := fn(t)
			result, err := QueryPrograms(opts)
			testutils.SkipIfNotSupported(t, err)
			qt.Assert(t, qt.IsNil(err))

			progInfo, err := prog.Info()
			qt.Assert(t, qt.IsNil(err))
			progID, _ := progInfo.ID()

			// The fixture's program must show up in the query result.
			i := slices.IndexFunc(result.Programs, func(ap AttachedProgram) bool {
				return ap.ID == progID
			})
			qt.Assert(t, qt.Not(qt.Equals(i, -1)))

			if name == "tcx" {
				qt.Assert(t, qt.Not(qt.Equals(result.Revision, 0)))
			}

			// NOTE(review): link is dereferenced here, so a fixture that
			// returns a nil Link must never produce link info.
			if result.HaveLinkInfo() {
				ap := result.Programs[i]
				linkInfo, err := link.Info()
				qt.Assert(t, qt.IsNil(err))

				linkID, ok := ap.LinkID()
				qt.Assert(t, qt.IsTrue(ok))
				qt.Assert(t, qt.Equals(linkID, linkInfo.ID))
			}
		})
	}
}

// queryCgroupProgAttachFixtures attaches a program to a cgroup via
// newProgAttachCgroup and returns no Link.
func queryCgroupProgAttachFixtures(t *testing.T) (*ebpf.Program, Link, QueryOptions) {
	cgroup, prog := mustCgroupFixtures(t)

	link, err := newProgAttachCgroup(cgroup, ebpf.AttachCGroupInetEgress, prog, flagAllowOverride)
	if err != nil {
		t.Fatal("Can't create link:", err)
	}
	t.Cleanup(func() {
		qt.Assert(t, qt.IsNil(link.Close()))
	})

	return prog, nil, QueryOptions{
		Target: int(cgroup.Fd()),
		Attach: ebpf.AttachCGroupInetEgress,
	}
}

func queryCgroupLinkFixtures(t *testing.T) (*ebpf.Program, Link, QueryOptions) {
	cgroup, prog :=
mustCgroupFixtures(t) link, err := newLinkCgroup(cgroup, ebpf.AttachCGroupInetEgress, prog) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create link:", err) } t.Cleanup(func() { qt.Assert(t, qt.IsNil(link.Close())) }) return prog, nil, QueryOptions{ Target: int(cgroup.Fd()), Attach: ebpf.AttachCGroupInetEgress, } } func queryNetNSFixtures(t *testing.T) (*ebpf.Program, Link, QueryOptions) { testutils.SkipOnOldKernel(t, "4.20", "flow_dissector program") prog := mustLoadProgram(t, ebpf.FlowDissector, ebpf.AttachFlowDissector, "") // RawAttachProgramOptions.Target needs to be 0, as PROG_ATTACH with namespaces // only works with the threads current netns. Any other fd will be rejected. if err := RawAttachProgram(RawAttachProgramOptions{ Target: 0, Program: prog, Attach: ebpf.AttachFlowDissector, }); err != nil { t.Fatal(err) } t.Cleanup(func() { err := RawDetachProgram(RawDetachProgramOptions{ Target: 0, Program: prog, Attach: ebpf.AttachFlowDissector, }) if err != nil { t.Fatal(err) } }) netns, err := os.Open("/proc/self/ns/net") qt.Assert(t, qt.IsNil(err)) t.Cleanup(func() { netns.Close() }) return prog, nil, QueryOptions{ Target: int(netns.Fd()), Attach: ebpf.AttachFlowDissector, } } func queryTCXFixtures(t *testing.T) (*ebpf.Program, Link, QueryOptions) { testutils.SkipOnOldKernel(t, "6.6", "TCX link") prog := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachTCXIngress, "") link, iface := mustAttachTCX(t, prog, ebpf.AttachTCXIngress) return prog, link, QueryOptions{ Target: iface, Attach: ebpf.AttachTCXIngress, } } ================================================ FILE: link/raw_tracepoint.go ================================================ //go:build !windows package link import ( "errors" "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) type RawTracepointOptions struct { // Tracepoint name. 
Name string // Program must be of type RawTracepoint* Program *ebpf.Program } // AttachRawTracepoint links a BPF program to a raw_tracepoint. // // Requires at least Linux 4.17. func AttachRawTracepoint(opts RawTracepointOptions) (Link, error) { if t := opts.Program.Type(); t != ebpf.RawTracepoint && t != ebpf.RawTracepointWritable { return nil, fmt.Errorf("invalid program type %s, expected RawTracepoint(Writable)", t) } if opts.Program.FD() < 0 { return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) } fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{ Name: sys.NewStringPointer(opts.Name), ProgFd: uint32(opts.Program.FD()), }) if err != nil { return nil, err } err = haveBPFLink() if errors.Is(err, ErrNotSupported) { // Prior to commit 70ed506c3bbc ("bpf: Introduce pinnable bpf_link abstraction") // raw_tracepoints are just a plain fd. return &simpleRawTracepoint{fd}, nil } if err != nil { return nil, err } return &rawTracepoint{RawLink{fd: fd}}, nil } type simpleRawTracepoint struct { fd *sys.FD } var _ Link = (*simpleRawTracepoint)(nil) func (frt *simpleRawTracepoint) isLink() {} func (frt *simpleRawTracepoint) Close() error { return frt.fd.Close() } func (frt *simpleRawTracepoint) Update(_ *ebpf.Program) error { return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported) } func (frt *simpleRawTracepoint) Pin(string) error { return fmt.Errorf("pin raw_tracepoint: %w", ErrNotSupported) } func (frt *simpleRawTracepoint) Unpin() error { return fmt.Errorf("unpin raw_tracepoint: %w", ErrNotSupported) } func (frt *simpleRawTracepoint) Detach() error { return fmt.Errorf("detach raw_tracepoint: %w", ErrNotSupported) } func (frt *simpleRawTracepoint) Info() (*Info, error) { return nil, fmt.Errorf("can't get raw_tracepoint info: %w", ErrNotSupported) } type rawTracepoint struct { RawLink } var _ Link = (*rawTracepoint)(nil) func (rt *rawTracepoint) Update(_ *ebpf.Program) error { return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported) } func 
(rt *rawTracepoint) Info() (*Info, error) {
	var info sys.RawTracepointLinkInfo
	// Fetch the link info together with the tracepoint name string.
	name, err := queryInfoWithString(rt.fd, &info, &info.TpName, &info.TpNameLen)
	if err != nil {
		return nil, err
	}

	return &Info{
		info.Type,
		info.Id,
		ebpf.ProgramID(info.ProgId),
		&RawTracepointInfo{
			Name: name,
		},
	}, nil
}

================================================
FILE: link/raw_tracepoint_test.go
================================================
//go:build !windows

package link

import (
	"testing"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/testutils"
)

// TestRawTracepoint attaches a RawTracepoint program to "cgroup_mkdir" and
// runs the shared link checks on the result.
func TestRawTracepoint(t *testing.T) {
	testutils.SkipOnOldKernel(t, "4.17", "BPF_RAW_TRACEPOINT API")

	prog := mustLoadProgram(t, ebpf.RawTracepoint, 0, "")

	link, err := AttachRawTracepoint(RawTracepointOptions{
		Name:    "cgroup_mkdir",
		Program: prog,
	})
	if err != nil {
		t.Fatal(err)
	}

	testLink(t, link, prog)
}

// TestRawTracepointInfo checks that Info() reports the raw tracepoint link
// type and the name the program was attached to.
func TestRawTracepointInfo(t *testing.T) {
	testutils.SkipOnOldKernel(t, "5.8", "bpf_link_info_raw_tracepoint")

	prog := mustLoadProgram(t, ebpf.RawTracepoint, 0, "")

	link, err := AttachRawTracepoint(RawTracepointOptions{
		Name:    "cgroup_mkdir",
		Program: prog,
	})
	if err != nil {
		t.Fatal(err)
	}
	defer link.Close()

	info, err := link.Info()
	if err != nil {
		t.Fatal(err)
	}

	qt.Assert(t, qt.Equals(RawTracepointType, info.Type))
	tpInfo := info.RawTracepoint()
	qt.Assert(t, qt.Equals(tpInfo.Name, "cgroup_mkdir"))
}

// NOTE(review): the test name refers to the writable variant, but the program
// is loaded as ebpf.RawTracepoint - confirm this is intended.
func TestRawTracepoint_writable(t *testing.T) {
	testutils.SkipOnOldKernel(t, "5.2", "BPF_RAW_TRACEPOINT_WRITABLE API")

	prog := mustLoadProgram(t, ebpf.RawTracepoint, 0, "")

	defer prog.Close()

	link, err := AttachRawTracepoint(RawTracepointOptions{
		Name:    "cgroup_rmdir",
		Program: prog,
	})
	if err != nil {
		t.Fatal(err)
	}

	testLink(t, link, prog)
}

================================================
FILE: link/socket_filter.go
================================================
//go:build !windows

package link

import (
	"syscall"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/unix"
)

//
// AttachSocketFilter attaches a SocketFilter BPF program to a socket.
//
// The program's fd is set on the socket with SO_ATTACH_BPF. An error from
// setsockopt takes precedence over an error from rawConn.Control.
func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error {
	rawConn, err := conn.SyscallConn()
	if err != nil {
		return err
	}
	// ssoErr carries the setsockopt result out of the Control callback.
	var ssoErr error
	err = rawConn.Control(func(fd uintptr) {
		ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
	})
	if ssoErr != nil {
		return ssoErr
	}
	return err
}

// DetachSocketFilter detaches a SocketFilter BPF program from a socket.
//
// An error from setsockopt takes precedence over an error from
// rawConn.Control.
func DetachSocketFilter(conn syscall.Conn) error {
	rawConn, err := conn.SyscallConn()
	if err != nil {
		return err
	}
	// ssoErr carries the setsockopt result out of the Control callback.
	var ssoErr error
	err = rawConn.Control(func(fd uintptr) {
		ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
	})
	if ssoErr != nil {
		return ssoErr
	}
	return err
}

================================================
FILE: link/socket_filter_test.go
================================================
//go:build !windows

package link

import (
	"net"
	"testing"

	"github.com/cilium/ebpf"
)

// TestSocketFilterAttach attaches a SocketFilter program to a UDP socket
// listening on 127.0.0.1 and detaches it again.
func TestSocketFilterAttach(t *testing.T) {
	prog := mustLoadProgram(t, ebpf.SocketFilter, 0, "")
	defer prog.Close()

	conn, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1)})
	if err != nil {
		t.Fatal(err)
	}
	defer conn.Close()

	if err := AttachSocketFilter(conn, prog); err != nil {
		t.Fatal(err)
	}

	if err := DetachSocketFilter(conn); err != nil {
		t.Fatal(err)
	}
}

================================================
FILE: link/struct_ops.go
================================================
package link

import (
	"fmt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/sys"
)

// structOpsLink is the Link returned by AttachStructOps.
type structOpsLink struct {
	RawLink
}

// Update always fails with an error wrapping ErrNotSupported.
func (*structOpsLink) Update(*ebpf.Program) error {
	return fmt.Errorf("update struct_ops link: %w", ErrNotSupported)
}

// StructOpsOptions holds the parameters for AttachStructOps.
type StructOpsOptions struct {
	// Map to attach. Must be non-nil and of type ebpf.StructOpsMap.
	Map *ebpf.Map
}

// AttachStructOps attaches a struct_ops map (created from a ".struct_ops.link"
// section) to its kernel subsystem via a BPF link.
func AttachStructOps(opts StructOpsOptions) (Link, error) { m := opts.Map if m == nil { return nil, fmt.Errorf("map cannot be nil") } if t := m.Type(); t != ebpf.StructOpsMap { return nil, fmt.Errorf("can't attach non-struct_ops map") } mapFD := m.FD() if mapFD <= 0 { return nil, fmt.Errorf("invalid map: %s", sys.ErrClosedFd) } fd, err := sys.LinkCreate(&sys.LinkCreateAttr{ // For struct_ops links, the mapFD must be passed as ProgFd. ProgFd: uint32(mapFD), AttachType: sys.AttachType(ebpf.AttachStructOps), TargetFd: 0, }) if err != nil { return nil, fmt.Errorf("attach StructOps: create link: %w", err) } return &structOpsLink{RawLink{fd: fd}}, nil } ================================================ FILE: link/struct_ops_test.go ================================================ //go:build !windows package link import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" ) func TestStructOps(t *testing.T) { testutils.SkipOnOldKernel(t, "6.12", "bpf_testmod_ops") m := mustStructOpsFixtures(t) l, err := AttachStructOps(StructOpsOptions{Map: m}) qt.Assert(t, qt.IsNil(err)) testLink(t, l, nil) } func mustStructOpsFixtures(tb testing.TB) *ebpf.Map { tb.Helper() testutils.SkipIfNotSupported(tb, haveBPFLink()) userData := []byte{ // test_1 func ptr (8B) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // test_2 func ptr (8B) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // data (4B) + padding (4B) 0xef, 0xbe, 0xad, 0xde, 0x00, 0x00, 0x00, 0x00, } spec := &ebpf.CollectionSpec{ Maps: map[string]*ebpf.MapSpec{ "testmod_ops": { Name: "testmod_ops", Type: ebpf.StructOpsMap, MaxEntries: 1, Flags: sys.BPF_F_LINK, Key: &btf.Int{Size: 4}, KeySize: 4, ValueSize: 24, Value: &btf.Struct{ Name: "bpf_testmod_ops", Size: 24, Members: []btf.Member{ { Name: "test_1", Type: &btf.Pointer{ Target: &btf.FuncProto{ Params: []btf.FuncParam{}, Return: 
&btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}}},
							Offset: 0,
						},
						{
							Name: "test_2",
							Type: &btf.Pointer{
								Target: &btf.FuncProto{
									Params: []btf.FuncParam{
										{Type: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}},
										{Type: &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}},
									},
									Return: (*btf.Void)(nil),
								},
							},
							Offset: 64,
						},
						{
							Name:   "data",
							Type:   &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed},
							Offset: 128, // bits
						},
					},
				},
				Contents: []ebpf.MapKV{
					{
						Key:   uint32(0),
						Value: userData,
					},
				},
			},
		},
		Programs: map[string]*ebpf.ProgramSpec{
			"test_1": {
				Name:        "test_1",
				Type:        ebpf.StructOps,
				AttachTo:    "bpf_testmod_ops:test_1",
				License:     "GPL",
				SectionName: "struct_ops/test_1",
				Instructions: asm.Instructions{
					asm.Mov.Imm(asm.R0, 0),
					asm.Return(),
				},
			},
		},
		Variables: map[string]*ebpf.VariableSpec{},
	}

	coll, err := ebpf.NewCollection(spec)
	testutils.SkipIfNotSupported(tb, err)
	qt.Assert(tb, qt.IsNil(err))
	tb.Cleanup(func() { coll.Close() })

	// The map stays owned by the collection, which is closed on cleanup.
	m := coll.Maps["testmod_ops"]
	qt.Assert(tb, qt.IsNotNil(m))
	return m
}

================================================
FILE: link/syscalls.go
================================================
//go:build !windows

package link

import (
	"errors"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/unix"
)

// haveProgAttach probes for BPF_PROG_ATTACH by loading a minimal CGroupSKB
// program; any load failure is reported as internal.ErrNotSupported.
var haveProgAttach = internal.NewFeatureTest("BPF_PROG_ATTACH", func() error {
	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:    ebpf.CGroupSKB,
		License: "MIT",
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
	})
	if err != nil {
		return internal.ErrNotSupported
	}

	// BPF_PROG_ATTACH was introduced at the same time as CGroupSKB,
	// so being able to load the program is enough to infer that we
	// have the syscall.
prog.Close()
	return nil
}, "4.10")

// haveProgAttachReplace probes whether BPF_PROG_ATTACH accepts the replace
// flag: EINVAL is reported as internal.ErrNotSupported, EBADF as "feature
// present", and any other result of the syscall is returned as is.
var haveProgAttachReplace = internal.NewFeatureTest("BPF_PROG_ATTACH atomic replacement of MULTI progs", func() error {
	if err := haveProgAttach(); err != nil {
		return err
	}

	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:       ebpf.CGroupSKB,
		AttachType: ebpf.AttachCGroupInetIngress,
		License:    "MIT",
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
	})
	if err != nil {
		return internal.ErrNotSupported
	}
	defer prog.Close()

	// We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs.
	// If passing BPF_F_REPLACE gives us EINVAL we know that the feature isn't
	// present.
	attr := sys.ProgAttachAttr{
		// We rely on this being checked after attachFlags.
		TargetFdOrIfindex: ^uint32(0),
		AttachBpfFd:       uint32(prog.FD()),
		AttachType:        uint32(ebpf.AttachCGroupInetIngress),
		AttachFlags:       uint32(flagReplace),
	}

	err = sys.ProgAttach(&attr)
	if errors.Is(err, unix.EINVAL) {
		return internal.ErrNotSupported
	}
	if errors.Is(err, unix.EBADF) {
		return nil
	}
	return err
}, "5.5")

// haveBPFLink probes for bpf_link support by issuing a link create call with
// deliberately invalid fds: EINVAL is reported as internal.ErrNotSupported,
// EBADF as "feature present", and any other result is returned as is.
var haveBPFLink = internal.NewFeatureTest("bpf_link", func() error {
	attr := sys.LinkCreateAttr{
		// This is a hopefully invalid file descriptor, which triggers EBADF.
		TargetFd:   ^uint32(0),
		ProgFd:     ^uint32(0),
		AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress),
	}
	_, err := sys.LinkCreate(&attr)
	if errors.Is(err, unix.EINVAL) {
		return internal.ErrNotSupported
	}
	if errors.Is(err, unix.EBADF) {
		return nil
	}
	return err
}, "5.7")

var haveProgQuery = internal.NewFeatureTest("BPF_PROG_QUERY", func() error {
	attr := sys.ProgQueryAttr{
		// We rely on this being checked during the syscall.
		// With an otherwise correct payload we expect EBADF here
		// as an indication that the feature is present.
TargetFdOrIfindex: ^uint32(0),
		AttachType:        sys.AttachType(ebpf.AttachCGroupInetIngress),
	}

	err := sys.ProgQuery(&attr)

	// EBADF means the feature is present; every other error is reported as
	// ErrNotSupported.
	if errors.Is(err, unix.EBADF) {
		return nil
	}
	if err != nil {
		return ErrNotSupported
	}
	return errors.New("syscall succeeded unexpectedly")
}, "4.15")

// haveTCX probes for TCX link support by creating a link against an interface
// index of ^uint32(0): ENODEV means the feature is present, every other error
// is reported as ErrNotSupported.
var haveTCX = internal.NewFeatureTest("tcx", func() error {
	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:    ebpf.SchedCLS,
		License: "MIT",
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
	})

	if err != nil {
		return internal.ErrNotSupported
	}

	defer prog.Close()
	attr := sys.LinkCreateTcxAttr{
		// We rely on this being checked during the syscall.
		// With an otherwise correct payload we expect ENODEV here
		// as an indication that the feature is present.
		TargetIfindex: ^uint32(0),
		ProgFd:        uint32(prog.FD()),
		AttachType:    sys.AttachType(ebpf.AttachTCXIngress),
	}

	_, err = sys.LinkCreateTcx(&attr)

	if errors.Is(err, unix.ENODEV) {
		return nil
	}
	if err != nil {
		return ErrNotSupported
	}
	return errors.New("syscall succeeded unexpectedly")
}, "6.6")

// haveNetkit probes for netkit link support in the same way as haveTCX, using
// the netkit link create call.
var haveNetkit = internal.NewFeatureTest("netkit", func() error {
	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:    ebpf.SchedCLS,
		License: "MIT",
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
	})

	if err != nil {
		return internal.ErrNotSupported
	}

	defer prog.Close()
	attr := sys.LinkCreateNetkitAttr{
		// We rely on this being checked during the syscall.
		// With an otherwise correct payload we expect ENODEV here
		// as an indication that the feature is present.
TargetIfindex: ^uint32(0),
		ProgFd:        uint32(prog.FD()),
		AttachType:    sys.AttachType(ebpf.AttachNetkitPrimary),
	}

	_, err = sys.LinkCreateNetkit(&attr)

	// ENODEV means the feature is present; every other error is reported as
	// ErrNotSupported.
	if errors.Is(err, unix.ENODEV) {
		return nil
	}
	if err != nil {
		return ErrNotSupported
	}
	return errors.New("syscall succeeded unexpectedly")
}, "6.7")

================================================
FILE: link/syscalls_test.go
================================================
//go:build !windows

package link

import (
	"testing"

	"github.com/cilium/ebpf/internal/testutils"
)

// Each test below runs the shared feature test check against one probe.

func TestHaveProgAttach(t *testing.T) {
	testutils.CheckFeatureTest(t, haveProgAttach)
}

func TestHaveProgAttachReplace(t *testing.T) {
	testutils.CheckFeatureTest(t, haveProgAttachReplace)
}

func TestHaveBPFLink(t *testing.T) {
	testutils.CheckFeatureTest(t, haveBPFLink)
}

func TestHaveProgQuery(t *testing.T) {
	testutils.CheckFeatureTest(t, haveProgQuery)
}

func TestHaveTCX(t *testing.T) {
	testutils.CheckFeatureTest(t, haveTCX)
}

func TestHaveNetkit(t *testing.T) {
	testutils.CheckFeatureTest(t, haveNetkit)
}

================================================
FILE: link/tcx.go
================================================
//go:build !windows

package link

import (
	"fmt"
	"runtime"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/sys"
)

// TCXOptions holds the parameters for AttachTCX.
type TCXOptions struct {
	// Index of the interface to attach to.
	Interface int
	// Program to attach.
	Program *ebpf.Program
	// One of the AttachTCX* constants.
	Attach ebpf.AttachType
	// Attach relative to an anchor. Optional.
	Anchor Anchor
	// Only attach if the expected revision matches.
	ExpectedRevision uint64
	// Flags control the attach behaviour. Specify an Anchor instead of
	// F_LINK, F_ID, F_BEFORE, F_AFTER and F_REPLACE. Optional.
Flags uint32
}

// AttachTCX attaches opts.Program to the interface opts.Interface via a TCX
// link.
//
// A negative interface index and any anchor flag in opts.Flags are rejected.
// If creating the link fails and the TCX feature probe reports an error, that
// probe error is returned instead of the syscall error.
func AttachTCX(opts TCXOptions) (Link, error) {
	if opts.Interface < 0 {
		return nil, fmt.Errorf("interface %d is out of bounds", opts.Interface)
	}

	if opts.Flags&anchorFlags != 0 {
		return nil, fmt.Errorf("disallowed flags: use Anchor to specify attach target")
	}

	attr := sys.LinkCreateTcxAttr{
		ProgFd:           uint32(opts.Program.FD()),
		AttachType:       sys.AttachType(opts.Attach),
		TargetIfindex:    uint32(opts.Interface),
		ExpectedRevision: opts.ExpectedRevision,
		Flags:            opts.Flags,
	}

	if opts.Anchor != nil {
		fdOrID, flags, err := opts.Anchor.anchor()
		if err != nil {
			return nil, fmt.Errorf("attach tcx link: %w", err)
		}

		attr.RelativeFdOrId = fdOrID
		attr.Flags |= flags
	}

	fd, err := sys.LinkCreateTcx(&attr)
	// attr only carries raw fd numbers; keep their owners reachable until the
	// syscall has returned.
	runtime.KeepAlive(opts.Program)
	runtime.KeepAlive(opts.Anchor)
	if err != nil {
		if haveFeatErr := haveTCX(); haveFeatErr != nil {
			return nil, haveFeatErr
		}
		return nil, fmt.Errorf("attach tcx link: %w", err)
	}

	return &tcxLink{RawLink{fd, ""}}, nil
}

// tcxLink is the Link returned by AttachTCX.
type tcxLink struct {
	RawLink
}

var _ Link = (*tcxLink)(nil)

// Info returns the link's metadata, including the interface index and attach
// type as TCXInfo.
func (tcx *tcxLink) Info() (*Info, error) {
	var info sys.TcxLinkInfo
	if err := sys.ObjInfo(tcx.fd, &info); err != nil {
		return nil, fmt.Errorf("tcx link info: %s", err)
	}
	extra := &TCXInfo{
		Ifindex:    info.Ifindex,
		AttachType: info.AttachType,
	}

	return &Info{
		info.Type,
		info.Id,
		ebpf.ProgramID(info.ProgId),
		extra,
	}, nil
}

================================================
FILE: link/tcx_test.go
================================================
//go:build !windows

package link

import (
	"fmt"
	"math"
	"net"
	"testing"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal/testutils"
	"github.com/cilium/ebpf/internal/unix"
)

// TestAttachTCX attaches a SchedCLS program via mustAttachTCX and runs the
// shared link checks on the result.
func TestAttachTCX(t *testing.T) {
	testutils.SkipOnOldKernel(t, "6.6", "TCX link")

	prog := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, "")
	link, _ := mustAttachTCX(t, prog, ebpf.AttachTCXIngress)

	testLink(t, link, prog)
}

func TestTCXAnchor(t *testing.T) {
	testutils.SkipOnOldKernel(t, "6.6", "TCX link")

	a :=
mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, "") b := mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, "") linkA, iface := mustAttachTCX(t, a, ebpf.AttachTCXEgress) programInfo, err := a.Info() qt.Assert(t, qt.IsNil(err)) programID, _ := programInfo.ID() linkInfo, err := linkA.Info() qt.Assert(t, qt.IsNil(err)) linkID := linkInfo.ID for _, anchor := range []Anchor{ Head(), Tail(), BeforeProgram(a), BeforeProgramByID(programID), AfterLink(linkA), AfterLinkByID(linkID), } { t.Run(fmt.Sprintf("%T", anchor), func(t *testing.T) { linkB, err := AttachTCX(TCXOptions{ Program: b, Attach: ebpf.AttachTCXEgress, Interface: iface, Anchor: anchor, }) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(linkB.Close())) }) } } func TestTCXExpectedRevision(t *testing.T) { testutils.SkipOnOldKernel(t, "6.6", "TCX link") iface, err := net.InterfaceByName("lo") qt.Assert(t, qt.IsNil(err)) _, err = AttachTCX(TCXOptions{ Program: mustLoadProgram(t, ebpf.SchedCLS, ebpf.AttachNone, ""), Attach: ebpf.AttachTCXEgress, Interface: iface.Index, ExpectedRevision: math.MaxUint64, }) qt.Assert(t, qt.ErrorIs(err, unix.ESTALE)) } func mustAttachTCX(tb testing.TB, prog *ebpf.Program, attachType ebpf.AttachType) (Link, int) { iface, err := net.InterfaceByName("lo") qt.Assert(tb, qt.IsNil(err)) link, err := AttachTCX(TCXOptions{ Program: prog, Attach: attachType, Interface: iface.Index, }) qt.Assert(tb, qt.IsNil(err)) tb.Cleanup(func() { qt.Assert(tb, qt.IsNil(link.Close())) }) return link, iface.Index } ================================================ FILE: link/tracepoint.go ================================================ //go:build !windows package link import ( "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/tracefs" ) // TracepointOptions defines additional parameters that will be used // when loading Tracepoints. type TracepointOptions struct { // Arbitrary value that can be fetched from an eBPF program // via `bpf_get_attach_cookie()`. // // Needs kernel 5.15+. 
Cookie uint64 } // Tracepoint attaches the given eBPF program to the tracepoint with the given // group and name. See /sys/kernel/tracing/events to find available // tracepoints. The top-level directory is the group, the event's subdirectory // is the name. Example: // // tp, err := Tracepoint("syscalls", "sys_enter_fork", prog, nil) // // Losing the reference to the resulting Link (tp) will close the Tracepoint // and prevent further execution of prog. The Link must be Closed during // program shutdown to avoid leaking system resources. // // Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is // only possible as of kernel 4.14 (commit cf5f5ce). // // The returned Link may implement [PerfEvent]. func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions) (Link, error) { if group == "" || name == "" { return nil, fmt.Errorf("group and name cannot be empty: %w", errInvalidInput) } if prog == nil { return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) } if prog.Type() != ebpf.TracePoint { return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput) } tid, err := tracefs.EventID(group, name) if err != nil { return nil, err } fd, err := openTracepointPerfEvent(tid, perfAllThreads) if err != nil { return nil, err } var cookie uint64 if opts != nil { cookie = opts.Cookie } pe := newPerfEvent(fd, nil) lnk, err := attachPerfEvent(pe, prog, cookie) if err != nil { pe.Close() return nil, err } return lnk, nil } ================================================ FILE: link/tracepoint_test.go ================================================ //go:build !windows package link import ( "errors" "os" "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/unix" "github.com/go-quicktest/qt" ) func TestTracepoint(t *testing.T) { // Requires at least 4.7 (98b5c2c65c29 "perf, bpf: allow bpf programs attach to tracepoints") 
// TestTracepointErrors checks that Tracepoint rejects invalid arguments with
// an error wrapping errInvalidInput.
func TestTracepointErrors(t *testing.T) {
	// Invalid Tracepoint incantations.
	_, err := Tracepoint("", "", nil, nil) // empty names
	qt.Assert(t, qt.ErrorIs(err, errInvalidInput))

	_, err = Tracepoint("_", "_", nil, nil) // empty prog
	qt.Assert(t, qt.ErrorIs(err, errInvalidInput))

	// NOTE(review): Tracepoint checks prog.Type() before it resolves
	// group/name, so with a zero-value Program this case is presumably
	// rejected by the program type check and never reaches name validation.
	// Confirm whether a TracePoint-typed program is needed to cover it.
	_, err = Tracepoint(".", "+", &ebpf.Program{}, nil) // illegal chars in group/name
	qt.Assert(t, qt.ErrorIs(err, errInvalidInput))

	_, err = Tracepoint("foo", "bar", &ebpf.Program{}, nil) // wrong prog type
	qt.Assert(t, qt.ErrorIs(err, errInvalidInput))
}
if err := tp.Close(); err != nil { t.Fatal(err) } // Reset map value to 0 at index 0. if err := m.Update(uint32(0), uint32(0), ebpf.UpdateExist); err != nil { t.Fatal(err) } // Retrigger the ebpf program call. unix.Getpid() // Assert that this time the value has not been updated. assertMapValue(t, m, 0, 0) } func TestTracepointInfo(t *testing.T) { testutils.SkipOnOldKernel(t, "6.6", "bpf_link_info_perf_event") prog := mustLoadProgram(t, ebpf.TracePoint, 0, "") // printk is guaranteed to be present. // Kernels before 4.14 don't support attaching to syscall tracepoints. tp, err := Tracepoint("printk", "console", prog, nil) if err != nil { t.Fatal(err) } defer tp.Close() info, err := tp.Info() if err != nil { t.Fatal(err) } tpInfo := info.PerfEvent().Tracepoint() qt.Assert(t, qt.Equals(tpInfo.Tracepoint, "console")) } ================================================ FILE: link/tracing.go ================================================ //go:build !windows package link import ( "errors" "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) type tracing struct { RawLink } func (f *tracing) Update(_ *ebpf.Program) error { return fmt.Errorf("tracing update: %w", ErrNotSupported) } func (f *tracing) Info() (*Info, error) { var info sys.TracingLinkInfo if err := sys.ObjInfo(f.fd, &info); err != nil { return nil, fmt.Errorf("tracing link info: %s", err) } extra := &TracingInfo{ TargetObjectId: info.TargetObjId, TargetBtfId: info.TargetBtfId, AttachType: info.AttachType, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } // AttachFreplace attaches the given eBPF program to the function it replaces. // // The program and name can either be provided at link time, or can be provided // at program load time. If they were provided at load time, they should be nil // and empty respectively here, as they will be ignored by the kernel. 
// Examples: // // AttachFreplace(dispatcher, "function", replacement) // AttachFreplace(nil, "", replacement) func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (Link, error) { if (name == "") != (targetProg == nil) { return nil, fmt.Errorf("must provide both or neither of name and targetProg: %w", errInvalidInput) } if prog == nil { return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) } if prog.Type() != ebpf.Extension { return nil, fmt.Errorf("eBPF program type %s is not an Extension: %w", prog.Type(), errInvalidInput) } var ( target int typeID btf.TypeID ) if targetProg != nil { btfHandle, err := targetProg.Handle() if err != nil { return nil, err } defer btfHandle.Close() spec, err := btfHandle.Spec(nil) if err != nil { return nil, err } var function *btf.Func if err := spec.TypeByName(name, &function); err != nil { return nil, err } target = targetProg.FD() typeID, err = spec.TypeID(function) if err != nil { return nil, err } } link, err := AttachRawLink(RawLinkOptions{ Target: target, Program: prog, Attach: ebpf.AttachNone, BTF: typeID, }) if errors.Is(err, sys.ENOTSUPP) { // This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke. return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported) } if err != nil { return nil, err } return &tracing{*link}, nil } type TracingOptions struct { // Program must be of type Tracing with attach type // AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or // AttachTraceRawTp. Program *ebpf.Program // Program attach type. Can be one of: // - AttachTraceFEntry // - AttachTraceFExit // - AttachModifyReturn // - AttachTraceRawTp // This field is optional. AttachType ebpf.AttachType // Arbitrary value that can be fetched from an eBPF program // via `bpf_get_attach_cookie()`. Cookie uint64 } type LSMOptions struct { // Program must be of type LSM with attach type // AttachLSMMac. 
// attachBTFID attaches a Tracing or LSM program, i.e. a program type that
// attaches to a btf_id, and returns the resulting link.
//
// For the tracing and LSM attach types it first tries to create a BPF link
// with the given cookie. If that fails with EINVAL or ENOTSUPP, or if at is
// AttachNone, it falls back to RawTracepointOpen, which is only attempted
// when cookie is zero. Any other attach type is rejected.
//
// The concrete type of the returned Link is chosen from the link type the
// kernel reports: a raw_tracepoint link yields *rawTracepoint, everything
// else yields *tracing.
func attachBTFID(program *ebpf.Program, at ebpf.AttachType, cookie uint64) (Link, error) {
	if program.FD() < 0 {
		return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd)
	}

	var (
		fd  *sys.FD
		err error
	)
	switch at {
	case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachTraceRawTp,
		ebpf.AttachModifyReturn, ebpf.AttachLSMMac:
		// Attach via BPF link
		fd, err = sys.LinkCreateTracing(&sys.LinkCreateTracingAttr{
			ProgFd:     uint32(program.FD()),
			AttachType: sys.AttachType(at),
			Cookie:     cookie,
		})
		if err == nil {
			break
		}
		// Only EINVAL and ENOTSUPP trigger the raw tracepoint fallback below
		// (presumably kernels without tracing link support — TODO confirm);
		// every other error is final.
		if !errors.Is(err, unix.EINVAL) && !errors.Is(err, sys.ENOTSUPP) {
			return nil, fmt.Errorf("create tracing link: %w", err)
		}
		fallthrough
	case ebpf.AttachNone:
		// Attach via RawTracepointOpen
		// The attr passed below carries only the program fd, so a non-zero
		// cookie cannot be honoured on this path.
		if cookie > 0 {
			return nil, fmt.Errorf("create raw tracepoint with cookie: %w", ErrNotSupported)
		}

		fd, err = sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
			ProgFd: uint32(program.FD()),
		})
		if errors.Is(err, sys.ENOTSUPP) {
			// This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke.
			return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported)
		}
		if err != nil {
			return nil, fmt.Errorf("create raw tracepoint: %w", err)
		}
	default:
		return nil, fmt.Errorf("invalid attach type: %s", at.String())
	}

	raw := RawLink{fd: fd}
	info, err := raw.Info()
	if err != nil {
		// Do not leak the freshly created link fd if it cannot be inspected.
		raw.Close()
		return nil, err
	}

	if info.Type == RawTracepointType {
		// Sadness upon sadness: a Tracing program with AttachRawTp returns
		// a raw_tracepoint link. Other types return a tracing link.
		return &rawTracepoint{raw}, nil
	}
	return &tracing{raw}, nil
}
func AttachTracing(opts TracingOptions) (Link, error) { if t := opts.Program.Type(); t != ebpf.Tracing { return nil, fmt.Errorf("invalid program type %s, expected Tracing", t) } switch opts.AttachType { case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachModifyReturn, ebpf.AttachTraceRawTp, ebpf.AttachNone: default: return nil, fmt.Errorf("invalid attach type: %s", opts.AttachType.String()) } return attachBTFID(opts.Program, opts.AttachType, opts.Cookie) } // AttachLSM links a Linux security module (LSM) BPF Program to a BPF // hook defined in kernel modules. func AttachLSM(opts LSMOptions) (Link, error) { if t := opts.Program.Type(); t != ebpf.LSM { return nil, fmt.Errorf("invalid program type %s, expected LSM", t) } return attachBTFID(opts.Program, ebpf.AttachLSMMac, opts.Cookie) } ================================================ FILE: link/tracing_test.go ================================================ //go:build !windows package link import ( "testing" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" ) func TestFreplace(t *testing.T) { testutils.SkipOnOldKernel(t, "5.10", "freplace") file := testutils.NativeFile(t, "../testdata/freplace-%s.elf") spec, err := ebpf.LoadCollectionSpec(file) if err != nil { t.Fatal("Can't parse ELF:", err) } target, err := ebpf.NewProgram(spec.Programs["sched_process_exec"]) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create target program:", err) } defer target.Close() // Test attachment specified at load time spec.Programs["replacement"].AttachTarget = target replacement, err := ebpf.NewProgram(spec.Programs["replacement"]) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create replacement program:", err) } defer replacement.Close() freplace, err := AttachFreplace(nil, "", replacement) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create freplace:", err) } testLink(t, freplace, replacement) } func TestFentryFexit(t *testing.T) { 
testutils.SkipOnOldKernel(t, "5.5", "fentry") spec, err := ebpf.LoadCollectionSpec(testutils.NativeFile(t, "../testdata/fentry_fexit-%s.elf")) if err != nil { t.Fatal("Can't parse ELF:", err) } target, err := ebpf.NewProgram(spec.Programs["target"]) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't create target program:", err) } defer target.Close() for _, name := range []string{"trace_on_entry", "trace_on_exit"} { progSpec := spec.Programs[name] t.Run(name, func(t *testing.T) { progSpec.AttachTarget = target prog, err := ebpf.NewProgram(progSpec) if err != nil { t.Fatal(err) } defer prog.Close() t.Run("link", func(t *testing.T) { testutils.SkipOnOldKernel(t, "5.11", "BPF_LINK_TYPE_TRACING") tracingLink, err := AttachTracing(TracingOptions{ Program: prog, }) if err != nil { t.Fatal("Can't attach tracing:", err) } defer tracingLink.Close() testLink(t, tracingLink, prog) }) }) } } func TestTracing(t *testing.T) { testutils.SkipOnOldKernel(t, "5.11", "BPF_LINK_TYPE_TRACING") tests := []struct { name string attachTo string programType ebpf.ProgramType programAttachType, attachTypeOpt ebpf.AttachType cookie uint64 }{ { name: "AttachTraceFEntry", attachTo: "inet_dgram_connect", programType: ebpf.Tracing, programAttachType: ebpf.AttachTraceFEntry, }, { name: "AttachTraceFEntry", attachTo: "inet_dgram_connect", programType: ebpf.Tracing, programAttachType: ebpf.AttachTraceFEntry, attachTypeOpt: ebpf.AttachTraceFEntry, cookie: 1, }, { name: "AttachTraceFEntry", attachTo: "inet_dgram_connect", programType: ebpf.Tracing, programAttachType: ebpf.AttachTraceFEntry, }, { name: "AttachTraceFExit", attachTo: "inet_dgram_connect", programType: ebpf.Tracing, programAttachType: ebpf.AttachTraceFExit, }, { name: "AttachModifyReturn", attachTo: "bpf_modify_return_test", programType: ebpf.Tracing, programAttachType: ebpf.AttachModifyReturn, }, { name: "AttachTraceRawTp", attachTo: "kfree_skb", programType: ebpf.Tracing, programAttachType: ebpf.AttachTraceRawTp, }, } 
for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { prog := mustLoadProgram(t, tt.programType, tt.programAttachType, tt.attachTo) opts := TracingOptions{Program: prog, AttachType: tt.attachTypeOpt, Cookie: tt.cookie} link, err := AttachTracing(opts) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } testLink(t, link, prog) if err = link.Close(); err != nil { t.Fatal(err) } }) } } func TestLSM(t *testing.T) { testutils.SkipOnOldKernel(t, "5.11", "BPF_LINK_TYPE_TRACING") prog := mustLoadProgram(t, ebpf.LSM, ebpf.AttachLSMMac, "file_mprotect") link, err := AttachLSM(LSMOptions{Program: prog}) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } testLink(t, link, prog) } ================================================ FILE: link/uprobe.go ================================================ //go:build !windows package link import ( "debug/elf" "errors" "fmt" "io/fs" "os" "sync" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/tracefs" ) var ( uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset" // elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799 uprobeRefCtrOffsetShift = 32 haveRefCtrOffsetPMU = internal.NewFeatureTest("RefCtrOffsetPMU", func() error { _, err := os.Stat(uprobeRefCtrOffsetPMUPath) if errors.Is(err, os.ErrNotExist) { return internal.ErrNotSupported } if err != nil { return err } return nil }, "4.20") // ErrNoSymbol indicates that the given symbol was not found // in the ELF symbols table. ErrNoSymbol = errors.New("not found") ) const permExec fs.FileMode = 0111 // Executable defines an executable program on the filesystem. type Executable struct { // Path of the executable on the filesystem. path string // Cached symbol values for all ELF and dynamic symbols. // Before using this, lazyLoadSymbols() must be called. cachedSymbols map[string]symbol // cachedSymbolsOnce tracks the lazy load of cachedSymbols. 
// symbol is the cached location and extent of a single ELF symbol.
type symbol struct {
	// addr is the symbol's location as computed when symbols are loaded.
	addr uint64
	// size is the number of bytes covered by the symbol.
	size uint64
}

// contains returns true if the given address falls within the range
// covered by the symbol, i.e. the half-open interval [addr, addr+size).
//
// The check subtracts instead of computing addr+size, which could wrap
// around in uint64 for symbols located near the top of the address space
// and would then wrongly report addresses as not contained.
func (s symbol) contains(address uint64) bool {
	return address >= s.addr && address-s.addr < s.size
}
// lazyLoadSymbols populates ex.cachedSymbols from the ELF file at ex.path the
// first time it is called.
//
// The file must be an ELF executable (ET_EXEC) or shared object (ET_DYN);
// any other ELF type is rejected. Parsing and loading run inside
// cachedSymbolsOnce, so the work is performed at most once per Executable.
//
// NOTE(review): err is local to each call and only assigned inside the
// Once callback. If the first call fails, later calls skip the callback and
// return nil even though cachedSymbols was never populated, so callers would
// then see ErrNoSymbol instead of the original load error. Making the error
// sticky requires storing it on Executable — confirm and fix there.
func (ex *Executable) lazyLoadSymbols() error {
	var err error
	ex.cachedSymbolsOnce.Do(func() {
		var f *internal.SafeELFFile
		f, err = internal.OpenSafeELFFile(ex.path)
		if err != nil {
			err = fmt.Errorf("parse ELF file: %w", err)
			return
		}
		defer f.Close()

		if f.Type != elf.ET_EXEC && f.Type != elf.ET_DYN {
			// ELF is not an executable or a shared object.
			err = errors.New("the given file is not an executable or a shared object")
			return
		}

		err = ex.load(f)
	})

	return err
}
func (ex *Executable) address(symbol string, address, offset uint64) (uint64, error) { if address > 0 { return address + offset, nil } err := ex.lazyLoadSymbols() if err != nil { return 0, fmt.Errorf("lazy load symbols: %w", err) } sym, ok := ex.cachedSymbols[symbol] if !ok { return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol) } // Symbols with location 0 from section undef are shared library calls and // are relocated before the binary is executed. Dynamic linking is not // implemented by the library, so mark this as unsupported for now. // // Since only offset values are stored and not elf.Symbol, if the value is 0, // assume it's an external symbol. if sym.addr == 0 { return 0, fmt.Errorf("cannot resolve %s library call '%s': %w "+ "(consider providing UprobeOptions.Address)", ex.path, symbol, ErrNotSupported) } if offset >= sym.size { return 0, fmt.Errorf("offset %d is out of range of symbol %s", offset, symbol) } return sym.addr + offset, nil } // SymbolOffset represents an offset within a symbol within a binary. type SymbolOffset struct { Symbol string Offset uint64 } // Symbol returns the SymbolOffset that the given address points to. // This includes the symbol name and the offset within that symbol. // // If no symbol is found for the given address, ErrNoSymbol is returned. func (ex *Executable) Symbol(address uint64) (SymbolOffset, error) { if err := ex.lazyLoadSymbols(); err != nil { return SymbolOffset{}, fmt.Errorf("lazy load symbols: %w", err) } for name, symbol := range ex.cachedSymbols { if symbol.contains(address) { return SymbolOffset{name, address - symbol.addr}, nil } } return SymbolOffset{}, ErrNoSymbol } // Uprobe attaches the given eBPF program to a perf event that fires when the // given symbol starts executing in the given Executable. 
// For example, /bin/bash::main(): // // ex, _ = OpenExecutable("/bin/bash") // ex.Uprobe("main", prog, nil) // // When using symbols which belongs to shared libraries, // an offset must be provided via options: // // up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123}) // // Note: Setting the Offset field in the options supersedes the symbol's offset. // // Losing the reference to the resulting Link (up) will close the Uprobe // and prevent further execution of prog. The Link must be Closed during // program shutdown to avoid leaking system resources. // // Functions provided by shared libraries can currently not be traced and // will result in an ErrNotSupported. // // The returned Link may implement [PerfEvent]. func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) { u, err := ex.uprobe(symbol, prog, opts, false) if err != nil { return nil, err } lnk, err := attachPerfEvent(u, prog, opts.cookie()) if err != nil { u.Close() return nil, err } return lnk, nil } // Uretprobe attaches the given eBPF program to a perf event that fires right // before the given symbol exits. For example, /bin/bash::main(): // // ex, _ = OpenExecutable("/bin/bash") // ex.Uretprobe("main", prog, nil) // // When using symbols which belongs to shared libraries, // an offset must be provided via options: // // up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123}) // // Note: Setting the Offset field in the options supersedes the symbol's offset. // // Losing the reference to the resulting Link (up) will close the Uprobe // and prevent further execution of prog. The Link must be Closed during // program shutdown to avoid leaking system resources. // // Functions provided by shared libraries can currently not be traced and // will result in an ErrNotSupported. // // The returned Link may implement [PerfEvent]. 
func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) { u, err := ex.uprobe(symbol, prog, opts, true) if err != nil { return nil, err } lnk, err := attachPerfEvent(u, prog, opts.cookie()) if err != nil { u.Close() return nil, err } return lnk, nil } // uprobe opens a perf event for the given binary/symbol and attaches prog to it. // If ret is true, create a uretprobe. func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions, ret bool) (*perfEvent, error) { if prog == nil { return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) } if prog.Type() != ebpf.Kprobe { return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput) } if opts == nil { opts = &UprobeOptions{} } offset, err := ex.address(symbol, opts.Address, opts.Offset) if err != nil { return nil, err } pid := opts.PID if pid == 0 { pid = perfAllThreads } if opts.RefCtrOffset != 0 { if err := haveRefCtrOffsetPMU(); err != nil { return nil, fmt.Errorf("uprobe ref_ctr_offset: %w", err) } } args := tracefs.ProbeArgs{ Type: tracefs.Uprobe, Symbol: symbol, Path: ex.path, Offset: offset, Pid: pid, RefCtrOffset: opts.RefCtrOffset, Ret: ret, Cookie: opts.Cookie, Group: opts.TraceFSPrefix, } // Use uprobe PMU if the kernel has it available. tp, err := pmuProbe(args) if err == nil { return tp, nil } if !errors.Is(err, ErrNotSupported) { return nil, fmt.Errorf("creating perf_uprobe PMU: %w", err) } // Use tracefs if uprobe PMU is missing. 
tp, err = tracefsProbe(args) if err != nil { return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err) } return tp, nil } ================================================ FILE: link/uprobe_multi.go ================================================ //go:build !windows package link import ( "errors" "fmt" "os" "github.com/cilium/ebpf" "github.com/cilium/ebpf/features" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // UprobeMultiOptions defines additional parameters that will be used // when opening a UprobeMulti Link. type UprobeMultiOptions struct { // Symbol addresses. If set, overrides the addresses eventually parsed from // the executable. Mutually exclusive with UprobeMulti's symbols argument. Addresses []uint64 // Offsets into functions provided by UprobeMulti's symbols argument. // For example: to set uprobes to main+5 and _start+10, call UprobeMulti // with: // symbols: "main", "_start" // opt.Offsets: 5, 10 Offsets []uint64 // Optional list of associated ref counter offsets. RefCtrOffsets []uint64 // Optional list of associated BPF cookies. Cookies []uint64 // Only set the uprobe_multi link on the given process ID, zero PID means // system-wide. PID uint32 } func (ex *Executable) UprobeMulti(symbols []string, prog *ebpf.Program, opts *UprobeMultiOptions) (Link, error) { return ex.uprobeMulti(symbols, prog, opts, 0) } func (ex *Executable) UretprobeMulti(symbols []string, prog *ebpf.Program, opts *UprobeMultiOptions) (Link, error) { // The return probe is not limited for symbols entry, so there's no special // setup for return uprobes (other than the extra flag). The symbols, opts.Offsets // and opts.Addresses arrays follow the same logic as for entry uprobes. 
return ex.uprobeMulti(symbols, prog, opts, sys.BPF_F_UPROBE_MULTI_RETURN) } func (ex *Executable) uprobeMulti(symbols []string, prog *ebpf.Program, opts *UprobeMultiOptions, flags uint32) (Link, error) { if prog == nil { return nil, errors.New("cannot attach a nil program") } if opts == nil { opts = &UprobeMultiOptions{} } addresses, err := ex.addresses(symbols, opts.Addresses, opts.Offsets) if err != nil { return nil, err } addrs := len(addresses) cookies := len(opts.Cookies) refCtrOffsets := len(opts.RefCtrOffsets) if addrs == 0 { return nil, fmt.Errorf("field Addresses is required: %w", errInvalidInput) } if refCtrOffsets > 0 && refCtrOffsets != addrs { return nil, fmt.Errorf("field RefCtrOffsets must be exactly Addresses in length: %w", errInvalidInput) } if cookies > 0 && cookies != addrs { return nil, fmt.Errorf("field Cookies must be exactly Addresses in length: %w", errInvalidInput) } attr := &sys.LinkCreateUprobeMultiAttr{ Path: sys.NewStringPointer(ex.path), ProgFd: uint32(prog.FD()), AttachType: sys.BPF_TRACE_UPROBE_MULTI, UprobeMultiFlags: flags, Count: uint32(addrs), Offsets: sys.SlicePointer(addresses), Pid: opts.PID, } if refCtrOffsets != 0 { attr.RefCtrOffsets = sys.SlicePointer(opts.RefCtrOffsets) } if cookies != 0 { attr.Cookies = sys.SlicePointer(opts.Cookies) } fd, err := sys.LinkCreateUprobeMulti(attr) if errors.Is(err, unix.ESRCH) { return nil, fmt.Errorf("%w (specified pid not found?)", os.ErrNotExist) } // Since Linux commit 46ba0e49b642 ("bpf: fix multi-uprobe PID filtering // logic"), if the provided pid overflows MaxInt32 (turning it negative), the // kernel will return EINVAL instead of ESRCH. 
if errors.Is(err, unix.EINVAL) { return nil, fmt.Errorf("%w (invalid pid, missing symbol or prog's AttachType not AttachTraceUprobeMulti?)", err) } if err != nil { if haveFeatErr := features.HaveBPFLinkUprobeMulti(); haveFeatErr != nil { return nil, haveFeatErr } return nil, err } return &uprobeMultiLink{RawLink{fd, ""}}, nil } func (ex *Executable) addresses(symbols []string, addresses, offsets []uint64) ([]uint64, error) { n := len(symbols) if n == 0 { n = len(addresses) } if n == 0 { return nil, fmt.Errorf("%w: neither symbols nor addresses given", errInvalidInput) } if symbols != nil && len(symbols) != n { return nil, fmt.Errorf("%w: have %d symbols but want %d", errInvalidInput, len(symbols), n) } if addresses != nil && len(addresses) != n { return nil, fmt.Errorf("%w: have %d addresses but want %d", errInvalidInput, len(addresses), n) } if offsets != nil && len(offsets) != n { return nil, fmt.Errorf("%w: have %d offsets but want %d", errInvalidInput, len(offsets), n) } results := make([]uint64, 0, n) for i := 0; i < n; i++ { var sym string if symbols != nil { sym = symbols[i] } var addr, off uint64 if addresses != nil { addr = addresses[i] } if offsets != nil { off = offsets[i] } result, err := ex.address(sym, addr, off) if err != nil { return nil, err } results = append(results, result) } return results, nil } type uprobeMultiLink struct { RawLink } var _ Link = (*uprobeMultiLink)(nil) func (kml *uprobeMultiLink) Update(_ *ebpf.Program) error { return fmt.Errorf("update uprobe_multi: %w", ErrNotSupported) } func (kml *uprobeMultiLink) Info() (*Info, error) { var info sys.UprobeMultiLinkInfo if err := sys.ObjInfo(kml.fd, &info); err != nil { return nil, fmt.Errorf("uprobe multi link info: %s", err) } var ( path = make([]byte, info.PathSize) refCtrOffsets = make([]uint64, info.Count) addrs = make([]uint64, info.Count) cookies = make([]uint64, info.Count) ) info = sys.UprobeMultiLinkInfo{ Path: sys.SlicePointer(path), PathSize: uint32(len(path)), Offsets: 
sys.SlicePointer(addrs), RefCtrOffsets: sys.SlicePointer(refCtrOffsets), Cookies: sys.SlicePointer(cookies), Count: uint32(len(addrs)), } if err := sys.ObjInfo(kml.fd, &info); err != nil { return nil, fmt.Errorf("uprobe multi link info: %s", err) } if info.Path.IsNil() { path = nil } if info.Cookies.IsNil() { cookies = nil } if info.Offsets.IsNil() { addrs = nil } if info.RefCtrOffsets.IsNil() { refCtrOffsets = nil } extra := &UprobeMultiInfo{ Count: info.Count, Flags: info.Flags, pid: info.Pid, offsets: addrs, cookies: cookies, refCtrOffsets: refCtrOffsets, File: unix.ByteSliceToString(path), } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } ================================================ FILE: link/uprobe_multi_test.go ================================================ //go:build !windows package link import ( "errors" "math" "os" "os/exec" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/features" "github.com/cilium/ebpf/internal/testutils" ) func TestUprobeMulti(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkUprobeMulti()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceUprobeMulti, "") // uprobe um, err := bashEx.UprobeMulti(bashSyms, prog, nil) if err != nil { t.Fatal(err) } testLink(t, um, prog) _ = um.Close() // uretprobe um, err = bashEx.UretprobeMulti(bashSyms, prog, nil) if err != nil { t.Fatal(err) } testLink(t, um, prog) _ = um.Close() } func TestUprobeMultiInfo(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkKprobeMulti()) testutils.SkipOnOldKernel(t, "6.8", "bpf_link_info_uprobe_multi") prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceUprobeMulti, "") // uprobe um, err := bashEx.UprobeMulti(bashSyms, prog, nil) if err != nil { t.Fatal(err) } defer um.Close() linkInfo, err := um.Info() if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(linkInfo.Type, UprobeMultiType)) uprobeDetails := linkInfo.UprobeMulti() // On some 
platforms, /bin/bash may point to /usr/bin/bash, thus only a contains and no equals check qt.Assert(t, qt.StringContains(uprobeDetails.File, bashEx.path)) uprobeOffsets, ok := uprobeDetails.Offsets() qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.HasLen(uprobeOffsets, len(bashSyms))) bashElf, err := OpenExecutable(uprobeDetails.File) qt.Assert(t, qt.IsNil(err)) var symnames = make([]string, len(uprobeOffsets)) for i, offset := range uprobeOffsets { symOffset, err := bashElf.Symbol(offset.Offset) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(symOffset.Offset, 0)) symnames[i] = symOffset.Symbol } qt.Assert(t, qt.ContentEquals(symnames, bashSyms)) } func TestUprobeMultiInput(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkUprobeMulti()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceUprobeMulti, "") // Always doing same test for both uprobe and uretprobe // One of symbols or offsets must be given. _, err := bashEx.UprobeMulti([]string{}, prog, nil) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = bashEx.UretprobeMulti([]string{}, prog, nil) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // One address, two cookies. _, err = bashEx.UprobeMulti([]string{}, prog, &UprobeMultiOptions{ Addresses: []uint64{1}, Cookies: []uint64{2, 3}, }) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = bashEx.UretprobeMulti([]string{}, prog, &UprobeMultiOptions{ Addresses: []uint64{1}, Cookies: []uint64{2, 3}, }) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // Two addresses, one refctr offset. _, err = bashEx.UprobeMulti([]string{}, prog, &UprobeMultiOptions{ Addresses: []uint64{1, 2}, RefCtrOffsets: []uint64{4}, }) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = bashEx.UretprobeMulti([]string{}, prog, &UprobeMultiOptions{ Addresses: []uint64{1, 2}, RefCtrOffsets: []uint64{4}, }) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // It's either symbols or addresses. 
_, err = bashEx.UprobeMulti(bashSyms, prog, &UprobeMultiOptions{ Addresses: []uint64{1}, }) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = bashEx.UretprobeMulti(bashSyms, prog, &UprobeMultiOptions{ Addresses: []uint64{1}, }) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // No addresses and no symbols _, err = bashEx.UprobeMulti([]string{}, prog, nil) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) _, err = bashEx.UretprobeMulti([]string{}, prog, nil) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // PID not found _, err = bashEx.UprobeMulti(bashSyms, prog, &UprobeMultiOptions{ // pid_t is int32, overflowing it will return EINVAL. PID: math.MaxInt32, }) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist)) _, err = bashEx.UretprobeMulti(bashSyms, prog, &UprobeMultiOptions{ PID: math.MaxInt32, }) qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist)) } func TestUprobeMultiResolveOk(t *testing.T) { addrSym1, err := bashEx.address(bashSyms[0], 0, 0) qt.Assert(t, qt.IsNil(err)) addrSym2, err := bashEx.address(bashSyms[1], 0, 0) qt.Assert(t, qt.IsNil(err)) addrSym3, err := bashEx.address(bashSyms[2], 0, 0) qt.Assert(t, qt.IsNil(err)) addrs, err := bashEx.addresses(bashSyms, nil, nil) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(addrs, []uint64{addrSym1, addrSym2, addrSym3})) addrs, err = bashEx.addresses(bashSyms, nil, []uint64{5, 10, 11}) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(addrs, []uint64{addrSym1 + 5, addrSym2 + 10, addrSym3 + 11})) addrs, err = bashEx.addresses(bashSyms, []uint64{1, 2, 3}, nil) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(addrs, []uint64{1, 2, 3})) } func TestUprobeMultiResolveFail(t *testing.T) { // No input _, err := bashEx.addresses(nil, nil, nil) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // Different dimensions for Addresses and Offsets _, err = bashEx.addresses(nil, []uint64{100, 200}, []uint64{5, 10, 11}) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) // Different dimensions for symbols and Offsets _, err = 
bashEx.addresses(bashSyms, nil, []uint64{5, 10}) qt.Assert(t, qt.ErrorIs(err, errInvalidInput)) } func TestUprobeMultiCookie(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkUprobeMulti()) prog := mustLoadProgram(t, ebpf.Kprobe, ebpf.AttachTraceUprobeMulti, "") // uprobe um, err := bashEx.UprobeMulti(bashSyms, prog, &UprobeMultiOptions{ Cookies: []uint64{1, 2, 3}, }) if err != nil { t.Fatal(err) } _ = um.Close() // uretprobe um, err = bashEx.UretprobeMulti(bashSyms, prog, &UprobeMultiOptions{ Cookies: []uint64{3, 2, 1}, }) if err != nil { t.Fatal(err) } _ = um.Close() } func TestUprobeMultiProgramCall(t *testing.T) { testutils.SkipIfNotSupported(t, features.HaveBPFLinkUprobeMulti()) // We execute 'bash --help' args := []string{"--help"} elf := "/bin/bash" test := func(retprobe bool, expected uint32) { m, p := newUpdaterMapProg(t, ebpf.Kprobe, ebpf.AttachTraceUprobeMulti) var err error // Load the executable. ex, err := OpenExecutable(elf) if err != nil { t.Fatal(err) } var um Link // Open UprobeMulti on the executable for the given symbol // and attach it to the ebpf program created above. if retprobe { um, err = ex.UretprobeMulti(bashSyms, p, nil) } else { um, err = ex.UprobeMulti(bashSyms, p, nil) } if errors.Is(err, ErrNoSymbol) { // Assume bash_Syms symbols always exist and skip the test // if the symbol can't be found as certain OS (eg. Debian) // strip binaries. t.Skipf("executable %s appear to be stripped, skipping", elf) } if err != nil { t.Fatal(err) } // Trigger ebpf program call. trigger := func(t *testing.T) { if err := exec.Command(elf, args...).Run(); err != nil { t.Fatal(err) } } trigger(t) // Detach link. if err := um.Close(); err != nil { t.Fatal(err) } assertMapValueGE(t, m, 0, expected) // Reset map value to 0 at index 0. if err := m.Update(uint32(0), uint32(0), ebpf.UpdateExist); err != nil { t.Fatal(err) } // Retrigger the ebpf program call. trigger(t) // Assert that this time the value has not been updated. 
assertMapValue(t, m, 0, 0) } // all 3 uprobes should trigger for entry uprobes test(false, 3) // We have return uprobe installed on main, _start and check_dev_tty // functions, but only check_dev_tty is triggered, because 'bash --help' // calls exit(0). test(true, 1) } ================================================ FILE: link/uprobe_test.go ================================================ //go:build !windows package link import ( "errors" "go/build" "os" "os/exec" "path" "path/filepath" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) var ( bashEx, _ = OpenExecutable("/bin/bash") bashSyms = []string{"main", "_start", "check_dev_tty"} bashSym = bashSyms[0] ) func TestExecutable(t *testing.T) { _, err := OpenExecutable("") if err == nil { t.Fatal("create executable: expected error on empty path") } _, err = OpenExecutable("/non/existent/path") if err == nil { t.Fatal("create executable: expected error on non-existent path") } var pe *os.PathError qt.Assert(t, qt.ErrorAs(err, &pe)) // create temp non-executable file dir := t.TempDir() path := filepath.Join(dir, "file.txt") err = os.WriteFile(path, []byte("hello"), 0600) if err != nil { t.Fatalf("write file: %v", err) } _, err = OpenExecutable(path) if err == nil { t.Fatal("create executable: expected error on non-executable file") } // make it executable err = os.Chmod(path, 0700) if err != nil { t.Fatalf("chmod file: %v", err) } _, err = OpenExecutable(path) if err != nil { t.Fatalf("create executable: %v", err) } if bashEx.path != "/bin/bash" { t.Fatalf("create executable: unexpected path '%s'", bashEx.path) } _, err = bashEx.address(bashSym, 0, 0) if err != nil { t.Fatalf("find offset: %v", err) } _, err = bashEx.address("bogus", 0, 0) if err == nil { t.Fatal("find symbol: expected error") } } func TestExecutableOffset(t *testing.T) { symbolOffset, err := 
bashEx.address(bashSym, 0, 0) if err != nil { t.Fatal(err) } offset, err := bashEx.address(bashSym, 0x1, 0) if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(offset, 0x1)) offset, err = bashEx.address(bashSym, 0, 0x2) if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(offset, symbolOffset+0x2)) offset, err = bashEx.address(bashSym, 0x1, 0x2) if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(offset, 0x1+0x2)) } func TestExecutableLazyLoadSymbols(t *testing.T) { testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto") ex, err := OpenExecutable("/bin/bash") qt.Assert(t, qt.IsNil(err)) // Addresses must be empty, will be lazy loaded. qt.Assert(t, qt.HasLen(ex.cachedSymbols, 0)) prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") // Address must be a multiple of 4 on arm64, see // https://elixir.bootlin.com/linux/v6.6.4/source/arch/arm64/kernel/probes/uprobes.c#L42 up, err := ex.Uprobe(bashSym, prog, &UprobeOptions{Address: 124}) qt.Assert(t, qt.IsNil(err)) up.Close() // Addresses must still be empty as Address has been provided via options. qt.Assert(t, qt.HasLen(ex.cachedSymbols, 0)) up, err = ex.Uprobe(bashSym, prog, nil) qt.Assert(t, qt.IsNil(err)) up.Close() // Symbol table should be loaded. 
qt.Assert(t, qt.Not(qt.HasLen(ex.cachedSymbols, 0))) } func TestUprobe(t *testing.T) { testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") up, err := bashEx.Uprobe(bashSym, prog, nil) qt.Assert(t, qt.IsNil(err)) defer up.Close() testLink(t, up, prog) } func TestUprobeInfo(t *testing.T) { testutils.SkipOnOldKernel(t, "6.6", "bpf_link_info_perf_event") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") up, err := bashEx.Uprobe(bashSym, prog, nil) qt.Assert(t, qt.IsNil(err)) defer up.Close() info, err := up.Info() qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(info.Type, PerfEventType)) eventInfo := info.PerfEvent() qt.Assert(t, qt.Equals(eventInfo.Type, PerfEventUprobe)) uprobeInfo := eventInfo.Uprobe() qt.Assert(t, qt.StringContains(uprobeInfo.File, bashEx.path)) executable, err := OpenExecutable(uprobeInfo.File) qt.Assert(t, qt.IsNil(err)) sym, err := executable.Symbol(uint64(uprobeInfo.Offset)) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(sym.Symbol, bashSym)) qt.Assert(t, qt.Equals(sym.Offset, 0)) } func TestUprobeExtNotFound(t *testing.T) { prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") // This symbol will not be present in Executable (elf.SHN_UNDEF). _, err := bashEx.Uprobe("open", prog, nil) if err == nil { t.Fatal("expected error") } } func TestUprobeExtWithOpts(t *testing.T) { testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") // NB: It's not possible to invoke the uprobe since we use an arbitrary // address. 
up, err := bashEx.Uprobe("open", prog, &UprobeOptions{ // arm64 requires the addresses to be aligned (a multiple of 4) Address: 0x4, }) if err != nil { t.Fatal(err) } defer up.Close() } func TestUprobeWithPID(t *testing.T) { testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") up, err := bashEx.Uprobe(bashSym, prog, &UprobeOptions{PID: os.Getpid()}) if err != nil { t.Fatal(err) } defer up.Close() } func TestUprobeWithNonExistentPID(t *testing.T) { prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") // trying to open a perf event on a non-existent PID will return ESRCH. _, err := bashEx.Uprobe(bashSym, prog, &UprobeOptions{PID: -2}) if !errors.Is(err, unix.ESRCH) { t.Fatalf("expected ESRCH, got %v", err) } } func TestUretprobe(t *testing.T) { testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto") prog := mustLoadProgram(t, ebpf.Kprobe, 0, "") up, err := bashEx.Uretprobe(bashSym, prog, nil) qt.Assert(t, qt.IsNil(err)) defer up.Close() testLink(t, up, prog) } // Test u(ret)probe creation using perf_uprobe PMU. func TestUprobeCreatePMU(t *testing.T) { // Requires at least 4.17 (e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU") testutils.SkipOnOldKernel(t, "4.17", "perf_kprobe PMU") // Fetch the offset from the /bin/bash Executable already defined. off, err := bashEx.address(bashSym, 0, 0) qt.Assert(t, qt.IsNil(err)) // Prepare probe args. args := tracefs.ProbeArgs{ Type: tracefs.Uprobe, Symbol: bashSym, Path: bashEx.path, Offset: off, Pid: perfAllThreads, } // uprobe PMU pu, err := pmuProbe(args) qt.Assert(t, qt.IsNil(err)) defer pu.Close() // uretprobe PMU args.Ret = true pr, err := pmuProbe(args) qt.Assert(t, qt.IsNil(err)) defer pr.Close() } // Test fallback behaviour on kernels without perf_uprobe PMU available. func TestUprobePMUUnavailable(t *testing.T) { // Fetch the offset from the /bin/bash Executable already defined. 
// Test tracefs u(ret)probe creation on all kernel versions.
//
// The test walks through four scenarios on the shared bash executable:
// creating and closing a uprobe and a uretprobe, creating two identical
// events and checking that they get distinct IDs, rejecting an invalid group
// name, and accepting a custom group name.
func TestUprobeTraceFS(t *testing.T) {
	testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto")

	// Fetch the offset from the /bin/bash Executable already defined.
	off, err := bashEx.address(bashSym, 0, 0)
	qt.Assert(t, qt.IsNil(err))

	// Prepare probe args.
	args := tracefs.ProbeArgs{
		Type:   tracefs.Uprobe,
		Symbol: bashSym,
		Path:   bashEx.path,
		Offset: off,
		Pid:    perfAllThreads,
	}

	// Open and close tracefs u(ret)probes, checking all errors.
	up, err := tracefsProbe(args)
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsNil(up.Close()))
	// Same arguments again, this time as a return probe.
	args.Ret = true
	up, err = tracefsProbe(args)
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsNil(up.Close()))

	// Create two identical trace events, ensure their IDs differ.
	args.Ret = false
	u1, err := tracefsProbe(args)
	qt.Assert(t, qt.IsNil(err))
	defer u1.Close()
	qt.Assert(t, qt.IsNotNil(u1.tracefsEvent))

	u2, err := tracefsProbe(args)
	qt.Assert(t, qt.IsNil(err))
	defer u2.Close()
	qt.Assert(t, qt.IsNotNil(u2.tracefsEvent))

	// Compare the uprobes' tracefs IDs.
	qt.Assert(t, qt.Not(qt.Equals(u1.tracefsEvent.ID(), u2.tracefsEvent.ID())))

	// Expect an error when supplying an invalid custom group name
	args.Group = "/"
	_, err = tracefsProbe(args)
	qt.Assert(t, qt.Not(qt.IsNil(err)))

	// A valid custom group name is used as a prefix; the resulting group
	// must be that prefix followed by an underscore and 16 hex characters.
	args.Group = "customgroup"
	u3, err := tracefsProbe(args)
	qt.Assert(t, qt.IsNil(err))
	defer u3.Close()
	qt.Assert(t, qt.Matches(u3.tracefsEvent.Group(), `customgroup_[a-f0-9]{16}`))
}
if err := m.Update(uint32(0), uint32(0), ebpf.UpdateExist); err != nil { t.Fatal(err) } // Retrigger the ebpf program call. trigger(t) // Assert that this time the value has not been updated. assertMapValue(t, m, 0, 0) }) } } func TestUprobeProgramWrongPID(t *testing.T) { testutils.SkipOnOldKernel(t, "4.14", "uprobe on v4.9 returns EIO on vimto") m, p := newUpdaterMapProg(t, ebpf.Kprobe, 0) // Load the '/bin/bash' executable. ex, err := OpenExecutable("/bin/bash") if err != nil { t.Fatal(err) } // Open Uprobe on '/bin/bash' for the symbol 'main' // and attach it to the ebpf program created above. // Create the perf-event with the current process' PID // to make sure the event is not fired when we will try // to trigger the program execution via exec. u, err := ex.Uprobe("main", p, &UprobeOptions{PID: os.Getpid()}) if err != nil { t.Fatal(err) } defer u.Close() // Trigger ebpf program call. if err := exec.Command("/bin/bash", "--help").Run(); err != nil { t.Fatal(err) } // Assert that the value at index 0 is still 0. assertMapValue(t, m, 0, 0) } func TestHaveRefCtrOffsetPMU(t *testing.T) { testutils.CheckFeatureTest(t, haveRefCtrOffsetPMU) } ================================================ FILE: link/xdp.go ================================================ //go:build !windows package link import ( "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) // XDPAttachFlags represents how XDP program will be attached to interface. type XDPAttachFlags uint32 const ( // XDPGenericMode (SKB) links XDP BPF program for drivers which do // not yet support native XDP. XDPGenericMode XDPAttachFlags = 1 << (iota + 1) // XDPDriverMode links XDP BPF program into the driver’s receive path. XDPDriverMode // XDPOffloadMode offloads the entire XDP BPF program into hardware. XDPOffloadMode ) type XDPOptions struct { // Program must be an XDP BPF program. Program *ebpf.Program // Interface is the interface index to attach program to. 
Interface int // Flags is one of XDPAttachFlags (optional). // // Only one XDP mode should be set, without flag defaults // to driver/generic mode (best effort). Flags XDPAttachFlags } // AttachXDP links an XDP BPF program to an XDP hook. func AttachXDP(opts XDPOptions) (Link, error) { if t := opts.Program.Type(); t != ebpf.XDP { return nil, fmt.Errorf("invalid program type %s, expected XDP", t) } if opts.Interface < 1 { return nil, fmt.Errorf("invalid interface index: %d", opts.Interface) } rawLink, err := AttachRawLink(RawLinkOptions{ Program: opts.Program, Attach: ebpf.AttachXDP, Target: opts.Interface, Flags: uint32(opts.Flags), }) if err != nil { return nil, fmt.Errorf("failed to attach link: %w", err) } return &xdpLink{*rawLink}, nil } type xdpLink struct { RawLink } func (xdp *xdpLink) Info() (*Info, error) { var info sys.XDPLinkInfo if err := sys.ObjInfo(xdp.fd, &info); err != nil { return nil, fmt.Errorf("xdp link info: %s", err) } extra := &XDPInfo{ Ifindex: info.Ifindex, } return &Info{ info.Type, info.Id, ebpf.ProgramID(info.ProgId), extra, }, nil } ================================================ FILE: link/xdp_test.go ================================================ //go:build !windows package link import ( "math" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/testutils" ) const IfIndexLO = 1 func TestAttachXDP(t *testing.T) { testutils.SkipOnOldKernel(t, "5.9", "BPF_LINK_TYPE_XDP") prog := mustLoadProgram(t, ebpf.XDP, 0, "") _, err := AttachXDP(XDPOptions{ Program: prog, Interface: math.MaxInt, }) qt.Assert(t, qt.IsNotNil(err)) l, err := AttachXDP(XDPOptions{ Program: prog, Interface: IfIndexLO, }) qt.Assert(t, qt.IsNil(err)) info, err := l.Info() qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(info.XDP().Ifindex, IfIndexLO)) testLink(t, l, prog) } ================================================ FILE: linker.go ================================================ package ebpf import ( "debug/elf" 
"encoding/binary" "errors" "fmt" "io" "io/fs" "math" "slices" "strings" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/kallsyms" "github.com/cilium/ebpf/internal/platform" ) // handles stores handle objects to avoid gc cleanup type handles []*btf.Handle func (hs *handles) add(h *btf.Handle) (int, error) { if h == nil { return 0, nil } if len(*hs) == math.MaxInt16 { return 0, fmt.Errorf("can't add more than %d module FDs to fdArray", math.MaxInt16) } *hs = append(*hs, h) // return length of slice so that indexes start at 1 return len(*hs), nil } func (hs handles) fdArray() []int32 { // first element of fda is reserved as no module can be indexed with 0 fda := []int32{0} for _, h := range hs { fda = append(fda, int32(h.FD())) } return fda } func (hs *handles) Close() error { var errs []error for _, h := range *hs { errs = append(errs, h.Close()) } return errors.Join(errs...) } // The linker is responsible for resolving bpf-to-bpf calls between programs // within an ELF. Each BPF program must be a self-contained binary blob, // so when an instruction in one ELF program section wants to jump to // a function in another, the linker needs to pull in the bytecode // (and BTF info) of the target function and concatenate the instruction // streams. // // Later on in the pipeline, all call sites are fixed up with relative jumps // within this newly-created instruction stream to then finally hand off to // the kernel with BPF_PROG_LOAD. // // Each function is denoted by an ELF symbol and the compiler takes care of // register setup before each jump instruction. // hasFunctionReferences returns true if insns contains one or more bpf2bpf // function references. func hasFunctionReferences(insns asm.Instructions) bool { for _, i := range insns { if i.IsFunctionReference() { return true } } return false } // applyRelocations collects and applies any CO-RE relocations in insns. 
//
// insns are modified in place.
//
// applyRelocations gathers every instruction for which
// btf.CORERelocationMetadata reports a relocation and returns early if there
// are none. bo defaults to internal.NativeEndian when nil.
//
// Relocation targets are chosen as follows: without a kernelOverride, the
// kernel spec and the spec of every module reported by c are used. With a
// kernelOverride, only that spec is used. extraTargets are appended in both
// cases. The fixups computed by btf.CORERelocate are then applied to the
// collected instructions, matched by position.
func applyRelocations(insns asm.Instructions, bo binary.ByteOrder, b *btf.Builder, c *btf.Cache, kernelOverride *btf.Spec, extraTargets []*btf.Spec) error {
	// relos and reloInsns are parallel: relos[i] belongs to reloInsns[i].
	var relos []*btf.CORERelocation
	var reloInsns []*asm.Instruction
	iter := insns.Iterate()
	for iter.Next() {
		if relo := btf.CORERelocationMetadata(iter.Ins); relo != nil {
			relos = append(relos, relo)
			reloInsns = append(reloInsns, iter.Ins)
		}
	}

	// Nothing to relocate, avoid loading any BTF.
	if len(relos) == 0 {
		return nil
	}

	if bo == nil {
		bo = internal.NativeEndian
	}

	var targets []*btf.Spec
	if kernelOverride == nil {
		kernel, err := c.Kernel()
		if err != nil {
			return fmt.Errorf("load kernel spec: %w", err)
		}

		modules, err := c.Modules()
		// Ignore fs.ErrNotExist to cater to kernels which have CONFIG_DEBUG_INFO_BTF_MODULES
		// or CONFIG_DEBUG_INFO_BTF disabled.
		if err != nil && !errors.Is(err, fs.ErrNotExist) {
			return err
		}

		// Reserve room for the kernel, all modules and the extra targets
		// which are appended further down.
		targets = make([]*btf.Spec, 0, 1+len(modules)+len(extraTargets))
		targets = append(targets, kernel)

		for _, kmod := range modules {
			spec, err := c.Module(kmod)
			if err != nil {
				return fmt.Errorf("load BTF for kmod %s: %w", kmod, err)
			}

			targets = append(targets, spec)
		}
	} else {
		// We expect kernelOverride to contain the merged types
		// of vmlinux and kernel modules, as distributed by btfhub.
		targets = []*btf.Spec{kernelOverride}
	}

	targets = append(targets, extraTargets...)

	fixups, err := btf.CORERelocate(relos, targets, bo, b.Add)
	if err != nil {
		return err
	}

	// fixups are applied by index to the instruction collected at the same
	// position as the relocation.
	for i, fixup := range fixups {
		if err := fixup.Apply(reloInsns[i]); err != nil {
			return fmt.Errorf("fixup for %s: %w", relos[i], err)
		}
	}

	return nil
}
refs := make(map[*ProgramSpec][]string) for _, prog := range progs { refs[prog] = prog.Instructions.FunctionReferences() } // Create a flattened instruction stream, but don't modify progs yet to // avoid linking multiple times. flattened := make([]asm.Instructions, 0, len(names)) for _, name := range names { flattened = append(flattened, flattenInstructions(name, progs, refs)) } // Finally, assign the flattened instructions. for i, name := range names { progs[name].Instructions = flattened[i] } } // flattenInstructions resolves bpf-to-bpf calls for a single program. // // Flattens the instructions of prog by concatenating the instructions of all // direct and indirect dependencies. // // progs contains all referenceable programs, while refs contain the direct // dependencies of each program. func flattenInstructions(name string, progs map[string]*ProgramSpec, refs map[*ProgramSpec][]string) asm.Instructions { prog := progs[name] progRefs := refs[prog] if len(progRefs) == 0 { // No references, nothing to do. return prog.Instructions } insns := make(asm.Instructions, len(prog.Instructions)) copy(insns, prog.Instructions) // Add all direct references of prog to the list of to be linked programs. pending := make([]string, len(progRefs)) copy(pending, progRefs) // All references for which we've appended instructions. linked := make(map[string]bool) // Iterate all pending references. We can't use a range since pending is // modified in the body below. for len(pending) > 0 { var ref string ref, pending = pending[0], pending[1:] if linked[ref] { // We've already linked this ref, don't append instructions again. continue } progRef := progs[ref] if progRef == nil { // We don't have instructions that go with this reference. This // happens when calling extern functions. continue } insns = append(insns, progRef.Instructions...) linked[ref] = true // Make sure we link indirect references. pending = append(pending, refs[progRef]...) 
} return insns } // fixupAndValidate is called by the ELF reader right before marshaling the // instruction stream. It performs last-minute adjustments to the program and // runs some sanity checks before sending it off to the kernel. func fixupAndValidate(insns asm.Instructions) error { iter := insns.Iterate() for iter.Next() { ins := iter.Ins // Map load was tagged with a Reference, but does not contain a Map pointer. needsMap := ins.Reference() != "" || ins.Metadata.Get(kconfigMetaKey{}) != nil if ins.IsLoadFromMap() && needsMap && ins.Map() == nil { return fmt.Errorf("instruction %d: %w", iter.Index, asm.ErrUnsatisfiedMapReference) } fixupProbeReadKernel(ins) } return nil } // A constant used to poison calls to non-existent kfuncs. // // Similar POISON_CALL_KFUNC_BASE in libbpf, except that we use a value lower // than 2^28 to fit into a tagged constant. const kfuncCallPoisonBase = 0xdedc0de // fixupKfuncs loops over all instructions in search for kfunc calls. // If at least one is found, the current kernels BTF and module BTFis are searched to set Instruction.Constant // and Instruction.Offset to the correct values. func fixupKfuncs(insns asm.Instructions, cache *btf.Cache) (_ handles, err error) { closeOnError := func(c io.Closer) { if err != nil { c.Close() } } iter := insns.Iterate() for iter.Next() { ins := iter.Ins if metadata := ins.Metadata.Get(kfuncMetaKey{}); metadata != nil { goto fixups } } return nil, nil fixups: // Only load kernel BTF if we found at least one kfunc call. kernelSpec can be // nil if the kernel does not have BTF, in which case we poison all kfunc // calls. _, err = cache.Kernel() // ErrNotSupportedOnOS wraps ErrNotSupported, check for it first. 
if errors.Is(err, internal.ErrNotSupportedOnOS) { return nil, fmt.Errorf("kfuncs are not supported on this platform: %w", err) } if err != nil && !errors.Is(err, ErrNotSupported) { return nil, err } fdArray := make(handles, 0) defer closeOnError(&fdArray) for { ins := iter.Ins metadata := ins.Metadata.Get(kfuncMetaKey{}) if metadata == nil { if !iter.Next() { // break loop if this was the last instruction in the stream. break } continue } // check meta, if no meta return err kfm, _ := metadata.(*kfuncMeta) if kfm == nil { return nil, fmt.Errorf("kfuncMetaKey doesn't contain kfuncMeta") } // findTargetInKernel returns btf.ErrNotFound if the input btf.Spec is nil. target := btf.Type((*btf.Func)(nil)) spec, module, err := findTargetInKernel(kfm.Func.Name, &target, cache) if errors.Is(err, btf.ErrNotFound) { if kfm.Binding == elf.STB_WEAK { if ins.IsKfuncCall() { // If the kfunc call is weak and not found, poison the call. Use a // recognizable constant to make it easier to debug. fn, err := asm.BuiltinFuncForPlatform(platform.Native, kfuncCallPoisonBase) if err != nil { return nil, err } *ins = fn.Call() } else if ins.OpCode.IsDWordLoad() { // If the kfunc DWordLoad is weak and not found, set its address to 0. ins.Constant = 0 ins.Src = 0 } else { return nil, fmt.Errorf("only kfunc calls and dword loads may have kfunc metadata") } iter.Next() continue } // Error on non-weak kfunc not found. 
return nil, fmt.Errorf("kfunc %q: %w", kfm.Func.Name, ErrNotSupported) } if err != nil { return nil, fmt.Errorf("finding kfunc in kernel: %w", err) } idx, err := fdArray.add(module) if err != nil { return nil, err } if err := btf.CheckTypeCompatibility(kfm.Func.Type, target.(*btf.Func).Type); err != nil { return nil, &incompatibleKfuncError{kfm.Func.Name, err} } id, err := spec.TypeID(target) if err != nil { return nil, err } ins.Constant = int64(id) ins.Offset = int16(idx) if !iter.Next() { break } } return fdArray, nil } type incompatibleKfuncError struct { name string err error } func (ike *incompatibleKfuncError) Error() string { return fmt.Sprintf("kfunc %q: %s", ike.name, ike.err) } // fixupProbeReadKernel replaces calls to bpf_probe_read_{kernel,user}(_str) // with bpf_probe_read(_str) on kernels that don't support it yet. func fixupProbeReadKernel(ins *asm.Instruction) { if !ins.IsBuiltinCall() { return } // Kernel supports bpf_probe_read_kernel, nothing to do. if haveProbeReadKernel() == nil { return } switch asm.BuiltinFunc(ins.Constant) { case asm.FnProbeReadKernel, asm.FnProbeReadUser: ins.Constant = int64(asm.FnProbeRead) case asm.FnProbeReadKernelStr, asm.FnProbeReadUserStr: ins.Constant = int64(asm.FnProbeReadStr) } } // resolveKconfigReferences creates and populates a .kconfig map if necessary. // // Returns a nil Map and no error if no references exist. 
func resolveKconfigReferences(insns asm.Instructions) (_ *Map, err error) {
	// closeOnError closes c only if the function is returning a non-nil
	// (named) err.
	closeOnError := func(c io.Closer) {
		if err != nil {
			c.Close()
		}
	}

	// Find the first instruction carrying kconfig metadata. Its MapSpec is
	// used as the template for the .kconfig map.
	var spec *MapSpec
	iter := insns.Iterate()
	for iter.Next() {
		meta, _ := iter.Ins.Metadata.Get(kconfigMetaKey{}).(*kconfigMeta)
		if meta != nil {
			spec = meta.Map
			break
		}
	}

	if spec == nil {
		return nil, nil
	}

	// resolveKconfig operates on a copy, the spec referenced by the
	// instruction metadata is left untouched.
	cpy := spec.Copy()
	if err := resolveKconfig(cpy); err != nil {
		return nil, err
	}

	kconfig, err := NewMap(cpy)
	if err != nil {
		return nil, err
	}
	defer closeOnError(kconfig)

	// Resolve all instructions which load from .kconfig map with actual map
	// and offset inside it.
	iter = insns.Iterate()
	for iter.Next() {
		meta, _ := iter.Ins.Metadata.Get(kconfigMetaKey{}).(*kconfigMeta)
		if meta == nil {
			continue
		}

		// Pointer comparison: every reference must use the very same MapSpec
		// that was found by the first pass.
		if meta.Map != spec {
			return nil, fmt.Errorf("instruction %d: reference to multiple .kconfig maps is not allowed", iter.Index)
		}

		if err := iter.Ins.AssociateMap(kconfig); err != nil {
			return nil, fmt.Errorf("instruction %d: %w", iter.Index, err)
		}

		// Encode a map read at the offset of the var in the datasec.
		iter.Ins.Constant = int64(uint64(meta.Offset) << 32)
		// Reset the kconfig metadata entry to nil now that the instruction
		// has been rewritten.
		iter.Ins.Metadata.Set(kconfigMetaKey{}, nil)
	}

	return kconfig, nil
}

// resolveKsymReferences writes kernel symbol addresses into the instructions
// that carry ksym metadata.
//
// The names of all referenced symbols are collected first and resolved with a
// single call to kallsyms.AssignAddresses. An instruction referencing a weak
// symbol gets its Constant set to the resulting address, which is 0 if the
// symbol could not be resolved. Unresolved non-weak symbols are reported in
// the returned error and their instructions are left unmodified.
func resolveKsymReferences(insns asm.Instructions) error {
	// fixup pairs an instruction with the ksym metadata attached to it.
	type fixup struct {
		*asm.Instruction
		*ksymMeta
	}

	var symbols map[string]uint64
	var fixups []fixup

	iter := insns.Iterate()
	for iter.Next() {
		ins := iter.Ins
		meta, _ := ins.Metadata.Get(ksymMetaKey{}).(*ksymMeta)
		if meta == nil {
			continue
		}

		// Allocate lazily, most programs do not reference ksyms.
		if symbols == nil {
			symbols = make(map[string]uint64)
		}

		// An address of 0 marks the symbol as not (yet) resolved.
		symbols[meta.Name] = 0
		fixups = append(fixups, fixup{
			iter.Ins, meta,
		})
	}

	if len(symbols) == 0 {
		return nil
	}

	err := kallsyms.AssignAddresses(symbols)
	// Tolerate ErrRestrictedKernel during initial lookup, user may have all weak
	// ksyms and a fallback path.
	if err != nil && !errors.Is(err, ErrRestrictedKernel) {
		return fmt.Errorf("resolve ksyms: %w", err)
	}

	// missing collects the names of unresolved, non-weak symbols without
	// duplicates.
	var missing []string
	for _, fixup := range fixups {
		addr := symbols[fixup.Name]
		// A weak ksym variable in eBPF C means its resolution is optional.
		if addr == 0 && fixup.Binding != elf.STB_WEAK {
			if !slices.Contains(missing, fixup.Name) {
				missing = append(missing, fixup.Name)
			}
			continue
		}

		fixup.Constant = int64(addr)
	}

	if len(missing) > 0 {
		if err != nil {
			// Program contains required ksyms, return the error from above.
			return fmt.Errorf("resolve required ksyms: %s: %w", strings.Join(missing, ","), err)
		}

		return fmt.Errorf("kernel is missing symbol: %s", strings.Join(missing, ","))
	}

	return nil
}

================================================
FILE: linker_test.go
================================================
package ebpf

import (
	"errors"
	"testing"

	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal/testutils"

	"github.com/go-quicktest/qt"
)

// TestFindReferences builds three program specs in which entrypoint calls
// my_func, which in turn calls my_other_func. After flattening, running
// entrypoint must return 1337, the value loaded by my_other_func.
func TestFindReferences(t *testing.T) {
	progs := map[string]*ProgramSpec{
		"entrypoint": {
			Type: SocketFilter,
			Instructions: asm.Instructions{
				// Make sure the call doesn't happen at instruction 0
				// to exercise the relative offset calculation.
				asm.Mov.Reg(asm.R0, asm.R1),
				asm.Call.Label("my_func"),
				asm.Return(),
			},
			License: "MIT",
		},
		"my_other_func": {
			Instructions: asm.Instructions{
				asm.LoadImm(asm.R0, 1337, asm.DWord).WithSymbol("my_other_func"),
				asm.Return(),
			},
		},
		"my_func": {
			Instructions: asm.Instructions{
				asm.Call.Label("my_other_func").WithSymbol("my_func"),
				asm.Return(),
			},
		},
	}

	flattenPrograms(progs, []string{"entrypoint"})

	prog, err := newProgram(t, progs["entrypoint"], nil)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	ret := mustRun(t, prog, nil)

	if ret != 1337 {
		t.Errorf("Expected return code 1337, got %d", ret)
	}
}

// TestForwardFunctionDeclaration loads the call_fwd program from testdata and
// expects loading to fail with asm.ErrUnsatisfiedProgramReference. It then
// appends an implementation of fwd() and expects the program to return 23.
func TestForwardFunctionDeclaration(t *testing.T) {
	file := testutils.NativeFile(t, "testdata/fwd_decl-%s.elf")
	coll, err := LoadCollectionSpec(file)
	if err != nil {
		t.Fatal(err)
	}

	spec := coll.Programs["call_fwd"]

	// This program calls an unimplemented forward function declaration.
	_, err = newProgram(t, spec, nil)
	if !errors.Is(err, asm.ErrUnsatisfiedProgramReference) {
		t.Fatal("Expected an error wrapping ErrUnsatisfiedProgramReference, got:", err)
	}

	// Append the implementation of fwd().
	spec.Instructions = append(spec.Instructions,
		asm.Mov.Imm32(asm.R0, 23).WithSymbol("fwd"),
		asm.Return(),
	)

	// The body of the subprog we appended does not come with BTF func_infos,
	// so the verifier will reject it. Load without BTF.
	for i, ins := range spec.Instructions {
		if btf.FuncMetadata(&ins) != nil || ins.Source() != nil {
			// Wipe all metadata, then restore only symbol and reference.
			sym := ins.Symbol()
			ref := ins.Reference()
			ins.Metadata = asm.Metadata{}
			spec.Instructions[i] = ins.WithSymbol(sym).WithReference(ref)
		}
	}

	prog, err := newProgram(t, spec, nil)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))

	ret := mustRun(t, prog, nil)
	if ret != 23 {
		t.Fatalf("Expected 23, got %d", ret)
	}
}

// TestFlattenInstructionsAllocations asserts that flattenInstructions performs
// zero allocations for a program without references.
func TestFlattenInstructionsAllocations(t *testing.T) {
	name := "entrypoint"
	instructions := asm.Instructions{
		asm.LoadImm(asm.R0, 0, asm.DWord),
		asm.Return(),
	}
	prog := &ProgramSpec{
		Name:         name,
		Instructions: instructions,
	}
	progs := map[string]*ProgramSpec{name: prog}
	refs := make(map[*ProgramSpec][]string)

	// ensure that flattenInstructions does not allocate memory
	// if there is no reference for the given program.
	allocs := testing.AllocsPerRun(5, func() {
		_ = flattenInstructions(name, progs, refs)
	})
	qt.Assert(t, qt.Equals(allocs, float64(0)))
}

================================================
FILE: map.go
================================================
package ebpf

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"slices"
	"strings"
	"sync"
	"time"
	"unsafe"

	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/platform"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/sysenc"
	"github.com/cilium/ebpf/internal/unix"
)

// Errors returned by Map and MapIterator methods.
var (
	ErrKeyNotExist      = errors.New("key does not exist")
	ErrKeyExist         = errors.New("key already exists")
	ErrIterationAborted = errors.New("iteration aborted")
	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")

	// pre-allocating these errors here since they may get called in hot code paths
	// and cause unnecessary memory allocations
	errMapLookupKeyNotExist = fmt.Errorf("lookup: %w", sysErrKeyNotExist)
)

// MapOptions control loading a map into the kernel.
type MapOptions struct {
	// The base path to pin maps in if requested via PinByName.
	// Existing maps will be re-used if they are compatible, otherwise an
	// error is returned.
	PinPath string
	// LoadPinOptions is handed to LoadPinnedMap when an already pinned map
	// is opened for a spec using PinByName.
	LoadPinOptions LoadPinOptions
}

// MapID represents the unique ID of an eBPF map.
type MapID = sys.MapID

// MapSpec defines a Map.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid.
	//
	// Unsupported characters will be stripped.
	Name string
	// Type, KeySize, ValueSize and MaxEntries are passed to the kernel when
	// the map is created. For some map types fixupMagicFields fills in or
	// clamps these values first.
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32

	// Flags is passed to the kernel and specifies additional map
	// creation attributes.
	Flags uint32

	// Automatically pin and load a map from MapOptions.PinPath.
	// Generates an error if an existing pinned map is incompatible with the MapSpec.
	Pinning PinType

	// Specify numa node during map creation
	// (effective only if sys.BPF_F_NUMA_NODE flag is set,
	// which can be imported from golang.org/x/sys/unix)
	NumaNode uint32

	// The initial contents of the map. May be nil.
	Contents []MapKV

	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
	InnerMap *MapSpec

	// MapExtra is an opaque field whose meaning is map-specific.
	//
	// Available from 5.16.
	MapExtra uint64

	// Extra trailing bytes found in the ELF map definition when using structs
	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
	// Must be nil or empty before instantiating the MapSpec into a Map.
	Extra *bytes.Reader

	// The key and value type of this map. May be nil.
	Key, Value btf.Type

	// Tags is a list of btf_decl_tag attributes set on the map definition.
	//
	// Decorate a map definition with `__attribute__((btf_decl_tag("foo")))`.
	Tags []string
}

// String returns a short description of the spec consisting of its type, key
// size, value size, maximum number of entries and flags.
func (ms *MapSpec) String() string {
	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
}

// Copy returns a copy of the spec.
//
// MapSpec.Contents is a shallow copy.
func (ms *MapSpec) Copy() *MapSpec { if ms == nil { return nil } cpy := *ms cpy.Contents = slices.Clone(cpy.Contents) cpy.Key = btf.Copy(cpy.Key) cpy.Value = btf.Copy(cpy.Value) cpy.Tags = slices.Clone(cpy.Tags) if cpy.InnerMap == ms { cpy.InnerMap = &cpy } else { cpy.InnerMap = ms.InnerMap.Copy() } if cpy.Extra != nil { extra := *cpy.Extra cpy.Extra = &extra } return &cpy } // fixupMagicFields fills fields of MapSpec which are usually // left empty in ELF or which depend on runtime information. // // The method doesn't modify Spec, instead returning a copy. // The copy is only performed if fixups are necessary, so callers mustn't mutate // the returned spec. func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) { switch { case spec.Type.canStoreMap(): if spec.ValueSize != 0 && spec.ValueSize != 4 { return nil, errors.New("ValueSize must be zero or four for map of map") } spec = spec.Copy() spec.ValueSize = 4 case spec.Type == PerfEventArray: if spec.KeySize != 0 && spec.KeySize != 4 { return nil, errors.New("KeySize must be zero or four for perf event array") } if spec.ValueSize != 0 && spec.ValueSize != 4 { return nil, errors.New("ValueSize must be zero or four for perf event array") } spec = spec.Copy() spec.KeySize = 4 spec.ValueSize = 4 n, err := PossibleCPU() if err != nil { return nil, fmt.Errorf("fixup perf event array: %w", err) } if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n { // MaxEntries should be zero most of the time, but there is code // out there which hardcodes large constants. Clamp the number // of entries to the number of CPUs at most. Allow creating maps with // less than n items since some kernel selftests relied on this // behaviour in the past. spec.MaxEntries = n } case spec.Type == CPUMap: n, err := PossibleCPU() if err != nil { return nil, fmt.Errorf("fixup cpu map: %w", err) } if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n { // Perform clamping similar to PerfEventArray. 
spec.MaxEntries = n } } return spec, nil } // dataSection returns the contents of a datasec if the MapSpec represents one. func (ms *MapSpec) dataSection() ([]byte, error) { if n := len(ms.Contents); n != 1 { return nil, fmt.Errorf("expected one key, found %d", n) } kv := ms.Contents[0] if key, ok := ms.Contents[0].Key.(uint32); !ok || key != 0 { return nil, fmt.Errorf("expected contents to have key 0") } value, ok := kv.Value.([]byte) if !ok { return nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value) } return value, nil } // updateDataSection copies the values of variables into MapSpec.Contents[0].Value. // // Only variables declared in sectionName will be updated. func (ms *MapSpec) updateDataSection(vars map[string]*VariableSpec, sectionName string) error { var specs []*VariableSpec for _, vs := range vars { if vs.SectionName != sectionName { continue } specs = append(specs, vs) } if len(specs) == 0 { return nil } data, err := ms.dataSection() if err != nil { return err } // Do not modify the original data slice, ms.Contents is a shallow copy. data = slices.Clone(data) slices.SortFunc(specs, func(a, b *VariableSpec) int { return int(int64(a.Offset) - int64(b.Offset)) }) offset := uint32(0) for _, v := range specs { if v.Offset < offset { return fmt.Errorf("variable %s (offset %d) overlaps with previous variable (offset %d)", v.Name, v.Offset, offset) } end := v.Offset + v.Size() if int(end) > len(data) { return fmt.Errorf("variable %s exceeds map size", v.Name) } copy(data[v.Offset:end], v.Value) offset = end } ms.Contents = []MapKV{{Key: uint32(0), Value: data}} return nil } func (ms *MapSpec) readOnly() bool { return (ms.Flags & sys.BPF_F_RDONLY_PROG) > 0 } func (ms *MapSpec) writeOnly() bool { return (ms.Flags & sys.BPF_F_WRONLY_PROG) > 0 } // MapKV is used to initialize the contents of a Map. 
type MapKV struct { Key interface{} Value interface{} } // Compatible returns nil if an existing map may be used instead of creating // one from the spec. // // Returns an error wrapping [ErrMapIncompatible] otherwise. func (ms *MapSpec) Compatible(m *Map) error { ms, err := ms.fixupMagicFields() if err != nil { return err } diffs := []string{} if m.typ != ms.Type { diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type)) } if m.keySize != ms.KeySize { diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize)) } if m.valueSize != ms.ValueSize { diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize)) } if m.maxEntries != ms.MaxEntries { diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries)) } flags := ms.Flags if ms.Type == DevMap || ms.Type == DevMapHash { // As of 0cdbb4b09a06 ("devmap: Allow map lookups from eBPF") // BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly // allow this mismatch. flags |= (m.flags & sys.BPF_F_RDONLY_PROG) } if m.flags != flags { diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, flags)) } if len(diffs) == 0 { return nil } return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible) } // Map represents a Map file descriptor. // // It is not safe to close a map which is used by other goroutines. // // Methods which take interface{} arguments by default encode // them using binary.Read/Write in the machine's native endianness. // // Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler // if you require custom encoding. type Map struct { name string fd *sys.FD typ MapType keySize uint32 valueSize uint32 maxEntries uint32 flags uint32 pinnedPath string // Per CPU maps return values larger than the size in the spec fullValueSize int memory *Memory } // NewMapFromFD creates a [Map] around a raw fd. // // You should not use fd after calling this function. 
//
// Requires at least Linux 4.13.
func NewMapFromFD(fd int) (*Map, error) {
	f, err := sys.NewFD(fd)
	if err != nil {
		return nil, err
	}

	return newMapFromFD(f)
}

// newMapFromFD creates a Map from fd using the name, type, sizes and flags
// reported by minimalMapInfoFromFd.
//
// fd is closed if the map info can't be retrieved.
func newMapFromFD(fd *sys.FD) (*Map, error) {
	info, err := minimalMapInfoFromFd(fd)
	if err != nil {
		fd.Close()
		return nil, fmt.Errorf("get map info: %w", err)
	}

	return newMapFromParts(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
}

// NewMap creates a new Map.
//
// It's equivalent to calling NewMapWithOptions with default options.
func NewMap(spec *MapSpec) (*Map, error) {
	return NewMapWithOptions(spec, MapOptions{})
}

// NewMapWithOptions creates a new Map.
//
// Creating a map for the first time will perform feature detection
// by creating small, temporary maps.
//
// The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done
// by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
//
// May return an error wrapping ErrMapIncompatible.
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
	m, err := newMapWithOptions(spec, opts, btf.NewCache())
	if err != nil {
		return nil, fmt.Errorf("creating map: %w", err)
	}

	// Close the map again if finalizing it with the spec fails, so that the
	// caller never receives a map together with an error.
	if err := m.finalize(spec); err != nil {
		m.Close()
		return nil, fmt.Errorf("populating map: %w", err)
	}

	return m, nil
}

// newMapWithOptions loads a compatible pinned map or creates a new one from
// spec. The returned map has not been finalized.
//
// With PinByName, a map pinned at opts.PinPath/spec.Name is returned if it
// exists and is compatible with spec. Otherwise a new map is created and
// pinned at that path. Map types which store other maps get a temporary
// template map created from spec.InnerMap.
func newMapWithOptions(spec *MapSpec, opts MapOptions, c *btf.Cache) (_ *Map, err error) {
	// closeOnError closes c only if the function is returning a non-nil
	// (named) err.
	closeOnError := func(c io.Closer) {
		if err != nil {
			c.Close()
		}
	}

	switch spec.Pinning {
	case PinByName:
		if spec.Name == "" {
			return nil, fmt.Errorf("pin by name: missing Name")
		}

		if opts.PinPath == "" {
			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
		}

		path := filepath.Join(opts.PinPath, spec.Name)
		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
		if errors.Is(err, unix.ENOENT) {
			// Nothing is pinned at path yet: leave the switch and create the
			// map below.
			break
		}
		if err != nil {
			return nil, fmt.Errorf("load pinned map: %w", err)
		}
		defer closeOnError(m)

		if err := spec.Compatible(m); err != nil {
			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
		}

		return m, nil

	case PinNone:
		// Nothing to do here

	default:
		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
	}

	var innerFd *sys.FD
	if spec.Type.canStoreMap() {
		if spec.InnerMap == nil {
			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
		}

		if spec.InnerMap.Pinning != PinNone {
			return nil, errors.New("inner maps cannot be pinned")
		}

		template, err := spec.InnerMap.createMap(nil, c)
		if err != nil {
			return nil, fmt.Errorf("inner map: %w", err)
		}
		// The template is closed unconditionally when this function returns.
		defer template.Close()

		// Intentionally skip populating and freezing (finalizing)
		// the inner map template since it will be removed shortly.
		innerFd = template.fd
	}

	m, err := spec.createMap(innerFd, c)
	if err != nil {
		return nil, err
	}
	defer closeOnError(m)

	if spec.Pinning == PinByName {
		path := filepath.Join(opts.PinPath, spec.Name)
		if err := m.Pin(path); err != nil {
			return nil, fmt.Errorf("pin map to %s: %w", path, err)
		}
	}

	return m, nil
}

// Memory returns a memory-mapped region for the Map.
The Map must have been // created with the BPF_F_MMAPABLE flag. Repeated calls to Memory return the // same mapping. Callers are responsible for coordinating access to Memory. func (m *Map) Memory() (*Memory, error) { if m.memory != nil { return m.memory, nil } if m.flags&sys.BPF_F_MMAPABLE == 0 { return nil, fmt.Errorf("Map was not created with the BPF_F_MMAPABLE flag: %w", ErrNotSupported) } size, err := m.memorySize() if err != nil { return nil, err } mm, err := newMemory(m.FD(), size) if err != nil { return nil, fmt.Errorf("creating new Memory: %w", err) } m.memory = mm return mm, nil } // unsafeMemory returns a heap-mapped memory region for the Map. The Map must // have been created with the BPF_F_MMAPABLE flag. Repeated calls to Memory // return the same mapping. Callers are responsible for coordinating access to // Memory. func (m *Map) unsafeMemory() (*Memory, error) { if m.memory != nil { if !m.memory.heap { return nil, errors.New("unsafeMemory would return existing non-heap memory") } return m.memory, nil } if m.flags&sys.BPF_F_MMAPABLE == 0 { return nil, fmt.Errorf("Map was not created with the BPF_F_MMAPABLE flag: %w", ErrNotSupported) } size, err := m.memorySize() if err != nil { return nil, err } mm, err := newUnsafeMemory(m.FD(), size) if err != nil { return nil, fmt.Errorf("creating new Memory: %w", err) } m.memory = mm return mm, nil } func (m *Map) memorySize() (int, error) { switch m.Type() { case Array: // In Arrays, values are always laid out on 8-byte boundaries regardless of // architecture. Multiply by MaxEntries and align the result to the host's // page size. size := int(internal.Align(m.ValueSize(), 8) * m.MaxEntries()) size = internal.Align(size, os.Getpagesize()) return size, nil case Arena: // For Arenas, MaxEntries denotes the maximum number of pages available to // the arena. 
return int(m.MaxEntries()) * os.Getpagesize(), nil } return 0, fmt.Errorf("determine memory size of map type %s: %w", m.Type(), ErrNotSupported) } // createMap validates the spec's properties and creates the map in the kernel // using the given opts. It does not populate or freeze the map. func (spec *MapSpec) createMap(inner *sys.FD, c *btf.Cache) (_ *Map, err error) { closeOnError := func(closer io.Closer) { if err != nil { closer.Close() } } // Kernels 4.13 through 5.4 used a struct bpf_map_def that contained // additional 'inner_map_idx' and later 'numa_node' fields. // In order to support loading these definitions, tolerate the presence of // extra bytes, but require them to be zeroes. if spec.Extra != nil { if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil { return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map") } } spec, err = spec.fixupMagicFields() if err != nil { return nil, err } p, sysMapType := platform.DecodeConstant(spec.Type) if p != platform.Native { return nil, fmt.Errorf("map type %s (%s): %w", spec.Type, p, internal.ErrNotSupportedOnOS) } attr := sys.MapCreateAttr{ MapName: maybeFillObjName(spec.Name), MapType: sys.MapType(sysMapType), KeySize: spec.KeySize, ValueSize: spec.ValueSize, MaxEntries: spec.MaxEntries, MapFlags: spec.Flags, NumaNode: spec.NumaNode, MapExtra: spec.MapExtra, } if inner != nil { attr.InnerMapFd = inner.Uint() } if spec.Key != nil || spec.Value != nil { handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value) if err != nil && !errors.Is(err, btf.ErrNotSupported) { return nil, fmt.Errorf("load BTF: %w", err) } if handle != nil { defer handle.Close() // Use BTF k/v during map creation. 
attr.BtfFd = uint32(handle.FD()) attr.BtfKeyTypeId = keyTypeID attr.BtfValueTypeId = valueTypeID } if spec.Type == StructOpsMap { if handle == nil { return nil, fmt.Errorf("struct_ops requires BTF") } localValue, ok := btf.As[*btf.Struct](spec.Value) if !ok { return nil, fmt.Errorf("struct_ops: value must be struct") } targetValue, targetID, module, err := structOpsFindTarget(localValue, c) if err != nil { return nil, fmt.Errorf("struct_ops: %w", err) } defer module.Close() spec = spec.Copy() spec.ValueSize = targetValue.Size attr.ValueSize = targetValue.Size attr.BtfVmlinuxValueTypeId = targetID if module != nil { // BPF_F_VTYPE_BTF_OBJ_FD is required if the type comes from a module attr.MapFlags |= sys.BPF_F_VTYPE_BTF_OBJ_FD // set FD for the kernel module attr.ValueTypeBtfObjFd = int32(module.FD()) } // StructOpsMap forbids passing BtfKeyTypeId or BtfValueTypeId, but // requires BtfFd. Do the simple thing and just zero out the fields. // See https://github.com/torvalds/linux/blob/9b332cece987ee1790b2ed4c989e28162fa47860/kernel/bpf/syscall.c#L1382-L1384 attr.BtfKeyTypeId = 0 attr.BtfValueTypeId = 0 } } fd, err := sys.MapCreate(&attr) // Some map types don't support BTF k/v in earlier kernel versions. // Remove BTF metadata and retry map creation. 
if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 { attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0 fd, err = sys.MapCreate(&attr) } if err != nil { return nil, handleMapCreateError(attr, spec, err) } defer closeOnError(fd) m, err := newMapFromParts(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags) if err != nil { return nil, fmt.Errorf("map create: %w", err) } return m, nil } func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error { if platform.IsWindows { if errors.Is(err, unix.EINVAL) && attr.MapFlags != 0 { return fmt.Errorf("map create: flags: %w", internal.ErrNotSupportedOnOS) } return err } if errors.Is(err, unix.EPERM) { return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) } if errors.Is(err, unix.EINVAL) { if spec.MaxEntries == 0 { return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err) } if spec.Type == UnspecifiedMap { return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap) } if spec.Flags&sys.BPF_F_NO_PREALLOC != 0 && !spec.Type.mustHaveNoPrealloc() { return fmt.Errorf("map create: %w (BPF_F_NO_PREALLOC flag may be incompatible with map type %s)", err, spec.Type) } if spec.Flags&sys.BPF_F_NO_PREALLOC == 0 && spec.Type.mustHaveNoPrealloc() { return fmt.Errorf("map create: %w (BPF_F_NO_PREALLOC flag may need to be set for map type %s)", err, spec.Type) } } if spec.Type.canStoreMap() { if haveFeatErr := haveNestedMaps(); haveFeatErr != nil { return fmt.Errorf("map create: %w", haveFeatErr) } } if spec.readOnly() || spec.writeOnly() { if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil { return fmt.Errorf("map create: %w", haveFeatErr) } } if spec.Flags&sys.BPF_F_MMAPABLE > 0 { if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil { return fmt.Errorf("map create: %w", haveFeatErr) } } if spec.Flags&sys.BPF_F_INNER_MAP > 0 { if haveFeatErr := 
haveInnerMaps(); haveFeatErr != nil { return fmt.Errorf("map create: %w", haveFeatErr) } } if spec.Flags&sys.BPF_F_NO_PREALLOC > 0 { if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil { return fmt.Errorf("map create: %w", haveFeatErr) } } // BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of kernel's page size. if errors.Is(err, unix.EINVAL) && (attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) { pageSize := uint32(os.Getpagesize()) maxEntries := attr.MaxEntries if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) { return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize) } } return fmt.Errorf("map create: %w", err) } // newMapFromParts allocates and returns a new Map structure. // Sets the fullValueSize on per-CPU maps. func newMapFromParts(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) { m := &Map{ name, fd, typ, keySize, valueSize, maxEntries, flags, "", int(valueSize), nil, } if !typ.hasPerCPUValue() { return m, nil } possibleCPUs, err := PossibleCPU() if err != nil { return nil, err } m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs return m, nil } func (m *Map) String() string { if m.name != "" { return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd) } return fmt.Sprintf("%s#%v", m.typ, m.fd) } // Type returns the underlying type of the map. func (m *Map) Type() MapType { return m.typ } // KeySize returns the size of the map key in bytes. func (m *Map) KeySize() uint32 { return m.keySize } // ValueSize returns the size of the map value in bytes. func (m *Map) ValueSize() uint32 { return m.valueSize } // MaxEntries returns the maximum number of elements the map can hold. func (m *Map) MaxEntries() uint32 { return m.maxEntries } // Flags returns the flags of the map. func (m *Map) Flags() uint32 { return m.flags } // Info returns metadata about the map. 
// This was first introduced in Linux 4.5, but newer kernels support more
// MapInfo fields with the introduction of more features. See [MapInfo] and its
// methods for more details.
//
// Returns an error wrapping [ErrNotSupported] if the kernel supports neither
// BPF_OBJ_GET_INFO_BY_FD nor reading map information from /proc/self/fdinfo.
func (m *Map) Info() (*MapInfo, error) {
	return newMapInfoFromFd(m.fd)
}

// Handle returns a reference to the Map's type information in the kernel.
//
// Returns [ErrNotSupported] if the kernel has no BTF support, or if there is no
// BTF associated with the Map.
func (m *Map) Handle() (*btf.Handle, error) {
	info, err := m.Info()
	if err != nil {
		return nil, err
	}

	// ok is false if the map info doesn't carry a BTF ID.
	id, ok := info.BTFID()
	if !ok {
		return nil, fmt.Errorf("map %s: retrieve BTF ID: %w", m, ErrNotSupported)
	}

	return btf.NewHandleFromID(id)
}

// MapLookupFlags controls the behaviour of the map lookup calls.
type MapLookupFlags uint64

// LookupLock looks up the value of a spin-locked map.
const LookupLock MapLookupFlags = sys.BPF_F_LOCK

// Lookup retrieves a value from a Map.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
	return m.LookupWithFlags(key, valueOut, 0)
}

// LookupWithFlags retrieves a value from a Map with flags.
//
// Passing LookupLock flag will look up the value of a spin-locked
// map without returning the lock. This must be specified if the
// elements contain a spinlock.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { if m.typ.hasPerCPUValue() { return m.lookupPerCPU(key, valueOut, flags) } valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize) if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil { return err } return m.unmarshalValue(valueOut, valueBytes) } // LookupAndDelete retrieves and deletes a value from a Map. // // Returns ErrKeyNotExist if the key doesn't exist. func (m *Map) LookupAndDelete(key, valueOut interface{}) error { return m.LookupAndDeleteWithFlags(key, valueOut, 0) } // LookupAndDeleteWithFlags retrieves and deletes a value from a Map. // // Passing LookupLock flag will look up and delete the value of a spin-locked // map without returning the lock. This must be specified if the elements // contain a spinlock. // // Returns ErrKeyNotExist if the key doesn't exist. func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { if m.typ.hasPerCPUValue() { return m.lookupAndDeletePerCPU(key, valueOut, flags) } valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize) if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil { return err } return m.unmarshalValue(valueOut, valueBytes) } // LookupBytes gets a value from Map. // // Returns a nil value if a key doesn't exist. 
func (m *Map) LookupBytes(key interface{}) ([]byte, error) { valueBytes := make([]byte, m.fullValueSize) valuePtr := sys.UnsafeSlicePointer(valueBytes) err := m.lookup(key, valuePtr, 0) if errors.Is(err, ErrKeyNotExist) { return nil, nil } return valueBytes, err } func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error { slice, err := ensurePerCPUSlice(valueOut) if err != nil { return err } valueBytes := make([]byte, m.fullValueSize) if err := m.lookup(key, sys.UnsafeSlicePointer(valueBytes), flags); err != nil { return err } return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes) } func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error { keyPtr, err := m.marshalKey(key) if err != nil { return fmt.Errorf("can't marshal key: %w", err) } attr := sys.MapLookupElemAttr{ MapFd: m.fd.Uint(), Key: keyPtr, Value: valueOut, Flags: uint64(flags), } if err = sys.MapLookupElem(&attr); err != nil { if errors.Is(err, unix.ENOENT) { return errMapLookupKeyNotExist } return fmt.Errorf("lookup: %w", wrapMapError(err)) } return nil } func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error { slice, err := ensurePerCPUSlice(valueOut) if err != nil { return err } valueBytes := make([]byte, m.fullValueSize) if err := m.lookupAndDelete(key, sys.UnsafeSlicePointer(valueBytes), flags); err != nil { return err } return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes) } // ensurePerCPUSlice allocates a slice for a per-CPU value if necessary. func ensurePerCPUSlice(sliceOrPtr any) (any, error) { sliceOrPtrType := reflect.TypeOf(sliceOrPtr) if sliceOrPtrType.Kind() == reflect.Slice { // The target is a slice, the caller is responsible for ensuring that // size is correct. 
return sliceOrPtr, nil } slicePtrType := sliceOrPtrType if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice { return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice") } possibleCPUs, err := PossibleCPU() if err != nil { return nil, err } sliceType := slicePtrType.Elem() slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) sliceElemType := sliceType.Elem() sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr reflect.ValueOf(sliceOrPtr).Elem().Set(slice) if !sliceElemIsPointer { return slice.Interface(), nil } sliceElemType = sliceElemType.Elem() for i := 0; i < possibleCPUs; i++ { newElem := reflect.New(sliceElemType) slice.Index(i).Set(newElem) } return slice.Interface(), nil } func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error { keyPtr, err := m.marshalKey(key) if err != nil { return fmt.Errorf("can't marshal key: %w", err) } attr := sys.MapLookupAndDeleteElemAttr{ MapFd: m.fd.Uint(), Key: keyPtr, Value: valuePtr, Flags: uint64(flags), } if err := sys.MapLookupAndDeleteElem(&attr); err != nil { return fmt.Errorf("lookup and delete: %w", wrapMapError(err)) } return nil } // MapUpdateFlags controls the behaviour of the Map.Update call. // // The exact semantics depend on the specific MapType. type MapUpdateFlags uint64 const ( // UpdateAny creates a new element or update an existing one. UpdateAny MapUpdateFlags = iota // UpdateNoExist creates a new element. UpdateNoExist MapUpdateFlags = 1 << (iota - 1) // UpdateExist updates an existing element. UpdateExist // UpdateLock updates elements under bpf_spin_lock. UpdateLock ) // Put replaces or creates a value in map. // // It is equivalent to calling Update with UpdateAny. func (m *Map) Put(key, value interface{}) error { return m.Update(key, value, UpdateAny) } // Update changes the value of a key. 
func (m *Map) Update(key, value any, flags MapUpdateFlags) error { if m.typ.hasPerCPUValue() { return m.updatePerCPU(key, value, flags) } valuePtr, err := m.marshalValue(value) if err != nil { return fmt.Errorf("marshal value: %w", err) } return m.update(key, valuePtr, flags) } func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error { valuePtr, err := marshalPerCPUValue(value, int(m.valueSize)) if err != nil { return fmt.Errorf("marshal value: %w", err) } return m.update(key, valuePtr, flags) } func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error { keyPtr, err := m.marshalKey(key) if err != nil { return fmt.Errorf("marshal key: %w", err) } attr := sys.MapUpdateElemAttr{ MapFd: m.fd.Uint(), Key: keyPtr, Value: valuePtr, Flags: uint64(flags), } if err = sys.MapUpdateElem(&attr); err != nil { return fmt.Errorf("update: %w", wrapMapError(err)) } return nil } // Delete removes a value. // // Returns ErrKeyNotExist if the key does not exist. func (m *Map) Delete(key interface{}) error { keyPtr, err := m.marshalKey(key) if err != nil { return fmt.Errorf("can't marshal key: %w", err) } attr := sys.MapDeleteElemAttr{ MapFd: m.fd.Uint(), Key: keyPtr, } if err = sys.MapDeleteElem(&attr); err != nil { return fmt.Errorf("delete: %w", wrapMapError(err)) } return nil } // NextKey finds the key following an initial key. // // See NextKeyBytes for details. // // Returns ErrKeyNotExist if there is no next key. func (m *Map) NextKey(key, nextKeyOut interface{}) error { nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize)) if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil { return err } if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil { return fmt.Errorf("can't unmarshal next key: %w", err) } return nil } // NextKeyBytes returns the key following an initial key as a byte slice. // // Passing nil will return the first key. // // Use Iterate if you want to traverse all entries in the map. 
//
// Returns nil if there are no more keys.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
	nextKey := make([]byte, m.keySize)
	nextKeyPtr := sys.UnsafeSlicePointer(nextKey)

	err := m.nextKey(key, nextKeyPtr)
	if errors.Is(err, ErrKeyNotExist) {
		// The end of the map is reported as a nil slice and a nil error.
		return nil, nil
	}

	return nextKey, err
}

// nextKey asks the kernel for the key following key and has it written to
// the memory referenced by nextKeyOut.
//
// A nil key leaves the key pointer unset, which requests the first key.
func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
	var (
		keyPtr sys.Pointer
		err    error
	)

	if key != nil {
		keyPtr, err = m.marshalKey(key)
		if err != nil {
			return fmt.Errorf("can't marshal key: %w", err)
		}
	}

	attr := sys.MapGetNextKeyAttr{
		MapFd:   m.fd.Uint(),
		Key:     keyPtr,
		NextKey: nextKeyOut,
	}

	if err = sys.MapGetNextKey(&attr); err != nil {
		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
		// first map element when a nil key pointer is specified.
		if platform.IsLinux && key == nil && errors.Is(err, unix.EFAULT) {
			var guessKey []byte
			guessKey, err = m.guessNonExistentKey()
			if err != nil {
				return err
			}

			// Retry the syscall with a valid non-existing key.
			attr.Key = sys.UnsafeSlicePointer(guessKey)
			if err = sys.MapGetNextKey(&attr); err == nil {
				return nil
			}
			// The retry failed: fall through and report its error below.
		}

		return fmt.Errorf("next key: %w", wrapMapError(err))
	}

	return nil
}

// mmapProtectedPage maps a single anonymous, shared page without any access
// permissions (PROT_NONE). The mapping is created on first use and the result,
// including any error, is reused by later calls.
var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
})

// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
// This is necessary on kernels before 4.4.132, since those don't support
// iterating maps from the start by providing an invalid key pointer.
//
// Up to four candidate keys are tried in order: all zeroes, all 0xff, all 0x55
// and finally random bytes. The first candidate whose lookup fails with
// ErrKeyNotExist is returned.
func (m *Map) guessNonExistentKey() ([]byte, error) {
	// Map a protected page and use that as the value pointer. This saves some
	// work copying out the value, which we're not interested in.
	page, err := mmapProtectedPage()
	if err != nil {
		return nil, err
	}
	valuePtr := sys.UnsafeSlicePointer(page)

	// randKey is reused for every attempt, each case below overwrites all of
	// its bytes. It starts out zeroed, which is the candidate for case 0.
	randKey := make([]byte, int(m.keySize))

	for i := 0; i < 4; i++ {
		switch i {
		// For hash maps, the 0 key is less likely to be occupied. They're often
		// used for storing data related to pointers, and their access pattern is
		// generally scattered across the keyspace.
		case 0:
		// An all-0xff key is guaranteed to be out of bounds of any array, since
		// those have a fixed key size of 4 bytes. The only corner case being
		// arrays with 2^32 max entries, but those are prohibitively expensive
		// in many environments.
		case 1:
			for r := range randKey {
				randKey[r] = 0xff
			}
		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
		// is unlikely to be taken.
		case 2:
			for r := range randKey {
				randKey[r] = 0x55
			}
		// Last ditch effort, generate a random key.
		case 3:
			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
		}

		// Any result other than "key doesn't exist" moves on to the next
		// candidate.
		err := m.lookup(randKey, valuePtr, 0)
		if errors.Is(err, ErrKeyNotExist) {
			return randKey, nil
		}
	}

	return nil, errors.New("couldn't find non-existing key")
}

// BatchLookup looks up many elements in a map at once.
//
// "keysOut" and "valuesOut" must be of type slice, a pointer
// to a slice or buffer will not work.
// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regard to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts)
	if err != nil {
		// n is passed on alongside the wrapped error.
		return n, fmt.Errorf("map batch lookup: %w", err)
	}
	return n, nil
}

// BatchLookupAndDelete looks up many elements in a map at once,
//
// It then deletes all those elements.
// "keysOut" and "valuesOut" must be of type slice, a pointer // to a slice or buffer will not work. // "cursor" is an pointer to an opaque handle. It must be non-nil. Pass // "cursor" to subsequent calls of this function to continue the batching // operation in the case of chunking. // // Warning: This API is not very safe to use as the kernel implementation for // batching relies on the user to be aware of subtle details with regarding to // different map type implementations. // // ErrKeyNotExist is returned when the batch lookup has reached // the end of all possible results, even when partial results // are returned. It should be used to evaluate when lookup is "done". func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts) if err != nil { return n, fmt.Errorf("map batch lookup and delete: %w", err) } return n, nil } // MapBatchCursor represents a starting point for a batch operation. type MapBatchCursor struct { m *Map opaque []byte } func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { if m.typ.hasPerCPUValue() { return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts) } count, err := batchCount(keysOut, valuesOut) if err != nil { return 0, err } valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize)) n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts) if errors.Is(sysErr, unix.ENOSPC) { // Hash tables return ENOSPC when the size of the batch is smaller than // any bucket. 
return n, fmt.Errorf("%w (batch size too small?)", sysErr) } else if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { return 0, sysErr } err = valueBuf.Unmarshal(valuesOut) if err != nil { return 0, err } return n, sysErr } func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { count, err := sliceLen(keysOut) if err != nil { return 0, fmt.Errorf("keys: %w", err) } valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize)) n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts) if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { return 0, sysErr } if bytesBuf := valueBuf.Bytes(); bytesBuf != nil { err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), bytesBuf) if err != nil { return 0, err } } return n, sysErr } func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) { // * generic_map_lookup_batch requires that batch_out is key_size bytes. // This is used by array and LPM maps. // // * __htab_map_lookup_and_delete_batch requires u32. This is used by the // various hash maps. // // Use a minimum of 4 bytes to avoid having to distinguish between the two. cursorLen := max(int(m.keySize), 4) inBatch := cursor.opaque if inBatch == nil { // This is the first lookup, allocate a buffer to hold the cursor. cursor.opaque = make([]byte, cursorLen) cursor.m = m } else if cursor.m != m { // Prevent reuse of a cursor across maps. First, it's unlikely to work. // Second, the maps may require different cursorLen and cursor.opaque // may therefore be too short. This could lead to the kernel clobbering // user space memory. 
return 0, errors.New("a cursor may not be reused across maps") } if err := haveBatchAPI(); err != nil { return 0, err } keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize)) attr := sys.MapLookupBatchAttr{ MapFd: m.fd.Uint(), Keys: keyBuf.Pointer(), Values: valuePtr, Count: uint32(count), InBatch: sys.UnsafeSlicePointer(inBatch), OutBatch: sys.UnsafeSlicePointer(cursor.opaque), } if opts != nil { attr.ElemFlags = opts.ElemFlags attr.Flags = opts.Flags } _, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) sysErr = wrapMapError(sysErr) if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { return 0, sysErr } if err := keyBuf.Unmarshal(keysOut); err != nil { return 0, err } return int(attr.Count), sysErr } // BatchUpdate updates the map with multiple keys and values // simultaneously. // "keys" and "values" must be of type slice, a pointer // to a slice or buffer will not work. func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { if m.typ.hasPerCPUValue() { return m.batchUpdatePerCPU(keys, values, opts) } count, err := batchCount(keys, values) if err != nil { return 0, err } valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize)) if err != nil { return 0, err } return m.batchUpdate(count, keys, valuePtr, opts) } func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) { keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize)) if err != nil { return 0, err } attr := sys.MapUpdateBatchAttr{ MapFd: m.fd.Uint(), Keys: keyPtr, Values: valuePtr, Count: uint32(count), } if opts != nil { attr.ElemFlags = opts.ElemFlags attr.Flags = opts.Flags } err = sys.MapUpdateBatch(&attr) if err != nil { if haveFeatErr := haveBatchAPI(); haveFeatErr != nil { return 0, haveFeatErr } return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err)) } return int(attr.Count), nil } func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, 
error) { count, err := sliceLen(keys) if err != nil { return 0, fmt.Errorf("keys: %w", err) } valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize)) if err != nil { return 0, err } return m.batchUpdate(count, keys, sys.UnsafeSlicePointer(valueBuf), opts) } // BatchDelete batch deletes entries in the map by keys. // "keys" must be of type slice, a pointer to a slice or buffer will not work. func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { count, err := sliceLen(keys) if err != nil { return 0, fmt.Errorf("keys: %w", err) } keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize)) if err != nil { return 0, fmt.Errorf("cannot marshal keys: %v", err) } attr := sys.MapDeleteBatchAttr{ MapFd: m.fd.Uint(), Keys: keyPtr, Count: uint32(count), } if opts != nil { attr.ElemFlags = opts.ElemFlags attr.Flags = opts.Flags } if err = sys.MapDeleteBatch(&attr); err != nil { if haveFeatErr := haveBatchAPI(); haveFeatErr != nil { return 0, haveFeatErr } return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err)) } return int(attr.Count), nil } func batchCount(keys, values any) (int, error) { keysLen, err := sliceLen(keys) if err != nil { return 0, fmt.Errorf("keys: %w", err) } valuesLen, err := sliceLen(values) if err != nil { return 0, fmt.Errorf("values: %w", err) } if keysLen != valuesLen { return 0, fmt.Errorf("keys and values must have the same length") } return keysLen, nil } // Iterate traverses a map. // // It's safe to create multiple iterators at the same time. // // It's not possible to guarantee that all keys in a map will be // returned if there are concurrent modifications to the map. func (m *Map) Iterate() *MapIterator { return newMapIterator(m) } // Close the Map's underlying file descriptor, which could unload the // Map from the kernel if it is not pinned or in use by a loaded Program. 
func (m *Map) Close() error { if m == nil { // This makes it easier to clean up when iterating maps // of maps / programs. return nil } return m.fd.Close() } // FD gets the file descriptor of the Map. // // Calling this function is invalid after Close has been called. func (m *Map) FD() int { return m.fd.Int() } // Clone creates a duplicate of the Map. // // Closing the duplicate does not affect the original, and vice versa. // Changes made to the map are reflected by both instances however. // If the original map was pinned, the cloned map will not be pinned by default. // // Cloning a nil Map returns nil. func (m *Map) Clone() (*Map, error) { if m == nil { return nil, nil } dup, err := m.fd.Dup() if err != nil { return nil, fmt.Errorf("can't clone map: %w", err) } return &Map{ m.name, dup, m.typ, m.keySize, m.valueSize, m.maxEntries, m.flags, "", m.fullValueSize, nil, }, nil } // Pin persists the map on the BPF virtual file system past the lifetime of // the process that created it . // // Calling Pin on a previously pinned map will overwrite the path, except when // the new path already exists. Re-pinning across filesystems is not supported. // You can Clone a map to pin it to a different path. // // This requires bpffs to be mounted above fileName. // See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd func (m *Map) Pin(fileName string) error { if err := sys.Pin(m.pinnedPath, fileName, m.fd); err != nil { return err } m.pinnedPath = fileName return nil } // Unpin removes the persisted state for the map from the BPF virtual filesystem. // // Failed calls to Unpin will not alter the state returned by IsPinned. // // Unpinning an unpinned Map returns nil. func (m *Map) Unpin() error { if err := sys.Unpin(m.pinnedPath); err != nil { return err } m.pinnedPath = "" return nil } // IsPinned returns true if the map has a non-empty pinned path. 
func (m *Map) IsPinned() bool { return m.pinnedPath != "" } // Freeze prevents a map to be modified from user space. // // It makes no changes to kernel-side restrictions. func (m *Map) Freeze() error { attr := sys.MapFreezeAttr{ MapFd: m.fd.Uint(), } if err := sys.MapFreeze(&attr); err != nil { if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil { return fmt.Errorf("can't freeze map: %w", haveFeatErr) } return fmt.Errorf("can't freeze map: %w", err) } return nil } // finalize populates the Map according to the Contents specified // in spec and freezes the Map if requested by spec. func (m *Map) finalize(spec *MapSpec) error { for _, kv := range spec.Contents { if err := m.Put(kv.Key, kv.Value); err != nil { return fmt.Errorf("putting value: key %v: %w", kv.Key, err) } } if isConstantDataSection(spec.Name) || isKconfigSection(spec.Name) { if err := m.Freeze(); err != nil { return fmt.Errorf("freezing map: %w", err) } } return nil } func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) { if data == nil { if m.keySize == 0 { // Queues have a key length of zero, so passing nil here is valid. 
return sys.UnsafePointer(nil), nil } return sys.Pointer{}, errors.New("can't use nil as key of map") } return marshalMapSyscallInput(data, int(m.keySize)) } func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { var ( buf []byte err error ) switch value := data.(type) { case *Map: if !m.typ.canStoreMap() { return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ) } buf, err = marshalMap(value, int(m.valueSize)) case *Program: if !m.typ.canStoreProgram() { return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ) } buf, err = marshalProgram(value, int(m.valueSize)) default: return marshalMapSyscallInput(data, int(m.valueSize)) } if err != nil { return sys.Pointer{}, err } return sys.UnsafeSlicePointer(buf), nil } func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error { switch value := value.(type) { case **Map: if !m.typ.canStoreMap() { return fmt.Errorf("can't read a map from %s", m.typ) } other, err := unmarshalMap(buf) if err != nil { return err } // The caller might close the map externally, so ignore errors. _ = (*value).Close() *value = other return nil case *Map: if !m.typ.canStoreMap() { return fmt.Errorf("can't read a map from %s", m.typ) } return errors.New("require pointer to *Map") case **Program: if !m.typ.canStoreProgram() { return fmt.Errorf("can't read a program from %s", m.typ) } other, err := unmarshalProgram(buf) if err != nil { return err } // The caller might close the program externally, so ignore errors. _ = (*value).Close() *value = other return nil case *Program: if !m.typ.canStoreProgram() { return fmt.Errorf("can't read a program from %s", m.typ) } return errors.New("require pointer to *Program") } return buf.Unmarshal(value) } // LoadPinnedMap opens a Map from a pin (file) on the BPF virtual filesystem. // // Requires at least Linux 4.5. 
func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { fd, typ, err := sys.ObjGetTyped(&sys.ObjGetAttr{ Pathname: sys.NewStringPointer(fileName), FileFlags: opts.Marshal(), }) if err != nil { return nil, err } if typ != sys.BPF_TYPE_MAP { _ = fd.Close() return nil, fmt.Errorf("%s is not a Map", fileName) } m, err := newMapFromFD(fd) if err == nil { m.pinnedPath = fileName } return m, err } // unmarshalMap creates a map from a map ID encoded in host endianness. func unmarshalMap(buf sysenc.Buffer) (*Map, error) { var id uint32 if err := buf.Unmarshal(&id); err != nil { return nil, err } return NewMapFromID(MapID(id)) } // marshalMap marshals the fd of a map into a buffer in host endianness. func marshalMap(m *Map, length int) ([]byte, error) { if m == nil { return nil, errors.New("can't marshal a nil Map") } if length != 4 { return nil, fmt.Errorf("can't marshal map to %d bytes", length) } buf := make([]byte, 4) internal.NativeEndian.PutUint32(buf, m.fd.Uint()) return buf, nil } // MapIterator iterates a Map. // // See Map.Iterate. type MapIterator struct { target *Map // Temporary storage to avoid allocations in Next(). This is any instead // of []byte to avoid allocations. cursor any count, maxEntries uint32 done bool err error } func newMapIterator(target *Map) *MapIterator { return &MapIterator{ target: target, maxEntries: target.maxEntries, } } // Next decodes the next key and value. // // Iterating a hash map from which keys are being deleted is not // safe. You may see the same key multiple times. Iteration may // also abort with an error, see IsIterationAborted. // // Returns false if there are no more entries. You must check // the result of Err afterwards. // // See Map.Get for further caveats around valueOut. func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { if mi.err != nil || mi.done { return false } // For array-like maps NextKey returns nil only after maxEntries // iterations. 
for mi.count <= mi.maxEntries { if mi.cursor == nil { // Pass nil interface to NextKey to make sure the Map's first key // is returned. If we pass an uninitialized []byte instead, it'll see a // non-nil interface and try to marshal it. mi.cursor = make([]byte, mi.target.keySize) mi.err = mi.target.NextKey(nil, mi.cursor) } else { mi.err = mi.target.NextKey(mi.cursor, mi.cursor) } if errors.Is(mi.err, ErrKeyNotExist) { mi.done = true mi.err = nil return false } else if mi.err != nil { mi.err = fmt.Errorf("get next key: %w", mi.err) return false } mi.count++ mi.err = mi.target.Lookup(mi.cursor, valueOut) if errors.Is(mi.err, ErrKeyNotExist) { // Even though the key should be valid, we couldn't look up // its value. If we're iterating a hash map this is probably // because a concurrent delete removed the value before we // could get it. This means that the next call to NextKeyBytes // is very likely to restart iteration. // If we're iterating one of the fd maps like // ProgramArray it means that a given slot doesn't have // a valid fd associated. It's OK to continue to the next slot. continue } if mi.err != nil { mi.err = fmt.Errorf("look up next key: %w", mi.err) return false } buf := mi.cursor.([]byte) if ptr, ok := keyOut.(unsafe.Pointer); ok { copy(unsafe.Slice((*byte)(ptr), len(buf)), buf) } else { mi.err = sysenc.Unmarshal(keyOut, buf) } return mi.err == nil } mi.err = fmt.Errorf("%w", ErrIterationAborted) return false } // Err returns any encountered error. // // The method must be called after Next returns nil. // // Returns ErrIterationAborted if it wasn't possible to do a full iteration. func (mi *MapIterator) Err() error { return mi.err } // MapGetNextID returns the ID of the next eBPF map. // // Returns ErrNotExist, if there is no next eBPF map. func MapGetNextID(startID MapID) (MapID, error) { attr := &sys.MapGetNextIdAttr{Id: uint32(startID)} return MapID(attr.NextId), sys.MapGetNextId(attr) } // NewMapFromID returns the [Map] for a given map id. 
Returns [ErrNotExist] if // there is no eBPF map with the given id. // // Requires at least Linux 4.13. func NewMapFromID(id MapID) (*Map, error) { fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{ Id: uint32(id), }) if err != nil { return nil, err } return newMapFromFD(fd) } // sliceLen returns the length if the value is a slice or an error otherwise. func sliceLen(slice any) (int, error) { sliceValue := reflect.ValueOf(slice) if sliceValue.Kind() != reflect.Slice { return 0, fmt.Errorf("%T is not a slice", slice) } return sliceValue.Len(), nil } ================================================ FILE: map_test.go ================================================ package ebpf import ( "bytes" "errors" "fmt" "math" "os" "path/filepath" "runtime" "slices" "sort" "testing" "unsafe" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/unix" ) var ( spec1 = &MapSpec{ Name: "foo", Type: Hash, KeySize: 4, ValueSize: 4, MaxEntries: 1, Pinning: PinByName, } ) func TestMap(t *testing.T) { m := createMap(t, Array, 2) t.Log(m) if err := m.Put(uint32(0), uint32(42)); err != nil { t.Fatal("Can't put:", err) } if err := m.Put(uint32(1), uint32(4242)); err != nil { t.Fatal("Can't put:", err) } m2, err := m.Clone() if err != nil { t.Fatal("Can't clone map:", err) } defer m2.Close() m.Close() m = m2 var v uint32 if err := m.Lookup(uint32(0), &v); err != nil { t.Fatal("Can't lookup 0:", err) } if v != 42 { t.Error("Want value 42, got", v) } sliceVal := make([]uint32, 1) qt.Assert(t, qt.IsNil(m.Lookup(uint32(0), sliceVal))) qt.Assert(t, qt.DeepEquals(sliceVal, []uint32{42})) var slice []byte qt.Assert(t, qt.IsNil(m.Lookup(uint32(0), &slice))) qt.Assert(t, qt.DeepEquals(slice, internal.NativeEndian.AppendUint32(nil, 42))) var k uint32 if err := m.NextKey(uint32(0), 
&k); err != nil { t.Fatal("Can't get:", err) } if k != 1 { t.Error("Want key 1, got", k) } } func TestMapSpecCopy(t *testing.T) { a := &MapSpec{ "foo", Hash, 4, 4, 1, 1, PinByName, 1, []MapKV{{1, 2}}, // Can't copy Contents, use value types nil, // InnerMap 0, // MapExtra bytes.NewReader(nil), &btf.Int{}, &btf.Int{}, nil, } a.InnerMap = a qt.Check(t, qt.IsNil((*MapSpec)(nil).Copy())) qt.Assert(t, testutils.IsDeepCopy(a.Copy(), a)) } func TestMapBatch(t *testing.T) { contents := []uint32{ 42, 4242, 23, 2323, } keysAndValuesForMap := func(m *Map, contents []uint32) (keys, values []uint32, stride int) { possibleCPU := 1 if m.Type().hasPerCPUValue() { possibleCPU = MustPossibleCPU() } keys = make([]uint32, 0, len(contents)) values = make([]uint32, 0, len(contents)*possibleCPU) for key, value := range contents { keys = append(keys, uint32(key)) for i := 0; i < possibleCPU; i++ { values = append(values, value*uint32((i+1))) } } return keys, values, possibleCPU } for _, typ := range []MapType{Array, PerCPUArray} { t.Run(typ.String(), func(t *testing.T) { if typ == PerCPUArray { // https://lore.kernel.org/bpf/20210424214510.806627-2-pctammela@mojatatu.com/ testutils.SkipOnOldKernel(t, "5.13", "batched ops support for percpu array") } m := createMap(t, typ, uint32(len(contents))) keys, values, _ := keysAndValuesForMap(m, contents) count, err := m.BatchUpdate(keys, values, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(count, len(contents))) lookupKeys := make([]uint32, len(keys)) lookupValues := make([]uint32, len(values)) var cursor MapBatchCursor count, err = m.BatchLookup(&cursor, lookupKeys, lookupValues, nil) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(count, len(contents))) qt.Assert(t, qt.ContentEquals(lookupKeys, keys)) qt.Assert(t, qt.ContentEquals(lookupValues, values)) count, err = m.BatchLookup(&cursor, lookupKeys, lookupValues, nil) qt.Assert(t, qt.ErrorIs(err, ErrKeyNotExist)) qt.Assert(t, qt.Equals(count, 
0)) }) } for _, typ := range []MapType{Hash, PerCPUHash} { t.Run(typ.String(), func(t *testing.T) { m := createMap(t, typ, uint32(len(contents))) keys, values, stride := keysAndValuesForMap(m, contents) count, err := m.BatchUpdate(keys, values, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(count, len(contents))) // BPF hash tables seem to have lots of collisions when keys // are following a sequence. // This causes ENOSPC since a single large bucket may be larger // than the batch size. We work around this by making the batch size // equal to the map size. lookupKeys := make([]uint32, len(keys)) lookupValues := make([]uint32, len(values)) var cursor MapBatchCursor count, err = m.BatchLookup(&cursor, lookupKeys, lookupValues, nil) qt.Assert(t, qt.ErrorIs(err, ErrKeyNotExist)) qt.Assert(t, qt.Equals(count, len(contents))) qt.Assert(t, qt.ContentEquals(lookupKeys, keys)) qt.Assert(t, qt.ContentEquals(lookupValues, values)) cursor = MapBatchCursor{} count, err = m.BatchLookupAndDelete(&cursor, lookupKeys, lookupValues, nil) qt.Assert(t, qt.ErrorIs(err, ErrKeyNotExist)) qt.Assert(t, qt.Equals(count, len(contents))) qt.Assert(t, qt.ContentEquals(lookupKeys, keys)) qt.Assert(t, qt.ContentEquals(lookupValues, values)) if stride > 1 { values := make([]uint32, stride) qt.Assert(t, qt.ErrorIs(m.Lookup(uint32(0), values), ErrKeyNotExist)) } else { var v uint32 qt.Assert(t, qt.ErrorIs(m.Lookup(uint32(0), &v), ErrKeyNotExist)) } }) } } func TestMapBatchCursorReuse(t *testing.T) { arr1 := createMap(t, Array, 4) arr2 := createMap(t, Array, 4) tmp := make([]uint32, 2) var cursor MapBatchCursor _, err := arr1.BatchLookup(&cursor, tmp, tmp, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) _, err = arr2.BatchLookup(&cursor, tmp, tmp, nil) qt.Assert(t, qt.IsNotNil(err)) } func TestMapLookupKeyTooSmall(t *testing.T) { m := createMap(t, Array, 2) defer m.Close() var small uint16 qt.Assert(t, qt.IsNil(m.Put(uint32(0), 
uint32(1234)))) qt.Assert(t, qt.IsNotNil(m.Lookup(uint32(0), &small))) } func TestMapLookupKeyNotFoundAllocations(t *testing.T) { m := createMap(t, Array, 2) defer m.Close() var key, out uint32 = 3, 0 var err error allocs := testing.AllocsPerRun(5, func() { err = m.Lookup(&key, &out) }) qt.Assert(t, qt.ErrorIs(err, ErrKeyNotExist)) qt.Assert(t, qt.Equals(allocs, float64(0))) } func TestBatchAPIMapDelete(t *testing.T) { if err := haveBatchAPI(); err != nil { t.Skipf("batch api not available: %v", err) } m := createMap(t, Hash, 10) var ( keys = []uint32{0, 1} values = []uint32{42, 4242} ) count, err := m.BatchUpdate(keys, values, nil) if err != nil { t.Fatalf("BatchUpdate: %v", err) } if count != len(keys) { t.Fatalf("BatchUpdate: expected count, %d, to be %d", count, len(keys)) } var v uint32 if err := m.Lookup(uint32(0), &v); err != nil { t.Fatal("Can't lookup 0:", err) } if v != 42 { t.Error("Want value 42, got", v) } count, err = m.BatchDelete(keys, nil) if err != nil { t.Fatalf("BatchDelete: %v", err) } if count != len(keys) { t.Fatalf("BatchDelete: expected %d deletions got %d", len(keys), count) } if err := m.Lookup(uint32(0), &v); !errors.Is(err, ErrKeyNotExist) { t.Fatalf("Lookup should have failed with error, %v, instead error is %v", ErrKeyNotExist, err) } } func TestMapClose(t *testing.T) { m := createMap(t, Array, 2) if err := m.Close(); err != nil { t.Fatal("Can't close map:", err) } if err := m.Put(uint32(0), uint32(42)); !errors.Is(err, sys.ErrClosedFd) { t.Fatal("Put doesn't check for closed fd", err) } if _, err := m.LookupBytes(uint32(0)); !errors.Is(err, sys.ErrClosedFd) { t.Fatal("Get doesn't check for closed fd", err) } } func TestBatchMapWithLock(t *testing.T) { testutils.SkipOnOldKernel(t, "5.13", "MAP BATCH BPF_F_LOCK") spec, err := LoadCollectionSpec(testutils.NativeFile(t, "testdata/map_spin_lock-%s.elf")) qt.Assert(t, qt.IsNil(err)) coll := mustNewCollection(t, spec, nil) type spinLockValue struct { Cnt uint32 Padding uint32 } m, ok := 
coll.Maps["spin_lock_map"] if !ok { t.Fatal(err) } keys := []uint32{0, 1} values := []spinLockValue{{Cnt: 42}, {Cnt: 4242}} count, err := m.BatchUpdate(keys, values, &BatchOptions{ElemFlags: uint64(UpdateLock)}) testutils.SkipIfNotSupportedOnOS(t, err) if err != nil { t.Fatalf("BatchUpdate: %v", err) } if count != len(keys) { t.Fatalf("BatchUpdate: expected count, %d, to be %d", count, len(keys)) } var cursor MapBatchCursor lookupKeys := make([]uint32, 2) lookupValues := make([]spinLockValue, 2) count, err = m.BatchLookup(&cursor, lookupKeys, lookupValues, &BatchOptions{ElemFlags: uint64(LookupLock)}) if !errors.Is(err, ErrKeyNotExist) { t.Fatalf("BatchLookup: %v", err) } if count != 2 { t.Fatalf("BatchLookup: expected two keys, got %d", count) } cursor = MapBatchCursor{} deleteKeys := []uint32{0, 1} deleteValues := make([]spinLockValue, 2) count, err = m.BatchLookupAndDelete(&cursor, deleteKeys, deleteValues, nil) if !errors.Is(err, ErrKeyNotExist) { t.Fatalf("BatchLookupAndDelete: %v", err) } if count != 2 { t.Fatalf("BatchLookupAndDelete: expected two keys, got %d", count) } } func TestMapWithLock(t *testing.T) { testutils.SkipOnOldKernel(t, "5.13", "MAP BPF_F_LOCK") spec, err := LoadCollectionSpec(testutils.NativeFile(t, "testdata/map_spin_lock-%s.elf")) qt.Assert(t, qt.IsNil(err)) coll := mustNewCollection(t, spec, nil) type spinLockValue struct { Cnt uint32 Padding uint32 } m, ok := coll.Maps["spin_lock_map"] if !ok { t.Fatal(err) } key := uint32(1) value := spinLockValue{Cnt: 5} err = m.Update(key, value, UpdateLock) if platform.IsWindows && errors.Is(err, unix.EINVAL) { t.Skip("Windows doesn't support UpdateLock") } if err != nil { t.Fatal(err) } value.Cnt = 0 err = m.LookupWithFlags(&key, &value, LookupLock) if err != nil { t.Fatal(err) } if value.Cnt != 5 { t.Fatalf("Want value 5, got %d", value.Cnt) } t.Run("LookupAndDelete", func(t *testing.T) { testutils.SkipOnOldKernel(t, "5.14", "LOOKUP_AND_DELETE flags") value.Cnt = 0 err = 
m.LookupAndDeleteWithFlags(&key, &value, LookupLock) if err != nil { t.Fatal(err) } if value.Cnt != 5 { t.Fatalf("Want value 5, got %d", value.Cnt) } err = m.LookupWithFlags(&key, &value, LookupLock) if err != nil && !errors.Is(err, ErrKeyNotExist) { t.Fatal(err) } }) } func TestMapCloneNil(t *testing.T) { m, err := (*Map)(nil).Clone() if err != nil { t.Fatal(err) } if m != nil { t.Fatal("Cloning a nil map doesn't return nil") } } func TestMapPin(t *testing.T) { m := createMap(t, Array, 2) if err := m.Put(uint32(0), uint32(42)); err != nil { t.Fatal("Can't put:", err) } tmp := testutils.TempBPFFS(t) path := filepath.Join(tmp, "map") if err := m.Pin(path); err != nil { testutils.SkipIfNotSupported(t, err) t.Fatal(err) } pinned := m.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) m.Close() m, err := LoadPinnedMap(path, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer m.Close() var v uint32 if err := m.Lookup(uint32(0), &v); err != nil { t.Fatal("Can't lookup 0:", err) } if v != 42 { t.Error("Want value 42, got", v) } } func TestNestedMapPin(t *testing.T) { m := createMapInMap(t, ArrayOfMaps, Array) tmp := testutils.TempBPFFS(t) path := filepath.Join(tmp, "nested") if err := m.Pin(path); err != nil { t.Fatal(err) } m.Close() m, err := LoadPinnedMap(path, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer m.Close() } func TestNestedMapPinNested(t *testing.T) { if _, err := newMap(t, &MapSpec{ Type: ArrayOfMaps, KeySize: 4, ValueSize: 4, MaxEntries: 2, InnerMap: &MapSpec{ Name: "inner", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, Pinning: PinByName, }, }, nil); err == nil { t.Error("Inner maps should not be pinnable") } } func TestMapPinMultiple(t *testing.T) { testutils.SkipOnOldKernel(t, "4.9", "atomic re-pinning was introduced in 4.9 series") tmp := testutils.TempBPFFS(t) spec := spec1.Copy() m1 := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) pinned := m1.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) 
newPath := filepath.Join(tmp, "bar") err := m1.Pin(newPath) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) oldPath := filepath.Join(tmp, spec.Name) if _, err := os.Stat(oldPath); err == nil { t.Fatal("Previous pinned map path still exists:", err) } m2, err := LoadPinnedMap(newPath, nil) qt.Assert(t, qt.IsNil(err)) pinned = m2.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) defer m2.Close() } func TestMapPinWithEmptyPath(t *testing.T) { m := createMap(t, Array, 2) err := m.Pin("") qt.Assert(t, qt.Not(qt.IsNil(err))) } func TestMapPinFailReplace(t *testing.T) { tmp := testutils.TempBPFFS(t) spec := spec1.Copy() spec2 := spec1.Copy() spec2.Name = spec1.Name + "bar" m := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) _ = mustNewMap(t, spec2, &MapOptions{PinPath: tmp}) qt.Assert(t, qt.IsTrue(m.IsPinned())) newPath := filepath.Join(tmp, spec2.Name) qt.Assert(t, qt.Not(qt.IsNil(m.Pin(newPath))), qt.Commentf("Pin didn't"+ " fail new path from replacing an existing path")) } func TestMapUnpin(t *testing.T) { tmp := testutils.TempBPFFS(t) spec := spec1.Copy() m := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) pinned := m.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) path := filepath.Join(tmp, spec.Name) m2, err := LoadPinnedMap(path, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) defer m2.Close() if err = m.Unpin(); err != nil { t.Fatal("Failed to unpin map:", err) } if _, err := os.Stat(path); err == nil { t.Fatal("Pinned map path still exists after unpinning:", err) } } func TestMapLoadPinned(t *testing.T) { tmp := testutils.TempBPFFS(t) spec := spec1.Copy() m1 := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) pinned := m1.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) path := filepath.Join(tmp, spec.Name) m2, err := LoadPinnedMap(path, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) defer m2.Close() pinned = m2.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) } func TestMapLoadReusePinned(t *testing.T) { for _, typ := range 
[]MapType{Array, Hash, DevMap, DevMapHash} { t.Run(typ.String(), func(t *testing.T) { if typ == DevMap { testutils.SkipOnOldKernel(t, "4.14", "devmap") } if typ == DevMapHash { testutils.SkipOnOldKernel(t, "5.4", "devmap_hash") } tmp := testutils.TempBPFFS(t) spec := &MapSpec{ Name: "pinmap", Type: typ, KeySize: 4, ValueSize: 4, MaxEntries: 1, Pinning: PinByName, } _ = mustNewMap(t, spec, &MapOptions{PinPath: tmp}) _ = mustNewMap(t, spec, &MapOptions{PinPath: tmp}) }) } } func TestMapLoadPinnedUnpin(t *testing.T) { tmp := testutils.TempBPFFS(t) spec := spec1.Copy() m1 := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) pinned := m1.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) path := filepath.Join(tmp, spec.Name) m2, err := LoadPinnedMap(path, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) defer m2.Close() err = m1.Unpin() qt.Assert(t, qt.IsNil(err)) err = m2.Unpin() qt.Assert(t, qt.IsNil(err)) } func TestMapLoadPinnedWithOptions(t *testing.T) { // Introduced in commit 6e71b04a8224. 
testutils.SkipOnOldKernel(t, "4.15", "file_flags in BPF_OBJ_GET") array := createMap(t, Array, 2) tmp := testutils.TempBPFFS(t) path := filepath.Join(tmp, "map") if err := array.Pin(path); err != nil { t.Fatal(err) } if err := array.Put(uint32(0), uint32(123)); err != nil { t.Fatal(err) } array.Close() t.Run("read-only", func(t *testing.T) { array, err := LoadPinnedMap(path, &LoadPinOptions{ ReadOnly: true, }) if platform.IsWindows && errors.Is(err, unix.EINVAL) { t.Skip("Windows doesn't support file_flags in OBJ_GET") } testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't load map:", err) } defer array.Close() if err := array.Put(uint32(0), uint32(1)); !errors.Is(err, unix.EPERM) { t.Fatal("Expected EPERM from Put, got", err) } }) t.Run("write-only", func(t *testing.T) { array, err := LoadPinnedMap(path, &LoadPinOptions{ WriteOnly: true, }) if platform.IsWindows && errors.Is(err, unix.EINVAL) { t.Skip("Windows doesn't support file_flags in OBJ_GET") } testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't load map:", err) } defer array.Close() var value uint32 if err := array.Lookup(uint32(0), &value); !errors.Is(err, unix.EPERM) { t.Fatal("Expected EPERM from Lookup, got", err) } }) } func TestMapPinFlags(t *testing.T) { tmp := testutils.TempBPFFS(t) spec := &MapSpec{ Name: "map", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, Pinning: PinByName, } _ = mustNewMap(t, spec, &MapOptions{PinPath: tmp}) _, err := newMap(t, spec, &MapOptions{ PinPath: tmp, LoadPinOptions: LoadPinOptions{ Flags: math.MaxUint32, }, }) if !errors.Is(err, unix.EINVAL) { t.Fatal("Invalid flags should trigger EINVAL:", err) } } func TestMapQueue(t *testing.T) { testutils.SkipOnOldKernel(t, "4.20", "map type queue") m := mustNewMap(t, &MapSpec{ Type: Queue, ValueSize: 4, MaxEntries: 2, }, nil) for _, v := range []uint32{42, 4242} { if err := m.Put(nil, v); err != nil { t.Fatalf("Can't put %d: %s", v, err) } } var v uint32 if err := m.Lookup(nil, &v); err != 
nil { t.Fatal("Lookup (Peek) on Queue:", err) } if v != 42 { t.Error("Want value 42, got", v) } v = 0 if err := m.LookupAndDelete(nil, &v); err != nil { t.Fatal("Can't lookup and delete element:", err) } if v != 42 { t.Error("Want value 42, got", v) } v = 0 if err := m.LookupAndDelete(nil, unsafe.Pointer(&v)); err != nil { t.Fatal("Can't lookup and delete element using unsafe.Pointer:", err) } if v != 4242 { t.Error("Want value 4242, got", v) } if err := m.LookupAndDelete(nil, &v); !errors.Is(err, ErrKeyNotExist) { t.Fatal("Lookup and delete on empty Queue:", err) } if err := m.Lookup(nil, &v); !errors.Is(err, ErrKeyNotExist) { t.Fatal("Lookup (Peek) on empty Queue:", err) } } func TestMapInMap(t *testing.T) { for _, typ := range []MapType{ArrayOfMaps, HashOfMaps} { t.Run(typ.String(), func(t *testing.T) { inner := createMap(t, Array, 2) if err := inner.Put(uint32(1), uint32(4242)); err != nil { t.Fatal(err) } outer := createMapInMap(t, typ, Array) if err := outer.Put(uint32(0), inner); err != nil { t.Fatal("Can't put inner map:", err) } if err := outer.Put(uint32(0), (*Map)(nil)); err == nil { t.Fatal("Put accepted a nil Map") } var inner2 *Map if err := outer.Lookup(uint32(0), &inner2); err != nil { t.Fatal("Can't lookup 0:", err) } defer inner2.Close() var v uint32 if err := inner2.Lookup(uint32(1), &v); err != nil { t.Fatal("Can't lookup 1 in inner2:", err) } if v != 4242 { t.Error("Expected value 4242, got", v) } inner2.Close() // Make sure we can still access the original map if err := inner.Lookup(uint32(1), &v); err != nil { t.Fatal("Can't lookup 1 in inner:", err) } if v != 4242 { t.Error("Expected value 4242, got", v) } }) } } func TestNewMapInMapFromFD(t *testing.T) { nested := createMapInMap(t, ArrayOfMaps, Array) // Do not copy this, use Clone instead. 
another, err := NewMapFromFD(testutils.DupFD(t, nested.FD())) testutils.SkipIfNotSupportedOnOS(t, err) qt.Assert(t, qt.IsNil(err)) another.Close() } func TestPerfEventArray(t *testing.T) { specs := []*MapSpec{ {Type: PerfEventArray}, {Type: PerfEventArray, KeySize: 4}, {Type: PerfEventArray, ValueSize: 4}, } for _, spec := range specs { _ = mustNewMap(t, spec, nil) } } func TestCPUMap(t *testing.T) { testutils.SkipOnOldKernel(t, "4.15", "cpu map") m := mustNewMap(t, &MapSpec{Type: CPUMap, KeySize: 4, ValueSize: 4}, nil) qt.Assert(t, qt.Equals(m.MaxEntries(), uint32(MustPossibleCPU()))) } func TestMapInMapValueSize(t *testing.T) { spec := &MapSpec{ Type: ArrayOfMaps, KeySize: 4, ValueSize: 0, MaxEntries: 2, InnerMap: &MapSpec{ Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 2, }, } _ = mustNewMap(t, spec, nil) spec.ValueSize = 4 _ = mustNewMap(t, spec, nil) spec.ValueSize = 1 _, err := newMap(t, spec, nil) qt.Assert(t, qt.IsNotNil(err)) } func TestIterateEmptyMap(t *testing.T) { makeMap := func(t *testing.T, mapType MapType) *Map { m, err := newMap(t, &MapSpec{ Type: mapType, KeySize: 4, ValueSize: 8, MaxEntries: 2, }, nil) if errors.Is(err, unix.EINVAL) { t.Skip(mapType, "is not supported") } qt.Assert(t, qt.IsNil(err)) return m } for _, mapType := range []MapType{ Hash, SockHash, } { t.Run(mapType.String(), func(t *testing.T) { m := makeMap(t, mapType) entries := m.Iterate() var key string var value uint64 if entries.Next(&key, &value) { t.Error("Empty hash should not be iterable") } if err := entries.Err(); err != nil { t.Error("Empty hash shouldn't return an error:", err) } }) } for _, mapType := range []MapType{ Array, SockMap, } { t.Run(mapType.String(), func(t *testing.T) { m := makeMap(t, mapType) entries := m.Iterate() var key string var value uint64 for entries.Next(&key, &value) { // Some empty arrays like sockmap don't return any keys. 
} if err := entries.Err(); err != nil { t.Error("Empty array shouldn't return an error:", err) } }) } } func TestMapIterate(t *testing.T) { hash := createMap(t, Hash, 2) data := []string{"test", "more"} slices.Sort(data) for i, k := range data { if err := hash.Put(k, uint32(i)); err != nil { t.Fatal(err) } } var key string var value uint32 var keys []string entries := hash.Iterate() for entries.Next(&key, &value) { keys = append(keys, key) } qt.Assert(t, qt.IsNil(entries.Err())) sort.Strings(keys) qt.Assert(t, qt.DeepEquals(keys, data)) } func TestIterateWrongMap(t *testing.T) { testutils.SkipOnOldKernel(t, "4.20", "map type queue") m := mustNewMap(t, &MapSpec{ Type: Queue, ValueSize: 4, MaxEntries: 2, Contents: []MapKV{ {nil, uint32(0)}, {nil, uint32(1)}, }, }, nil) var value uint32 entries := m.Iterate() qt.Assert(t, qt.IsFalse(entries.Next(nil, &value))) qt.Assert(t, qt.IsNotNil(entries.Err())) } func TestMapIteratorAllocations(t *testing.T) { arr := createMap(t, Array, 10) var k, v uint32 iter := arr.Iterate() // AllocsPerRun warms up the function for us. allocs := testing.AllocsPerRun(int(arr.MaxEntries()-1), func() { if !iter.Next(&k, &v) { t.Fatal("Next failed while iterating: %w", iter.Err()) } }) qt.Assert(t, qt.Equals(allocs, float64(0))) } func TestMapBatchLookupAllocations(t *testing.T) { testutils.SkipIfNotSupported(t, haveBatchAPI()) for _, typ := range []MapType{Array, PerCPUArray} { if typ == PerCPUArray { // https://lore.kernel.org/bpf/20210424214510.806627-2-pctammela@mojatatu.com/ testutils.SkipOnOldKernel(t, "5.13", "batched ops support for percpu array") } t.Run(typ.String(), func(t *testing.T) { m := mustNewMap(t, &MapSpec{ Name: "test", Type: typ, KeySize: 4, ValueSize: 8, // PerCPU values must be 8 byte aligned. 
// customTestUnmarshaler is a byte slice that unmarshals by sampling: the
// input is divided into len(c) equally sized chunks and the first byte of
// each chunk is stored in the corresponding element.
type customTestUnmarshaler []uint8

// UnmarshalBinary fills c in place with the first byte of every chunk of
// data, where the chunk size is len(data) / len(c).
//
// It returns an error instead of panicking when c is empty or when data
// holds fewer bytes than c has elements (chunk size of zero). Trailing bytes
// that do not form a full chunk are ignored.
func (c customTestUnmarshaler) UnmarshalBinary(data []byte) error {
	if len(c) == 0 {
		return fmt.Errorf("unmarshal %d bytes: destination is empty", len(data))
	}

	chunkSize := len(data) / len(c)
	if chunkSize == 0 {
		return fmt.Errorf("unmarshal %d bytes: need at least %d", len(data), len(c))
	}

	// Iterate over the destination rather than the input so that a data
	// length which is not a multiple of len(c) can't index past the end of c.
	for i := range c {
		c[i] = data[i*chunkSize]
	}
	return nil
}
_, err = m.BatchLookup(&cursor, batchKeys, batchValues, nil) if !errors.Is(err, ErrKeyNotExist) { t.Fatal("Partial batch lookup doesn't return ErrKeyNotExist:", err) } keys = append(keys, batchKeys[0]) values = append(values, batchValues[0]) qt.Assert(t, qt.DeepEquals(keys, []uint8{0, 1, 2})) qt.Assert(t, qt.DeepEquals(values, []uint8{3, 4, 5})) } func TestMapIterateHashKeyOneByteFull(t *testing.T) { hash := mustNewMap(t, &MapSpec{ Type: Hash, KeySize: 1, ValueSize: 1, MaxEntries: 256, }, nil) for i := 0; i < int(hash.MaxEntries()); i++ { if err := hash.Put(uint8(i), uint8(i)); err != nil { t.Fatal(err) } } var key uint8 var value uint8 var keys int entries := hash.Iterate() for entries.Next(&key, &value) { if key != value { t.Fatalf("Expected key == value, got key %v value %v", key, value) } keys++ } if err := entries.Err(); err != nil { t.Fatal(err) } if keys != int(hash.MaxEntries()) { t.Fatalf("Expected to get %d keys, have %d", hash.MaxEntries(), keys) } } func TestMapGuessNonExistentKey(t *testing.T) { if !platform.IsLinux { t.Skip("No need to test linux quirk on", runtime.GOOS) } tests := []struct { name string mapType MapType keys []uint32 }{ { "empty", Hash, []uint32{}, }, { "all zero key", Hash, []uint32{0}, }, { "all ones key", Hash, []uint32{math.MaxUint32}, }, { "alternating bits key", Hash, []uint32{0x5555_5555}, }, { "all special patterns", Hash, []uint32{0, math.MaxUint32, 0x5555_5555}, }, { "empty", Array, []uint32{}, }, { "all zero key", Array, []uint32{0}, }, { "full", Array, []uint32{0, 1}, }, } for _, tt := range tests { t.Run(fmt.Sprintf("%s: %s", tt.mapType, tt.name), func(t *testing.T) { maxEntries := uint32(len(tt.keys)) if maxEntries == 0 { maxEntries = 1 } m := mustNewMap(t, &MapSpec{ Type: tt.mapType, KeySize: 4, ValueSize: 4, MaxEntries: maxEntries, }, nil) for _, key := range tt.keys { if err := m.Put(key, key); err != nil { t.Fatal(err) } } guess, err := m.guessNonExistentKey() if err != nil { t.Fatal(err) } if len(guess) != 
int(m.keySize) { t.Fatal("Guessed key has wrong size") } var value uint32 if err := m.Lookup(guess, &value); !errors.Is(err, unix.ENOENT) { t.Fatal("Doesn't return ENOENT:", err) } }) } t.Run("Hash: full", func(t *testing.T) { const n = math.MaxUint8 + 1 hash := mustNewMap(t, &MapSpec{ Type: Hash, KeySize: 1, ValueSize: 1, MaxEntries: n, }, nil) for i := 0; i < n; i++ { if err := hash.Put(uint8(i), uint8(i)); err != nil { t.Fatal(err) } } _, err := hash.guessNonExistentKey() if err == nil { t.Fatal("guessNonExistentKey doesn't return error on full hash table") } }) } func TestNotExist(t *testing.T) { hash := createMap(t, Hash, 10) var tmp uint32 err := hash.Lookup("test", &tmp) if !errors.Is(err, ErrKeyNotExist) { t.Error("Lookup doesn't return ErrKeyNotExist") } buf, err := hash.LookupBytes("test") if err != nil { t.Error("Looking up non-existent key return an error:", err) } if buf != nil { t.Error("LookupBytes returns non-nil buffer for non-existent key") } if err := hash.Delete("test"); !errors.Is(err, ErrKeyNotExist) { t.Error("Deleting unknown key doesn't return ErrKeyNotExist", err) } var k = []byte{1, 2, 3, 4} if err := hash.NextKey(&k, &tmp); !errors.Is(err, ErrKeyNotExist) { t.Error("Looking up next key in empty map doesn't return a non-existing error", err) } if err := hash.NextKey(nil, &tmp); !errors.Is(err, ErrKeyNotExist) { t.Error("Looking up next key in empty map doesn't return a non-existing error", err) } } func TestExist(t *testing.T) { hash := createMap(t, Hash, 10) if err := hash.Put("test", uint32(21)); err != nil { t.Errorf("Failed to put key/value pair into hash: %v", err) } if err := hash.Update("test", uint32(42), UpdateNoExist); !errors.Is(err, ErrKeyExist) { t.Error("Updating existing key doesn't return ErrKeyExist") } } func TestIterateMapInMap(t *testing.T) { const idx = uint32(1) parent := createMapInMap(t, ArrayOfMaps, Array) defer parent.Close() a := createMap(t, Array, 2) if err := parent.Put(idx, a); err != nil { t.Fatal(err) } 
// bpfCgroupStorageKey is the key type used by
// TestCgroupPerCPUStorageMarshaling for its PerCPUCGroupStorage map; the
// map's KeySize is taken from unsafe.Sizeof of this struct.
//
// NOTE(review): presumably mirrors the kernel's cgroup storage key layout —
// confirm against the kernel headers.
type bpfCgroupStorageKey struct {
	// CgroupInodeId identifies the cgroup; the test fills it from
	// testutils.GetCgroupIno.
	CgroupInodeId uint64
	// AttachType is the attach type the storage belongs to.
	AttachType AttachType
	_          [4]byte // Padding
}
KeySize: uint32(unsafe.Sizeof(bpfCgroupStorageKey{})), ValueSize: uint32(unsafe.Sizeof(uint64(0))), }, nil) prog := mustNewProgram(t, &ProgramSpec{ Type: CGroupSKB, AttachType: AttachCGroupInetEgress, License: "MIT", Instructions: asm.Instructions{ asm.LoadMapPtr(asm.R1, arr.FD()), asm.Mov.Imm(asm.R2, 0), asm.FnGetLocalStorage.Call(), asm.Mov.Imm(asm.R0, 0), asm.Return(), }, }, nil) cgroup := testutils.CreateCgroup(t) progAttachAttrs := sys.ProgAttachAttr{ TargetFdOrIfindex: uint32(cgroup.Fd()), AttachBpfFd: uint32(prog.FD()), AttachType: uint32(AttachCGroupInetEgress), AttachFlags: 0, ReplaceBpfFd: 0, } err := sys.ProgAttach(&progAttachAttrs) if err != nil { t.Fatal(err) } defer func() { attr := sys.ProgDetachAttr{ TargetFdOrIfindex: uint32(cgroup.Fd()), AttachBpfFd: uint32(prog.FD()), AttachType: uint32(AttachCGroupInetEgress), } if err := sys.ProgDetach(&attr); err != nil { t.Fatal(err) } }() var mapKey = &bpfCgroupStorageKey{ CgroupInodeId: testutils.GetCgroupIno(t, cgroup), AttachType: AttachCGroupInetEgress, } values := []uint64{1, 2} if err := arr.Put(mapKey, values); err != nil { t.Fatalf("Can't set cgroup %s storage: %s", cgroup.Name(), err) } var retrieved []uint64 if err := arr.Lookup(mapKey, &retrieved); err != nil { t.Fatalf("Can't retrieve cgroup %s storage: %s", cgroup.Name(), err) } for i, want := range []uint64{1, 2} { if retrieved[i] == 0 { t.Errorf("Item %d is 0", i) } else if have := retrieved[i]; have != want { t.Errorf("PerCPUCGroupStorage map is not correctly unmarshaled, expected %d but got %d", want, have) } } } func TestMapMarshalUnsafe(t *testing.T) { m := createMap(t, Hash, 1) key := uint32(1) value := uint32(42) if err := m.Put(unsafe.Pointer(&key), unsafe.Pointer(&value)); err != nil { t.Fatal(err) } var res uint32 if err := m.Lookup(unsafe.Pointer(&key), unsafe.Pointer(&res)); err != nil { t.Fatal("Can't get item:", err) } var sum uint32 iter := m.Iterate() for iter.Next(&key, unsafe.Pointer(&res)) { sum += res } if err := iter.Err(); 
err != nil { t.Fatal(err) } if res != 42 { t.Fatalf("Expected 42, got %d", res) } iter = m.Iterate() iter.Next(unsafe.Pointer(&key), &res) if err := iter.Err(); err != nil { t.Error(err) } if key != 1 { t.Errorf("Expected key 1, got %d", key) } if err := m.Delete(unsafe.Pointer(&key)); err != nil { t.Fatal("Can't delete:", err) } } func TestMapName(t *testing.T) { testutils.SkipIfNotSupported(t, haveObjName()) m := mustNewMap(t, &MapSpec{ Name: "test!123", Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 1, }, nil) var info sys.MapInfo if err := sys.ObjInfo(m.fd, &info); err != nil { t.Fatal(err) } name := unix.ByteSliceToString(info.Name[:]) qt.Assert(t, qt.Equals(name, "test123")) } func TestMapFromFD(t *testing.T) { m := createMap(t, Array, 2) if err := m.Put(uint32(0), uint32(123)); err != nil { t.Fatal(err) } // If you're thinking about copying this, don't. Use // Clone() instead. m2, err := NewMapFromFD(testutils.DupFD(t, m.FD())) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer m2.Close() var val uint32 if err := m2.Lookup(uint32(0), &val); err != nil { t.Fatal("Can't look up key:", err) } if val != 123 { t.Error("Wrong value") } } func TestMapContents(t *testing.T) { spec := &MapSpec{ Type: Array, KeySize: 4, ValueSize: 4, MaxEntries: 2, Contents: []MapKV{ {uint32(0), uint32(23)}, {uint32(1), uint32(42)}, }, } m := mustNewMap(t, spec, nil) var value uint32 if err := m.Lookup(uint32(0), &value); err != nil { t.Error("Can't look up key 0:", err) } else if value != 23 { t.Errorf("Incorrect value for key 0, expected 23, have %d", value) } if err := m.Lookup(uint32(1), &value); err != nil { t.Error("Can't look up key 1:", err) } else if value != 42 { t.Errorf("Incorrect value for key 0, expected 23, have %d", value) } spec.Contents = []MapKV{ // Key is larger than MaxEntries {uint32(14), uint32(0)}, } // Invalid contents should be rejected _, err := newMap(t, spec, nil) qt.Assert(t, qt.IsNotNil(err)) } func TestMapFreeze(t *testing.T) 
{ arr := createMap(t, Array, 2) err := arr.Freeze() testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Can't freeze map:", err) } if err := arr.Put(uint32(0), uint32(1)); err == nil { t.Error("Freeze doesn't prevent modification from user space") } info, err := arr.Info() qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsTrue(info.Frozen())) } func TestMapGetNextID(t *testing.T) { testutils.SkipOnOldKernel(t, "4.13", "bpf_map_get_next_id") var next MapID var err error // Ensure there is at least one map on the system. _ = createMap(t, Hash, 10) if next, err = MapGetNextID(MapID(0)); err != nil { t.Fatal("Can't get next ID:", err) } if next == MapID(0) { t.Fatal("Expected next ID other than 0") } // As there can be multiple eBPF maps, we loop over all of them and // make sure, the IDs increase and the last call will return ErrNotExist for { last := next if next, err = MapGetNextID(last); err != nil { if !errors.Is(err, os.ErrNotExist) { t.Fatal("Expected ErrNotExist, got:", err) } break } if next <= last { t.Fatalf("Expected next ID (%d) to be higher than the last ID (%d)", next, last) } } } func TestNewMapFromID(t *testing.T) { hash := createMap(t, Hash, 10) info, err := hash.Info() testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Couldn't get map info:", err) } id, ok := info.ID() if !ok { t.Skip("Map ID not supported") } hash2, err := NewMapFromID(id) if err != nil { t.Fatalf("Can't get map for ID %d: %v", id, err) } hash2.Close() // As there can be multiple maps, we use max(uint32) as MapID to trigger an expected error. 
_, err = NewMapFromID(MapID(math.MaxUint32)) if !errors.Is(err, os.ErrNotExist) { t.Fatal("Expected ErrNotExist, got:", err) } } func TestMapPinning(t *testing.T) { tmp := testutils.TempBPFFS(t) spec := &MapSpec{ Name: "test", Type: Hash, KeySize: 4, ValueSize: 4, MaxEntries: 1, Pinning: PinByName, } m1 := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) pinned := m1.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) m1Info, err := m1.Info() qt.Assert(t, qt.IsNil(err)) if err := m1.Put(uint32(0), uint32(42)); err != nil { t.Fatal("Can't write value:", err) } m2 := mustNewMap(t, spec, &MapOptions{PinPath: tmp}) m2Info, err := m2.Info() qt.Assert(t, qt.IsNil(err)) if m1ID, ok := m1Info.ID(); ok { m2ID, _ := m2Info.ID() qt.Assert(t, qt.Equals(m2ID, m1ID)) } var value uint32 if err := m2.Lookup(uint32(0), &value); err != nil { t.Fatal("Can't read from map:", err) } if value != 42 { t.Fatal("Pinning doesn't use pinned maps") } spec.KeySize = 8 spec.ValueSize = 8 _, err = newMap(t, spec, &MapOptions{PinPath: tmp}) if err == nil { t.Fatalf("Opening a pinned map with a mismatching spec did not fail") } if !errors.Is(err, ErrMapIncompatible) { t.Fatalf("Opening a pinned map with a mismatching spec failed with the wrong error") } // Check if error string mentions both KeySize and ValueSize. 
qt.Assert(t, qt.StringContains(err.Error(), "KeySize")) qt.Assert(t, qt.StringContains(err.Error(), "ValueSize")) } func TestMapHandle(t *testing.T) { kv := &btf.Int{Size: 4} m := mustNewMap(t, &MapSpec{ Type: Hash, KeySize: kv.Size, ValueSize: kv.Size, Key: kv, Value: kv, MaxEntries: 1, }, nil) h, err := m.Handle() testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNotNil(h)) defer h.Close() spec, err := h.Spec(nil) qt.Assert(t, qt.IsNil(err)) typ, err := spec.TypeByID(1) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.ContentEquals(typ, btf.Type(kv))) } func TestPerfEventArrayCompatible(t *testing.T) { ms := &MapSpec{ Type: PerfEventArray, } m := mustNewMap(t, ms, nil) qt.Assert(t, qt.IsNil(ms.Compatible(m))) ms.MaxEntries = m.MaxEntries() - 1 qt.Assert(t, qt.IsNotNil(ms.Compatible(m))) } func TestLoadWrongPin(t *testing.T) { p := createBasicProgram(t) m := createMap(t, Hash, 10) tmp := testutils.TempBPFFS(t) ppath := filepath.Join(tmp, "prog") mpath := filepath.Join(tmp, "map") qt.Assert(t, qt.IsNil(m.Pin(mpath))) qt.Assert(t, qt.IsNil(p.Pin(ppath))) t.Run("Program", func(t *testing.T) { lp, err := LoadPinnedProgram(ppath, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(lp.Close())) _, err = LoadPinnedProgram(mpath, nil) qt.Assert(t, qt.IsNotNil(err)) }) t.Run("Map", func(t *testing.T) { lm, err := LoadPinnedMap(mpath, nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsNil(lm.Close())) _, err = LoadPinnedMap(ppath, nil) qt.Assert(t, qt.IsNotNil(err)) }) } type benchValue struct { ID uint32 Val16 uint16 Val16_2 uint16 Name [8]byte LID uint64 } type customBenchValue benchValue func (cbv *customBenchValue) UnmarshalBinary(buf []byte) error { cbv.ID = internal.NativeEndian.Uint32(buf) cbv.Val16 = internal.NativeEndian.Uint16(buf[4:]) cbv.Val16_2 = internal.NativeEndian.Uint16(buf[6:]) copy(cbv.Name[:], buf[8:]) cbv.LID = internal.NativeEndian.Uint64(buf[16:]) 
return nil } func (cbv *customBenchValue) MarshalBinary() ([]byte, error) { buf := make([]byte, 24) internal.NativeEndian.PutUint32(buf, cbv.ID) internal.NativeEndian.PutUint16(buf[4:], cbv.Val16) internal.NativeEndian.PutUint16(buf[6:], cbv.Val16_2) copy(buf[8:], cbv.Name[:]) internal.NativeEndian.PutUint64(buf[16:], cbv.LID) return buf, nil } type benchKey struct { id uint64 } func (bk *benchKey) MarshalBinary() ([]byte, error) { buf := make([]byte, 8) internal.NativeEndian.PutUint64(buf, bk.id) return buf, nil } func BenchmarkMarshaling(b *testing.B) { newMap := func(valueSize uint32) *Map { return mustNewMap(b, &MapSpec{ Type: Hash, KeySize: 8, ValueSize: valueSize, MaxEntries: 1, }, nil) } key := uint64(0) m := newMap(24) if err := m.Put(key, benchValue{}); err != nil { b.Fatal(err) } b.Cleanup(func() { m.Close() }) b.Run("ValueUnmarshalReflect", func(b *testing.B) { b.ReportAllocs() var value benchValue for b.Loop() { err := m.Lookup(unsafe.Pointer(&key), &value) if err != nil { b.Fatal("Can't get key:", err) } } }) b.Run("KeyMarshalReflect", func(b *testing.B) { b.ReportAllocs() var value benchValue for b.Loop() { err := m.Lookup(&key, unsafe.Pointer(&value)) if err != nil { b.Fatal("Can't get key:", err) } } }) b.Run("ValueBinaryUnmarshaler", func(b *testing.B) { b.ReportAllocs() var value customBenchValue for b.Loop() { err := m.Lookup(unsafe.Pointer(&key), &value) if err != nil { b.Fatal("Can't get key:", err) } } }) b.Run("KeyBinaryMarshaler", func(b *testing.B) { b.ReportAllocs() var key benchKey var value customBenchValue for b.Loop() { err := m.Lookup(&key, unsafe.Pointer(&value)) if err != nil { b.Fatal("Can't get key:", err) } } }) b.Run("KeyValueUnsafe", func(b *testing.B) { b.ReportAllocs() var value benchValue for b.Loop() { err := m.Lookup(unsafe.Pointer(&key), unsafe.Pointer(&value)) if err != nil { b.Fatal("Can't get key:", err) } } }) } func BenchmarkPerCPUMarshalling(b *testing.B) { key := uint64(1) val := make([]uint64, MustPossibleCPU()) 
for i := range val { val[i] = uint64(i) } m := mustNewMap(b, &MapSpec{ Type: PerCPUHash, KeySize: 8, ValueSize: 8, MaxEntries: 1, }, nil) if err := m.Put(key, val[0:]); err != nil { b.Fatal(err) } b.Run("reflection", func(b *testing.B) { b.ReportAllocs() var value []uint64 for b.Loop() { err := m.Lookup(unsafe.Pointer(&key), &value) if err != nil { b.Fatal("Can't get key:", err) } } }) } func BenchmarkMap(b *testing.B) { m := createMap(b, Hash, 1) if err := m.Put(uint32(0), uint32(42)); err != nil { b.Fatal(err) } b.Run("Lookup", func(b *testing.B) { var key, value uint32 b.ReportAllocs() for b.Loop() { err := m.Lookup(unsafe.Pointer(&key), unsafe.Pointer(&value)) if err != nil { b.Fatal(err) } } }) b.Run("Update", func(b *testing.B) { var key, value uint32 b.ReportAllocs() for b.Loop() { err := m.Update(unsafe.Pointer(&key), unsafe.Pointer(&value), UpdateAny) if err != nil { b.Fatal(err) } } }) b.Run("NextKey", func(b *testing.B) { var key uint32 b.ReportAllocs() for b.Loop() { err := m.NextKey(nil, unsafe.Pointer(&key)) if err != nil { b.Fatal(err) } } }) b.Run("Delete", func(b *testing.B) { var key uint32 b.ReportAllocs() for b.Loop() { err := m.Delete(unsafe.Pointer(&key)) if err != nil && !errors.Is(err, ErrKeyNotExist) { b.Fatal(err) } } }) } func BenchmarkIterate(b *testing.B) { for _, mt := range []MapType{Hash, PerCPUHash} { m := mustNewMap(b, &MapSpec{ Type: mt, KeySize: 8, ValueSize: 8, MaxEntries: 1000, }, nil) possibleCPU := 1 if m.Type().hasPerCPUValue() { possibleCPU = MustPossibleCPU() } var ( n = m.MaxEntries() keys = make([]uint64, n) values = make([]uint64, n*uint32(possibleCPU)) ) for i := 0; uint32(i) < n; i++ { keys[i] = uint64(i) for j := 0; j < possibleCPU; j++ { values[i] = uint64((i * possibleCPU) + j) } } _, err := m.BatchUpdate(keys, values, nil) testutils.SkipIfNotSupported(b, err) qt.Assert(b, qt.IsNil(err)) b.Run(m.Type().String(), func(b *testing.B) { b.Run("MapIterator", func(b *testing.B) { var k uint64 v := make([]uint64, 
possibleCPU) b.ReportAllocs() for b.Loop() { iter := m.Iterate() for iter.Next(&k, v) { continue } if err := iter.Err(); err != nil { b.Fatal(err) } } }) b.Run("MapIteratorDelete", func(b *testing.B) { var k uint64 v := make([]uint64, possibleCPU) b.ReportAllocs() for b.Loop() { b.StopTimer() if _, err := m.BatchUpdate(keys, values, nil); err != nil { b.Fatal(err) } b.StartTimer() iter := m.Iterate() for iter.Next(&k, &v) { if err := m.Delete(&k); err != nil { b.Fatal(err) } } if err := iter.Err(); err != nil { b.Fatal(err) } } }) b.Run("BatchLookup", func(b *testing.B) { k := make([]uint64, m.MaxEntries()) v := make([]uint64, m.MaxEntries()*uint32(possibleCPU)) b.ReportAllocs() for b.Loop() { var cursor MapBatchCursor for { _, err := m.BatchLookup(&cursor, k, v, nil) if errors.Is(err, ErrKeyNotExist) { break } if err != nil { b.Fatal(err) } } } }) b.Run("BatchLookupAndDelete", func(b *testing.B) { k := make([]uint64, m.MaxEntries()) v := make([]uint64, m.MaxEntries()*uint32(possibleCPU)) b.ReportAllocs() for b.Loop() { b.StopTimer() if _, err := m.BatchUpdate(keys, values, nil); err != nil { b.Fatal(err) } b.StartTimer() var cursor MapBatchCursor for { _, err := m.BatchLookupAndDelete(&cursor, k, v, nil) if errors.Is(err, ErrKeyNotExist) { break } if err != nil { b.Fatal(err) } } } }) b.Run("BatchDelete", func(b *testing.B) { b.ReportAllocs() for b.Loop() { b.StopTimer() if _, err := m.BatchUpdate(keys, values, nil); err != nil { b.Fatal(err) } b.StartTimer() if _, err := m.BatchDelete(keys, nil); err != nil { b.Fatal(err) } } }) }) } } // Per CPU maps store a distinct value for each CPU. They are useful // to collect metrics. func ExampleMap_perCPU() { arr, err := NewMap(&MapSpec{ Type: PerCPUArray, KeySize: 4, ValueSize: 4, MaxEntries: 2, }) if err != nil { panic(err) } defer arr.Close() possibleCPUs := MustPossibleCPU() perCPUValues := map[uint32]uint32{ 0: 4, 1: 5, } for k, v := range perCPUValues { // We set each perCPU slots to the same value. 
values := make([]uint32, possibleCPUs) for i := range values { values[i] = v } if err := arr.Put(k, values); err != nil { panic(err) } } for k := 0; k < 2; k++ { var values []uint32 if err := arr.Lookup(uint32(k), &values); err != nil { panic(err) } // Note we will print an unexpected message if this is not true. fmt.Printf("Value of key %v on all CPUs: %v\n", k, values[0]) } var ( key uint32 entries = arr.Iterate() ) var values []uint32 for entries.Next(&key, &values) { expected, ok := perCPUValues[key] if !ok { fmt.Printf("Unexpected key %v\n", key) continue } for i, n := range values { if n != expected { fmt.Printf("Key %v, Value for cpu %v is %v not %v\n", key, i, n, expected) } } } if err := entries.Err(); err != nil { panic(err) } } // It is possible to use unsafe.Pointer to avoid marshalling // and copy overhead. It is the responsibility of the caller to ensure // the correct size of unsafe.Pointers. // // Note that using unsafe.Pointer is only marginally faster than // implementing Marshaler on the type. 
func ExampleMap_zeroCopy() { hash, err := NewMap(&MapSpec{ Type: Hash, KeySize: 5, ValueSize: 4, MaxEntries: 10, }) if err != nil { panic(err) } defer hash.Close() key := [5]byte{'h', 'e', 'l', 'l', 'o'} value := uint32(23) if err := hash.Put(unsafe.Pointer(&key), unsafe.Pointer(&value)); err != nil { panic(err) } value = 0 if err := hash.Lookup(unsafe.Pointer(&key), unsafe.Pointer(&value)); err != nil { panic("can't get value:" + err.Error()) } fmt.Printf("The value is: %d\n", value) } func ExampleMap_NextKey() { hash, err := NewMap(&MapSpec{ Type: Hash, KeySize: 5, ValueSize: 4, MaxEntries: 10, Contents: []MapKV{ {"hello", uint32(21)}, {"world", uint32(42)}, }, }) if err != nil { panic(err) } defer hash.Close() var cur, next string var keys []string for err = hash.NextKey(nil, &next); ; err = hash.NextKey(cur, &next) { if errors.Is(err, ErrKeyNotExist) { break } if err != nil { panic(err) } keys = append(keys, next) cur = next } // Order of keys is non-deterministic due to randomized map seed sort.Strings(keys) fmt.Printf("Keys are %v\n", keys) } // ExampleMap_Iterate demonstrates how to iterate over all entries // in a map. func ExampleMap_Iterate() { hash, err := NewMap(&MapSpec{ Type: Hash, KeySize: 5, ValueSize: 4, MaxEntries: 10, Contents: []MapKV{ {"hello", uint32(21)}, {"world", uint32(42)}, }, }) if err != nil { panic(err) } defer hash.Close() var ( key string value uint32 entries = hash.Iterate() ) values := make(map[string]uint32) for entries.Next(&key, &value) { // Order of keys is non-deterministic due to randomized map seed values[key] = value } if err := entries.Err(); err != nil { panic(fmt.Sprint("Iterator encountered an error:", err)) } for k, v := range values { fmt.Printf("key: %s, value: %d\n", k, v) } } // It is possible to iterate nested maps and program arrays by // unmarshaling into a *Map or *Program. 
func ExampleMap_Iterate_nestedMapsAndProgramArrays() {
	// Spec shared by the concrete inner map and the InnerMap field of the
	// outer spec below.
	inner := &MapSpec{
		Type:       Array,
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 2,
		Contents: []MapKV{
			{uint32(0), uint32(1)},
			{uint32(1), uint32(2)},
		},
	}
	im, err := NewMap(inner)
	if err != nil {
		panic(err)
	}
	defer im.Close()

	// The outer map holds a single entry: key 0 refers to im.
	outer := &MapSpec{
		Type:       ArrayOfMaps,
		InnerMap:   inner,
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 10,
		Contents: []MapKV{
			{uint32(0), im},
		},
	}
	arrayOfMaps, err := NewMap(outer)
	if errors.Is(err, internal.ErrNotSupported) {
		// Fake the output if on very old kernel.
		fmt.Println("outerKey: 0")
		fmt.Println("\tinnerKey 0 innerValue 1")
		fmt.Println("\tinnerKey 1 innerValue 2")
		return
	}
	if err != nil {
		panic(err)
	}
	defer arrayOfMaps.Close()

	var (
		key     uint32
		m       *Map
		entries = arrayOfMaps.Iterate()
	)
	// Passing &m (a **Map) makes the iterator hand back each value as a *Map.
	for entries.Next(&key, &m) {
		// Make sure that the iterated map is closed after
		// we are done.
		//
		// Note that this defer runs when the example function returns, not at
		// the end of each loop iteration.
		defer m.Close()

		// Order of keys is non-deterministic due to randomized map seed
		fmt.Printf("outerKey: %v\n", key)

		var innerKey, innerValue uint32
		items := m.Iterate()
		for items.Next(&innerKey, &innerValue) {
			fmt.Printf("\tinnerKey %v innerValue %v\n", innerKey, innerValue)
		}
		if err := items.Err(); err != nil {
			panic(fmt.Sprint("Inner Iterator encountered an error:", err))
		}
	}

	if err := entries.Err(); err != nil {
		panic(fmt.Sprint("Iterator encountered an error:", err))
	}
}

================================================
FILE: marshaler_example_test.go
================================================
package ebpf

import (
	"encoding"
	"fmt"
	"strings"
)

// Assert that customEncoding implements the correct interfaces.
var (
	_ encoding.BinaryMarshaler   = (*customEncoding)(nil)
	_ encoding.BinaryUnmarshaler = (*customEncoding)(nil)
)

// customEncoding is a string wrapper with a deliberately asymmetric binary
// encoding, used to show that the map methods call the custom marshalers.
type customEncoding struct {
	data string
}

// MarshalBinary encodes data as its upper-cased bytes.
func (ce *customEncoding) MarshalBinary() ([]byte, error) {
	return []byte(strings.ToUpper(ce.data)), nil
}

// UnmarshalBinary stores buf verbatim as data. It does not undo the
// upper-casing done by MarshalBinary.
func (ce *customEncoding) UnmarshalBinary(buf []byte) error {
	ce.data = string(buf)
	return nil
}

// Example_customMarshaler shows how to use custom encoding with map methods.
func Example_customMarshaler() {
	hash, err := NewMap(&MapSpec{
		Type:       Hash,
		KeySize:    5,
		ValueSize:  4,
		MaxEntries: 10,
	})
	if err != nil {
		panic(err)
	}
	defer hash.Close()

	// The key is encoded through customEncoding.MarshalBinary.
	if err := hash.Put(&customEncoding{"hello"}, uint32(111)); err != nil {
		panic(err)
	}

	var (
		key     customEncoding
		value   uint32
		entries = hash.Iterate()
	)

	// Each key is decoded through customEncoding.UnmarshalBinary.
	for entries.Next(&key, &value) {
		fmt.Printf("key: %s, value: %d\n", key.data, value)
	}

	if err := entries.Err(); err != nil {
		panic(err)
	}
}

================================================
FILE: marshalers.go
================================================
package ebpf

import (
	"encoding"
	"errors"
	"fmt"
	"reflect"
	"slices"
	"unsafe"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/sysenc"
)

// marshalMapSyscallInput converts an arbitrary value into a pointer suitable
// to be passed to the kernel.
//
// As an optimization, it returns the original value if it is an
// unsafe.Pointer.
func marshalMapSyscallInput(data any, length int) (sys.Pointer, error) { if ptr, ok := data.(unsafe.Pointer); ok { return sys.UnsafePointer(ptr), nil } buf, err := sysenc.Marshal(data, length) if err != nil { return sys.Pointer{}, err } return buf.Pointer(), nil } func makeMapSyscallOutput(dst any, length int) sysenc.Buffer { if ptr, ok := dst.(unsafe.Pointer); ok { return sysenc.UnsafeBuffer(ptr) } _, ok := dst.(encoding.BinaryUnmarshaler) if ok { return sysenc.SyscallOutput(nil, length) } return sysenc.SyscallOutput(dst, length) } // appendPerCPUSlice encodes a slice containing one value per // possible CPU into a buffer of bytes. // // Values are initialized to zero if the slice has less elements than CPUs. func appendPerCPUSlice(buf []byte, slice any, possibleCPUs, elemLength, alignedElemLength int) ([]byte, error) { sliceType := reflect.TypeOf(slice) if sliceType.Kind() != reflect.Slice { return nil, errors.New("per-CPU value requires slice") } sliceValue := reflect.ValueOf(slice) sliceLen := sliceValue.Len() if sliceLen > possibleCPUs { return nil, fmt.Errorf("per-CPU value greater than number of CPUs") } // Grow increases the slice's capacity, _if_necessary_ buf = slices.Grow(buf, alignedElemLength*possibleCPUs) for i := 0; i < sliceLen; i++ { elem := sliceValue.Index(i).Interface() elemBytes, err := sysenc.Marshal(elem, elemLength) if err != nil { return nil, err } buf = elemBytes.AppendTo(buf) buf = append(buf, make([]byte, alignedElemLength-elemLength)...) } // Ensure buf is zero-padded full size. buf = append(buf, make([]byte, (possibleCPUs-sliceLen)*alignedElemLength)...) return buf, nil } // marshalPerCPUValue encodes a slice containing one value per // possible CPU into a buffer of bytes. // // Values are initialized to zero if the slice has less elements than CPUs. 
func marshalPerCPUValue(slice any, elemLength int) (sys.Pointer, error) { possibleCPUs, err := PossibleCPU() if err != nil { return sys.Pointer{}, err } alignedElemLength := internal.Align(elemLength, 8) buf := make([]byte, 0, alignedElemLength*possibleCPUs) buf, err = appendPerCPUSlice(buf, slice, possibleCPUs, elemLength, alignedElemLength) if err != nil { return sys.Pointer{}, err } return sys.UnsafeSlicePointer(buf), nil } // marshalBatchPerCPUValue encodes a batch-sized slice of slices containing // one value per possible CPU into a buffer of bytes. func marshalBatchPerCPUValue(slice any, batchLen, elemLength int) ([]byte, error) { sliceType := reflect.TypeOf(slice) if sliceType.Kind() != reflect.Slice { return nil, fmt.Errorf("batch value requires a slice") } sliceValue := reflect.ValueOf(slice) possibleCPUs, err := PossibleCPU() if err != nil { return nil, err } if sliceValue.Len() != batchLen*possibleCPUs { return nil, fmt.Errorf("per-CPU slice has incorrect length, expected %d, got %d", batchLen*possibleCPUs, sliceValue.Len()) } alignedElemLength := internal.Align(elemLength, 8) buf := make([]byte, 0, batchLen*alignedElemLength*possibleCPUs) for i := 0; i < batchLen; i++ { batch := sliceValue.Slice(i*possibleCPUs, (i+1)*possibleCPUs).Interface() buf, err = appendPerCPUSlice(buf, batch, possibleCPUs, elemLength, alignedElemLength) if err != nil { return nil, fmt.Errorf("batch %d: %w", i, err) } } return buf, nil } // unmarshalPerCPUValue decodes a buffer into a slice containing one value per // possible CPU. // // slice must be a literal slice and not a pointer. 
func unmarshalPerCPUValue(slice any, elemLength int, buf []byte) error { sliceType := reflect.TypeOf(slice) if sliceType.Kind() != reflect.Slice { return fmt.Errorf("per-CPU value requires a slice") } possibleCPUs, err := PossibleCPU() if err != nil { return err } sliceValue := reflect.ValueOf(slice) if sliceValue.Len() != possibleCPUs { return fmt.Errorf("per-CPU slice has incorrect length, expected %d, got %d", possibleCPUs, sliceValue.Len()) } sliceElemType := sliceType.Elem() sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr stride := internal.Align(elemLength, 8) for i := 0; i < possibleCPUs; i++ { var elem any v := sliceValue.Index(i) if sliceElemIsPointer { if !v.Elem().CanAddr() { return fmt.Errorf("per-CPU slice elements cannot be nil") } elem = v.Elem().Addr().Interface() } else { elem = v.Addr().Interface() } err := sysenc.Unmarshal(elem, buf[:elemLength]) if err != nil { return fmt.Errorf("cpu %d: %w", i, err) } buf = buf[stride:] } return nil } // unmarshalBatchPerCPUValue decodes a buffer into a batch-sized slice // containing one value per possible CPU. // // slice must have length batchLen * PossibleCPUs(). 
func unmarshalBatchPerCPUValue(slice any, batchLen, elemLength int, buf []byte) error { sliceType := reflect.TypeOf(slice) if sliceType.Kind() != reflect.Slice { return fmt.Errorf("batch requires a slice") } sliceValue := reflect.ValueOf(slice) possibleCPUs, err := PossibleCPU() if err != nil { return err } if sliceValue.Len() != batchLen*possibleCPUs { return fmt.Errorf("per-CPU slice has incorrect length, expected %d, got %d", sliceValue.Len(), batchLen*possibleCPUs) } fullValueSize := possibleCPUs * internal.Align(elemLength, 8) if len(buf) != batchLen*fullValueSize { return fmt.Errorf("input buffer has incorrect length, expected %d, got %d", len(buf), batchLen*fullValueSize) } for i := 0; i < batchLen; i++ { elem := sliceValue.Slice(i*possibleCPUs, (i+1)*possibleCPUs).Interface() if err := unmarshalPerCPUValue(elem, elemLength, buf[:fullValueSize]); err != nil { return fmt.Errorf("batch %d: %w", i, err) } buf = buf[fullValueSize:] } return nil } ================================================ FILE: marshalers_test.go ================================================ package ebpf import ( "testing" "github.com/cilium/ebpf/internal" "github.com/go-quicktest/qt" ) func TestMarshalUnmarshalBatchPerCPUValue(t *testing.T) { const ( batchLen = 3 elemLength = 4 ) possibleCPU := MustPossibleCPU() sliceLen := batchLen * possibleCPU slice := makeFilledSlice(sliceLen) buf, err := marshalBatchPerCPUValue(slice, batchLen, elemLength) if err != nil { t.Fatal(err) } output := make([]uint32, sliceLen) err = unmarshalBatchPerCPUValue(output, batchLen, elemLength, buf) if err != nil { t.Fatal(err) } qt.Assert(t, qt.DeepEquals(output, slice)) } func TestMarshalBatchPerCPUValue(t *testing.T) { const ( batchLen = 3 elemLength = 4 ) possibleCPU := MustPossibleCPU() sliceLen := batchLen * possibleCPU slice := makeFilledSlice(sliceLen) expected := make([]byte, sliceLen*internal.Align(elemLength, 8)) b := expected for _, elem := range slice { internal.NativeEndian.PutUint32(b, elem) b = 
b[8:] } buf, err := marshalBatchPerCPUValue(slice, batchLen, elemLength) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(buf, expected)) tooSmall := slice[:len(slice)-1] buf, err = marshalBatchPerCPUValue(tooSmall, batchLen, elemLength) qt.Assert(t, qt.IsNotNil(err)) qt.Assert(t, qt.HasLen(buf, 0)) tooBig := append(slice, 0) buf, err = marshalBatchPerCPUValue(tooBig, batchLen, elemLength) qt.Assert(t, qt.IsNotNil(err)) qt.Assert(t, qt.HasLen(buf, 0)) } func TestUnmarshalBatchPerCPUValue(t *testing.T) { const ( batchLen = 3 elemLength = 4 ) possibleCPU := MustPossibleCPU() outputLen := batchLen * possibleCPU output := make([]uint32, outputLen) expected := makeFilledSlice(batchLen * possibleCPU) buf := make([]byte, batchLen*possibleCPU*internal.Align(elemLength, 8)) b := buf for _, elem := range expected { internal.NativeEndian.PutUint32(b, elem) b = b[8:] } err := unmarshalBatchPerCPUValue(output, batchLen, elemLength, buf) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.DeepEquals(output, expected)) tooSmall := make([]uint32, outputLen-1) err = unmarshalBatchPerCPUValue(tooSmall, batchLen, elemLength, buf) qt.Assert(t, qt.IsNotNil(err)) tooBig := make([]uint32, outputLen+1) err = unmarshalBatchPerCPUValue(tooBig, batchLen, elemLength, buf) qt.Assert(t, qt.IsNotNil(err)) empty := make([]uint32, outputLen) tooSmallBuf := buf[:len(buf)-1] err = unmarshalBatchPerCPUValue(empty, batchLen, elemLength, tooSmallBuf) qt.Assert(t, qt.IsNotNil(err)) tooBigBuf := append(buf, 0) err = unmarshalBatchPerCPUValue(empty, batchLen, elemLength, tooBigBuf) qt.Assert(t, qt.IsNotNil(err)) } func TestUnmarshalPerCPUValue(t *testing.T) { possibleCPUs := MustPossibleCPU() expected := make([]uint32, possibleCPUs) for i := 0; i < possibleCPUs; i++ { expected[i] = uint32(1021 * (i + 1)) } elemLength := 4 buf := make([]byte, possibleCPUs*internal.Align(elemLength, 8)) b := buf for _, elem := range expected { internal.NativeEndian.PutUint32(b, elem) b = b[8:] } slice := make([]uint32, 
possibleCPUs) err := unmarshalPerCPUValue(slice, elemLength, buf) if err != nil { t.Fatal(err) } qt.Assert(t, qt.DeepEquals(slice, expected)) smallSlice := make([]uint32, possibleCPUs-1) qt.Assert(t, qt.IsNotNil(unmarshalPerCPUValue(smallSlice, elemLength, buf))) nilElemSlice := make([]*uint32, possibleCPUs) qt.Assert(t, qt.IsNotNil(unmarshalPerCPUValue(nilElemSlice, elemLength, buf))) } func makeFilledSlice(len int) []uint32 { slice := make([]uint32, len) for i := range slice { slice[i] = uint32(1021 * (i + 1)) } return slice } ================================================ FILE: memory.go ================================================ package ebpf import ( "errors" "fmt" "io" "runtime" "github.com/cilium/ebpf/internal/unix" ) // Memory is the building block for accessing the memory of specific bpf map // types (Array and Arena at the time of writing) without going through the bpf // syscall interface. // // Given the fd of a bpf map created with the BPF_F_MMAPABLE flag, a shared // 'file'-based memory-mapped region can be allocated in the process' address // space, exposing the bpf map's memory by simply accessing a memory location. var ErrReadOnly = errors.New("resource is read-only") // Memory implements accessing a Map's memory without making any syscalls. // Pay attention to the difference between Go and C struct alignment rules. Use // [structs.HostLayout] on supported Go versions to help with alignment. // // Note on memory coherence: avoid using packed structs in memory shared between // user space and eBPF C programs. This drops a struct's memory alignment to 1, // forcing the compiler to use single-byte loads and stores for field accesses. // This may lead to partially-written data to be observed from user space. // // On most architectures, the memmove implementation used by Go's copy() will // access data in word-sized chunks. 
If paired with a matching access pattern on // the eBPF C side (and if using default memory alignment), accessing shared // memory without atomics or other synchronization primitives should be sound // for individual values. For accesses beyond a single value, the usual // concurrent programming rules apply. type Memory struct { b []byte ro bool heap bool cleanup runtime.Cleanup } func newMemory(fd, size int) (*Memory, error) { // Typically, maps created with BPF_F_RDONLY_PROG remain writable from user // space until frozen. As a security precaution, the kernel doesn't allow // mapping bpf map memory as read-write into user space if the bpf map was // frozen, or if it was created using the RDONLY_PROG flag. // // The user would be able to write to the map after freezing (since the kernel // can't change the protection mode of an already-mapped page), while the // verifier assumes the contents to be immutable. b, err := unix.Mmap(fd, 0, size, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) // If the map is frozen when an rw mapping is requested, expect EPERM. If the // map was created with BPF_F_RDONLY_PROG, expect EACCES. var ro bool if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EACCES) { ro = true b, err = unix.Mmap(fd, 0, size, unix.PROT_READ, unix.MAP_SHARED) } if err != nil { return nil, fmt.Errorf("setting up memory-mapped region: %w", err) } mm := &Memory{b: b, ro: ro, heap: false} mm.cleanup = runtime.AddCleanup(mm, memoryCleanupFunc(), b) return mm, nil } func memoryCleanupFunc() func([]byte) { return func(b []byte) { if err := unix.Munmap(b); err != nil { panic(fmt.Errorf("unmapping memory: %w", err)) } } } func (mm *Memory) close() { mm.cleanup.Stop() memoryCleanupFunc()(mm.b) mm.b = nil } // Size returns the size of the memory-mapped region in bytes. func (mm *Memory) Size() uint32 { return uint32(len(mm.b)) } // ReadOnly returns true if the memory-mapped region is read-only. 
func (mm *Memory) ReadOnly() bool { return mm.ro } // bounds returns true if an access at off of the given size is within bounds. func (mm *Memory) bounds(off, size uint32) bool { if off+size < off { return false } return off+size <= uint32(len(mm.b)) } // ReadAt implements [io.ReaderAt]. Useful for creating a new [io.OffsetWriter]. // // See [Memory] for details around memory coherence. func (mm *Memory) ReadAt(p []byte, off int64) (int, error) { if mm.b == nil { return 0, fmt.Errorf("memory-mapped region closed") } if p == nil { return 0, fmt.Errorf("input buffer p is nil") } if off < 0 || off >= int64(len(mm.b)) { return 0, fmt.Errorf("read offset out of range") } n := copy(p, mm.b[off:]) if n < len(p) { return n, io.EOF } return n, nil } // WriteAt implements [io.WriterAt]. Useful for creating a new // [io.SectionReader]. // // See [Memory] for details around memory coherence. func (mm *Memory) WriteAt(p []byte, off int64) (int, error) { if mm.b == nil { return 0, fmt.Errorf("memory-mapped region closed") } if mm.ro { return 0, fmt.Errorf("memory-mapped region not writable: %w", ErrReadOnly) } if p == nil { return 0, fmt.Errorf("output buffer p is nil") } if off < 0 || off >= int64(len(mm.b)) { return 0, fmt.Errorf("write offset out of range") } n := copy(mm.b[off:], p) if n < len(p) { return n, io.EOF } return n, nil } ================================================ FILE: memory_test.go ================================================ package ebpf import ( "io" "math" "os" "runtime" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" ) func mustMmapableArray(tb testing.TB, extraFlags uint32) *Map { tb.Helper() m, err := newMap(tb, &MapSpec{ Name: "ebpf_mmap", Type: Array, KeySize: 4, ValueSize: 8, MaxEntries: 8, Flags: sys.BPF_F_MMAPABLE | extraFlags, }, nil) testutils.SkipIfNotSupported(tb, err) qt.Assert(tb, qt.IsNil(err)) return m } func TestMemory(t *testing.T) { mm, err := 
mustMmapableArray(t, 0).Memory() qt.Assert(t, qt.IsNil(err)) // Ensure the cleanup is set correctly and doesn't unmap the region while // we're using it. runtime.GC() // The mapping is always at least one page long, and the Map created here fits // in a single page. qt.Assert(t, qt.Equals(mm.Size(), uint32(os.Getpagesize()))) // No BPF_F_RDONLY_PROG flag, so the Memory should be read-write. qt.Assert(t, qt.IsFalse(mm.ReadOnly())) want := []byte{1, 2, 3, 4, 4, 3, 2, 1} w := io.NewOffsetWriter(mm, 16) n, err := w.Write(want) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(n, 8)) r := io.NewSectionReader(mm, 16, int64(len(want))) got := make([]byte, len(want)) n, err = r.Read(got) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(n, len(want))) } func TestMemoryBounds(t *testing.T) { mm, err := mustMmapableArray(t, 0).Memory() qt.Assert(t, qt.IsNil(err)) end := mm.Size() qt.Assert(t, qt.IsTrue(mm.bounds(0, 0))) qt.Assert(t, qt.IsTrue(mm.bounds(end, 0))) qt.Assert(t, qt.IsTrue(mm.bounds(end-8, 8))) qt.Assert(t, qt.IsTrue(mm.bounds(0, end))) qt.Assert(t, qt.IsFalse(mm.bounds(end-8, 9))) qt.Assert(t, qt.IsFalse(mm.bounds(end, 1))) qt.Assert(t, qt.IsFalse(mm.bounds(math.MaxUint32, 1))) } func TestMemoryReadOnly(t *testing.T) { rd, err := mustMmapableArray(t, sys.BPF_F_RDONLY_PROG).Memory() qt.Assert(t, qt.IsNil(err)) // BPF_F_RDONLY_PROG flag, so the Memory should be read-only. qt.Assert(t, qt.IsTrue(rd.ReadOnly())) // Frozen maps can't be mapped rw either. frozen := mustMmapableArray(t, 0) qt.Assert(t, qt.IsNil(frozen.Freeze())) fz, err := frozen.Memory() qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsTrue(fz.ReadOnly())) } func TestMemoryClose(t *testing.T) { mm, err := mustMmapableArray(t, 0).Memory() qt.Assert(t, qt.IsNil(err)) // unmap panics if the operation fails. 
mm.close() } ================================================ FILE: memory_unsafe.go ================================================ package ebpf import ( "errors" "fmt" "os" "reflect" "runtime" "unsafe" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/unix" ) // This file contains an experimental, unsafe implementation of Memory that // allows taking a Go pointer to a memory-mapped region. This currently does not // have first-class support from the Go runtime, so it may break in future Go // versions. The Go proposal for the runtime to track off-heap pointers is here: // https://github.com/golang/go/issues/70224. // // In Go, the programmer should not have to worry about freeing memory. Since // this API synthesizes Go variables around global variables declared in a BPF // C program, we want to lean on the runtime for making sure accessing them is // safe at all times. Unfortunately, Go (as of 1.24) does not have the ability // of automatically managing memory that was not allocated by the runtime. // // This led to a solution that requests regular Go heap memory by allocating a // slice (making the runtime track pointers into the slice's backing array) and // memory-mapping the bpf map's memory over it. Then, before returning the // Memory to the caller, a finalizer is set on the backing array, making sure // the bpf map's memory is unmapped from the heap before releasing the backing // array to the runtime for reallocation. // // This obviates the need to maintain a reference to the *Memory at all times, // which is difficult for the caller to achieve if the variable access is done // through another object (like a sync.Atomic) that can potentially be passed // around the Go application. Accidentally losing the reference to the *Memory // would result in hard-to-debug segfaults, which are always unexpected in Go. 
//go:linkname heapObjectsCanMove runtime.heapObjectsCanMove func heapObjectsCanMove() bool // Set from a file behind the ebpf_unsafe_memory_experiment build tag to enable // features that require mapping bpf map memory over the Go heap. var unsafeMemory = false // ErrInvalidType is returned when the given type cannot be used as a Memory or // Variable pointer. var ErrInvalidType = errors.New("invalid type") func newUnsafeMemory(fd, size int) (*Memory, error) { // Some architectures need the size to be page-aligned to work with MAP_FIXED. if size%os.Getpagesize() != 0 { return nil, fmt.Errorf("memory: must be a multiple of page size (requested %d bytes)", size) } // Allocate a page-aligned span of memory on the Go heap. alloc, err := allocate(size) if err != nil { return nil, fmt.Errorf("allocating memory: %w", err) } // Typically, maps created with BPF_F_RDONLY_PROG remain writable from user // space until frozen. As a security precaution, the kernel doesn't allow // mapping bpf map memory as read-write into user space if the bpf map was // frozen, or if it was created using the RDONLY_PROG flag. // // The user would be able to write to the map after freezing (since the kernel // can't change the protection mode of an already-mapped page), while the // verifier assumes the contents to be immutable. // // Map the bpf map memory over a page-aligned allocation on the Go heap. err = mapmap(fd, alloc, size, unix.PROT_READ|unix.PROT_WRITE) // If the map is frozen when an rw mapping is requested, expect EPERM. If the // map was created with BPF_F_RDONLY_PROG, expect EACCES. 
var ro bool if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EACCES) { ro = true err = mapmap(fd, alloc, size, unix.PROT_READ) } if err != nil { return nil, fmt.Errorf("setting up memory-mapped region: %w", err) } mm := &Memory{ unsafe.Slice((*byte)(alloc), size), ro, true, runtime.Cleanup{}, } return mm, nil } // allocate returns a pointer to a page-aligned section of memory on the Go // heap, managed by the runtime. // //go:nocheckptr func allocate(size int) (unsafe.Pointer, error) { // Memory-mapping over a piece of the Go heap is unsafe when the GC can // randomly decide to move objects around, in which case the mapped region // will not move along with it. if heapObjectsCanMove() { return nil, errors.New("this Go runtime has a moving garbage collector") } if size == 0 { return nil, errors.New("size must be greater than 0") } // Request at least two pages of memory from the runtime to ensure we can // align the requested allocation to a page boundary. This is needed for // MAP_FIXED and makes sure we don't mmap over some other allocation on the Go // heap. size = internal.Align(size+os.Getpagesize(), os.Getpagesize()) // Allocate a new slice and store a pointer to its backing array. alloc := unsafe.Pointer(unsafe.SliceData(make([]byte, size))) // nolint:govet // // Align the pointer to a page boundary within the allocation. This may alias // the initial pointer if it was already page-aligned. Ignore govet warnings // since we're calling [runtime.KeepAlive] on the original Go memory. aligned := unsafe.Pointer(internal.Align(uintptr(alloc), uintptr(os.Getpagesize()))) runtime.KeepAlive(alloc) // Return an aligned pointer into the backing array, losing the original // reference. The runtime.SetFinalizer docs specify that its argument 'must be // a pointer to an object, complit or local var', but this is still somewhat // vague and not enforced by the current implementation. 
// // Currently, finalizers can be set and triggered from any address within a // heap allocation, even individual struct fields or arbitrary offsets within // a slice. In this case, finalizers set on struct fields or slice offsets // will only run when the whole struct or backing array are collected. The // accepted runtime.AddCleanup proposal makes this behaviour more explicit and // is set to deprecate runtime.SetFinalizer. // // Alternatively, we'd have to track the original allocation and the aligned // pointer separately, which severely complicates finalizer setup and makes it // prone to human error. For now, just bump the pointer and treat it as the // new and only reference to the backing array. return aligned, nil } // mapmap memory-maps the given file descriptor at the given address and sets a // finalizer on addr to unmap it when it's no longer reachable. func mapmap(fd int, addr unsafe.Pointer, size, flags int) error { // Map the bpf map memory over the Go heap. This will result in the following // mmap layout in the process' address space (0xc000000000 is a span of Go // heap), visualized using pmap: // // Address Kbytes RSS Dirty Mode Mapping // 000000c000000000 1824 864 864 rw--- [ anon ] // 000000c0001c8000 4 4 4 rw-s- [ anon ] // 000000c0001c9000 2268 16 16 rw--- [ anon ] // // This will break up the Go heap, but as long as the runtime doesn't try to // move our allocation around, this is safe for as long as we hold a reference // to our allocated object. // // Use MAP_SHARED to make sure the kernel sees any writes we do, and MAP_FIXED // to ensure the mapping starts exactly at the address we requested. If alloc // isn't page-aligned, the mapping operation will fail. 
if _, err := unix.MmapPtr(fd, 0, addr, uintptr(size), flags, unix.MAP_SHARED|unix.MAP_FIXED); err != nil { return fmt.Errorf("setting up memory-mapped region: %w", err) } // Set a finalizer on the heap allocation to undo the mapping before the span // is collected and reused by the runtime. This has a few reasons: // // - Avoid leaking memory/mappings. // - Future writes to this memory should never clobber a bpf map's contents. // - Some bpf maps are mapped read-only, causing a segfault if the runtime // reallocates and zeroes the span later. runtime.SetFinalizer((*byte)(addr), unmap(size)) return nil } // unmap returns a function that takes a pointer to a memory-mapped region on // the Go heap. The function undoes any mappings and discards the span's // contents. // // Used as a finalizer in [newMemory], split off into a separate function for // testing and to avoid accidentally closing over the unsafe.Pointer to the // memory region, which would cause a cyclical reference. // // The resulting function panics if the mmap operation returns an error, since // it would mean the integrity of the Go heap is compromised. func unmap(size int) func(*byte) { return func(a *byte) { // Create another mapping at the same address to undo the original mapping. // This will cause the kernel to repair the slab since we're using the same // protection mode and flags as the original mapping for the Go heap. // // Address Kbytes RSS Dirty Mode Mapping // 000000c000000000 4096 884 884 rw--- [ anon ] // // Using munmap here would leave an unmapped hole in the heap, compromising // its integrity. // // MmapPtr allocates another unsafe.Pointer at the same address. Even though // we discard it here, it may temporarily resurrect the backing array and // delay its collection to the next GC cycle. 
_, err := unix.MmapPtr(-1, 0, unsafe.Pointer(a), uintptr(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_FIXED|unix.MAP_ANON) if err != nil { panic(fmt.Errorf("undoing bpf map memory mapping: %w", err)) } } } // checkUnsafeMemory ensures value T can be accessed in mm at offset off. // // The comparable constraint narrows down the set of eligible types to exclude // slices, maps and functions. These complex types cannot be mapped to memory // directly. func checkUnsafeMemory[T comparable](mm *Memory, off uint32) error { if mm.b == nil { return fmt.Errorf("memory-mapped region is nil") } if mm.ro { return ErrReadOnly } if !mm.heap { return fmt.Errorf("memory region is not heap-mapped, build with '-tags ebpf_unsafe_memory_experiment' to enable: %w", ErrNotSupported) } t := reflect.TypeFor[T]() if err := checkType(t.String(), t); err != nil { return err } size := t.Size() if size == 0 { return fmt.Errorf("zero-sized type %s: %w", t, ErrInvalidType) } if off%uint32(t.Align()) != 0 { return fmt.Errorf("unaligned access of memory-mapped region: %d-byte aligned read at offset %d", t.Align(), off) } vs, bs := uint32(size), uint32(len(mm.b)) if off+vs > bs { return fmt.Errorf("%d-byte value at offset %d exceeds mmap size of %d bytes", vs, off, bs) } return nil } // checkType recursively checks if the given type is supported for memory // mapping. Only fixed-size, non-Go-pointer types are supported: bools, floats, // (u)int[8-64], arrays, and structs containing them. As an exception, uintptr // is allowed since the backing memory is expected to contain 32-bit pointers on // 32-bit systems despite BPF always allocating 64 bits for pointers in a data // section. // // Doesn't check for loops since it rejects pointers. Should that ever change, a // visited set would be needed. func checkType(name string, t reflect.Type) error { // Special-case atomic types to allow them to be used as root types as well as // struct fields. 
Notably, omit atomic.Value and atomic.Pointer since those // are pointer types. Also, atomic.Value embeds an interface value, which // doesn't make sense to share with C land. if t.PkgPath() == "sync/atomic" { switch t.Name() { case "Bool", "Int32", "Int64", "Uint32", "Uint64", "Uintptr": return nil } } switch t.Kind() { case reflect.Uintptr, reflect.Bool, reflect.Float32, reflect.Float64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: return nil case reflect.Array: at := t.Elem() if err := checkType(fmt.Sprintf("%s.%s", name, at.String()), at); err != nil { return err } case reflect.Struct: var hasHostLayout bool for i := range t.NumField() { at := t.Field(i).Type // Require [structs.HostLayout] to be embedded in all structs. Check the // full package path to reject a user-defined HostLayout type. if at.PkgPath() == "structs" && at.Name() == "HostLayout" { hasHostLayout = true continue } if err := checkType(fmt.Sprintf("%s.%s", name, at.String()), at); err != nil { return err } } if !hasHostLayout { return fmt.Errorf("struct %s must embed structs.HostLayout: %w", name, ErrInvalidType) } default: // For basic types like int and bool, the kind name is the same as the type // name, so the fallthrough case would print 'int type int not supported'. // Omit the kind name if it matches the type name. if t.String() == t.Kind().String() { // Output: type int not supported return fmt.Errorf("type %s not supported: %w", name, ErrInvalidType) } // Output: interface value io.Reader not supported return fmt.Errorf("%s type %s not supported: %w", t.Kind(), name, ErrInvalidType) } return nil } // memoryPointer returns a pointer to a value of type T at offset off in mm. // Taking a pointer to a read-only Memory or to a Memory that is not heap-mapped // is not supported. // // T must contain only fixed-size, non-Go-pointer types: bools, floats, // (u)int[8-64], arrays, and structs containing them. 
// Structs must embed
// [structs.HostLayout]. [ErrInvalidType] is returned if T is not a valid type.
//
// Memory must be writable, off must be aligned to the size of T, and the value
// must be within bounds of the Memory.
//
// To access read-only memory, use [Memory.ReadAt].
func memoryPointer[T comparable](mm *Memory, off uint32) (*T, error) {
	// NOTE(review): checkUnsafeMemory is defined outside this view; presumably
	// it enforces the type, writability, alignment and bounds rules documented
	// above. Its error is wrapped with %w so callers can match the sentinel.
	if err := checkUnsafeMemory[T](mm, off); err != nil {
		return nil, fmt.Errorf("memory pointer: %w", err)
	}
	// Reinterpret the bytes starting at off as a T.
	return (*T)(unsafe.Pointer(&mm.b[off])), nil
}

================================================
FILE: memory_unsafe_tag.go
================================================
//go:build ebpf_unsafe_memory_experiment

package ebpf

// init switches on the package-level unsafeMemory flag when the
// ebpf_unsafe_memory_experiment build tag is set.
func init() {
	unsafeMemory = true
}

================================================
FILE: memory_unsafe_test.go
================================================
package ebpf

import (
	"runtime"
	"structs"
	"sync/atomic"
	"testing"
	"unsafe"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf/internal/sys"
)

// TestUnsafeMemoryDisabled checks that memoryPointer returns ErrNotSupported
// for a Memory obtained through Map.Memory.
func TestUnsafeMemoryDisabled(t *testing.T) {
	mm, err := mustMmapableArray(t, 0).Memory()
	qt.Assert(t, qt.IsNil(err))

	_, err = memoryPointer[uint32](mm, 0)
	qt.Assert(t, qt.ErrorIs(err, ErrNotSupported))
}

// TestUnsafeMemoryUnmap invokes the unmap function by hand on a Memory
// obtained through unsafeMemory.
func TestUnsafeMemoryUnmap(t *testing.T) {
	mm, err := mustMmapableArray(t, 0).unsafeMemory()
	qt.Assert(t, qt.IsNil(err))

	// Avoid unmap running twice.
	runtime.SetFinalizer(unsafe.SliceData(mm.b), nil)

	// unmap panics if the operation fails.
	unmap(int(mm.Size()))(unsafe.SliceData(mm.b))
}

// TestUnsafeMemoryPointer writes through a pointer returned by memoryPointer
// and reads the value back via Map.Lookup. It also checks that unaligned
// offsets and pointer types are rejected.
func TestUnsafeMemoryPointer(t *testing.T) {
	m := mustMmapableArray(t, 0)
	mm, err := m.unsafeMemory()
	qt.Assert(t, qt.IsNil(err))

	// Requesting an unaligned value should fail.
	_, err = memoryPointer[uint32](mm, 7)
	qt.Assert(t, qt.IsNotNil(err))

	u64, err := memoryPointer[uint64](mm, 8)
	qt.Assert(t, qt.IsNil(err))

	want := uint64(0xf00d)
	*u64 = want
	qt.Assert(t, qt.Equals(*u64, want))

	// Read back the value using the bpf syscall interface.
	var got uint64
	qt.Assert(t, qt.IsNil(m.Lookup(uint32(1), &got)))
	qt.Assert(t, qt.Equals(got, want))

	// Pointer types must be rejected.
	_, err = memoryPointer[*uint32](mm, 0)
	qt.Assert(t, qt.ErrorIs(err, ErrInvalidType))
}

// TestUnsafeMemoryReadOnly checks that read-only Memory refuses both WriteAt
// and memoryPointer with ErrReadOnly.
func TestUnsafeMemoryReadOnly(t *testing.T) {
	rd, err := mustMmapableArray(t, sys.BPF_F_RDONLY_PROG).unsafeMemory()
	qt.Assert(t, qt.IsNil(err))

	// BPF_F_RDONLY_PROG flag, so the Memory should be read-only.
	qt.Assert(t, qt.IsTrue(rd.ReadOnly()))

	// Frozen maps can't be mapped rw either.
	frozen := mustMmapableArray(t, 0)
	qt.Assert(t, qt.IsNil(frozen.Freeze()))
	fz, err := frozen.Memory()
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.IsTrue(fz.ReadOnly()))

	_, err = fz.WriteAt([]byte{1}, 0)
	qt.Assert(t, qt.ErrorIs(err, ErrReadOnly))

	_, err = memoryPointer[uint32](fz, 0)
	qt.Assert(t, qt.ErrorIs(err, ErrReadOnly))
}

// TestCheckUnsafeMemory enumerates the types checkUnsafeMemory must accept and
// the ones it must reject with ErrInvalidType.
func TestCheckUnsafeMemory(t *testing.T) {
	mm, err := mustMmapableArray(t, 0).unsafeMemory()
	qt.Assert(t, qt.IsNil(err))

	// Primitive types
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[bool](mm, 0)))
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[uint32](mm, 0)))

	// Arrays
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[[4]byte](mm, 0)))
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[[2]struct {
		_ structs.HostLayout
		A uint32
		B uint64
	}](mm, 0)))

	// Structs
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[struct {
		_ structs.HostLayout
		_ uint32
	}](mm, 0)))
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[struct {
		_ structs.HostLayout
		_ [4]byte
	}](mm, 0)))

	// Atomics
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[atomic.Uint32](mm, 0)))
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[struct {
		_ structs.HostLayout
		_ atomic.Uint32
	}](mm, 0)))

	// Special cases
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[uintptr](mm, 0)))
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[atomic.Uintptr](mm, 0)))
	qt.Assert(t, qt.IsNil(checkUnsafeMemory[struct {
		_ structs.HostLayout
		_ uintptr
	}](mm, 0)))

	// No pointers
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[*uint32](mm, 0), ErrInvalidType))
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[**uint32](mm, 0), ErrInvalidType))
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[[1]*uint8](mm, 0), ErrInvalidType))
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[struct {
		_ structs.HostLayout
		_ *uint8
	}](mm, 0), ErrInvalidType))
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[atomic.Pointer[uint64]](mm, 0), ErrInvalidType))
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[atomic.Value](mm, 0), ErrInvalidType))

	// No variable-sized types
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[int](mm, 0), ErrInvalidType))
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[uint](mm, 0), ErrInvalidType))

	// No interface types
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[any](mm, 0), ErrInvalidType))

	// No zero-sized types
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[struct{ _ structs.HostLayout }](mm, 0), ErrInvalidType))

	// No structs without HostLayout
	qt.Assert(t, qt.ErrorIs(checkUnsafeMemory[struct{ _ uint32 }](mm, 0), ErrInvalidType))
}

================================================
FILE: netlify.toml
================================================
[build]
base = "docs/"
publish = "site/"
command = "mkdocs build"
environment = { PYTHON_VERSION = "3.13" }

================================================
FILE: perf/doc.go
================================================
// Package perf allows reading from BPF perf event arrays.
//
// A perf event array contains multiple perf event ringbuffers which can be used
// to exchange sample-like data with user space.
package perf

================================================
FILE: perf/reader.go
================================================
//go:build !windows

package perf

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"sync"
	"time"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/epoll"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/unix"
)

var (
	ErrClosed  = os.ErrClosed
	ErrFlushed = epoll.ErrFlushed
	errEOR     = errors.New("end of ring")
)

var perfEventHeaderSize = binary.Size(perfEventHeader{})

// perfEventHeader must match 'struct perf_event_header' in <linux/perf_event.h>.
type perfEventHeader struct {
	Type uint32
	Misc uint16
	Size uint16
}

// Record contains either a sample or a counter of the
// number of lost samples.
type Record struct {
	// The CPU this record was generated on.
	CPU int

	// The data submitted via bpf_perf_event_output.
	// Due to a kernel bug, this can contain between 0 and 7 bytes of trailing
	// garbage from the ring depending on the input sample's length.
	RawSample []byte

	// The number of samples which could not be output, since
	// the ring buffer was full.
	LostSamples uint64

	// The minimum number of bytes remaining in the per-CPU buffer after this Record has been read.
	// Negative for overwritable buffers.
	Remaining int
}

// readRecord decodes the next perf event from rd into rec.
//
// buf is scratch space and must be at least perfEventHeaderSize bytes long.
// errEOR is returned when rd is exhausted before any header byte was read. An
// event type other than PERF_RECORD_LOST or PERF_RECORD_SAMPLE yields an
// *unknownEventError.
func readRecord(rd io.Reader, rec *Record, buf []byte, overwritable bool) error {
	// Re-slicing panics when buf is too short, which asserts the precondition.
	hdr := buf[:perfEventHeaderSize]
	if _, err := io.ReadFull(rd, hdr); err != nil {
		if errors.Is(err, io.EOF) {
			return errEOR
		}
		return fmt.Errorf("read perf event header: %v", err)
	}

	header := perfEventHeader{
		Type: internal.NativeEndian.Uint32(hdr[0:4]),
		Misc: internal.NativeEndian.Uint16(hdr[4:6]),
		Size: internal.NativeEndian.Uint16(hdr[6:8]),
	}

	var err error
	switch header.Type {
	case unix.PERF_RECORD_LOST:
		// Keep the backing array of RawSample around for later samples.
		rec.RawSample = rec.RawSample[:0]
		rec.LostSamples, err = readLostRecords(rd)
		return err

	case unix.PERF_RECORD_SAMPLE:
		rec.LostSamples = 0
		// The header scratch space is large enough to hold the sample size
		// (perfEventHeaderSize > perfEventSampleSize), so it is reused.
		rec.RawSample, err = readRawSample(rd, hdr, rec.RawSample)
		return err

	default:
		return &unknownEventError{header.Type}
	}
}

// readLostRecords reads the body of a PERF_RECORD_LOST event from rd and
// returns its lost-sample counter.
func readLostRecords(rd io.Reader) (uint64, error) {
	// lost must match 'struct perf_event_lost' in kernel sources.
	var lost struct {
		ID   uint64
		Lost uint64
	}

	if err := binary.Read(rd, internal.NativeEndian, &lost); err != nil {
		return 0, fmt.Errorf("can't read lost records header: %v", err)
	}

	return lost.Lost, nil
}

var perfEventSampleSize = binary.Size(uint32(0))

// This must match 'struct perf_event_sample' in kernel sources.
type perfEventSample struct {
	Size uint32
}

// readRawSample reads a length-prefixed sample from rd.
//
// buf is scratch space for the length prefix. sampleBuf is reused for the
// payload when its capacity suffices, otherwise a new slice is allocated.
func readRawSample(rd io.Reader, buf, sampleBuf []byte) ([]byte, error) {
	prefix := buf[:perfEventSampleSize]
	if _, err := io.ReadFull(rd, prefix); err != nil {
		return nil, fmt.Errorf("read sample size: %w", err)
	}

	sample := perfEventSample{
		Size: internal.NativeEndian.Uint32(prefix),
	}

	size := int(sample.Size)
	var payload []byte
	if size > cap(sampleBuf) {
		payload = make([]byte, size)
	} else {
		payload = sampleBuf[:size]
	}

	if _, err := io.ReadFull(rd, payload); err != nil {
		return nil, fmt.Errorf("read sample: %w", err)
	}
	return payload, nil
}

// Reader allows reading bpf_perf_event_output
// from user space.
type Reader struct {
	poller *epoll.Poller

	// mu protects read/write access to the Reader structure with the
	// exception of fields protected by 'pauseMu'.
	// If locking both 'mu' and 'pauseMu', 'mu' must be locked first.
	mu           sync.Mutex
	array        *ebpf.Map
	rings        []*perfEventRing
	epollEvents  []unix.EpollEvent
	epollRings   []*perfEventRing
	eventHeader  []byte
	deadline     time.Time
	overwritable bool
	bufferSize   int
	pendingErr   error

	// pauseMu protects eventFds so that Pause / Resume can be invoked while
	// Read is blocked.
	pauseMu  sync.Mutex
	eventFds []*sys.FD
	paused   bool
}

// ReaderOptions control the behaviour of the user
// space reader.
type ReaderOptions struct {
	// The number of events required in any per CPU buffer before
	// Read will process data. This is mutually exclusive with Watermark.
	// The default is zero, which means Watermark will take precedence.
	WakeupEvents int
	// The number of written bytes required in any per CPU buffer before
	// Read will process data. Must be smaller than PerCPUBuffer.
	// The default is to start processing as soon as data is available.
	Watermark int
	// This perf ring buffer is overwritable, once full the oldest event will be
	// overwritten by newest.
	Overwritable bool
}

// NewReader creates a new reader with default options.
//
// array must be a PerfEventArray. perCPUBuffer gives the size of the
// per CPU buffer in bytes. It is rounded up to the nearest multiple
// of the current page size.
func NewReader(array *ebpf.Map, perCPUBuffer int) (*Reader, error) {
	var defaults ReaderOptions
	return NewReaderWithOptions(array, perCPUBuffer, defaults)
}

// NewReaderWithOptions creates a new reader with the given options.
func NewReaderWithOptions(array *ebpf.Map, perCPUBuffer int, opts ReaderOptions) (pr *Reader, err error) { closeOnError := func(c io.Closer) { if err != nil { c.Close() } } if perCPUBuffer < 1 { return nil, errors.New("perCPUBuffer must be larger than 0") } if opts.WakeupEvents > 0 && opts.Watermark > 0 { return nil, errors.New("WakeupEvents and Watermark cannot both be non-zero") } var ( nCPU = int(array.MaxEntries()) rings = make([]*perfEventRing, 0, nCPU) eventFds = make([]*sys.FD, 0, nCPU) ) poller, err := epoll.New() if err != nil { return nil, err } defer closeOnError(poller) // bpf_perf_event_output checks which CPU an event is enabled on, // but doesn't allow using a wildcard like -1 to specify "all CPUs". // Hence we have to create a ring for each CPU. bufferSize := 0 for i := 0; i < nCPU; i++ { event, ring, err := newPerfEventRing(i, perCPUBuffer, opts) if errors.Is(err, unix.ENODEV) { // The requested CPU is currently offline, skip it. continue } if err != nil { return nil, fmt.Errorf("failed to create perf ring for CPU %d: %v", i, err) } defer closeOnError(event) defer closeOnError(ring) bufferSize = ring.size() rings = append(rings, ring) eventFds = append(eventFds, event) if err := poller.Add(event.Int(), 0); err != nil { return nil, err } } // Closing a PERF_EVENT_ARRAY removes all event fds // stored in it, so we keep a reference alive. array, err = array.Clone() if err != nil { return nil, err } pr = &Reader{ array: array, rings: rings, poller: poller, deadline: time.Time{}, epollEvents: make([]unix.EpollEvent, len(rings)), epollRings: make([]*perfEventRing, 0, len(rings)), eventHeader: make([]byte, perfEventHeaderSize), eventFds: eventFds, overwritable: opts.Overwritable, bufferSize: bufferSize, } if err = pr.Resume(); err != nil { return nil, err } return pr, nil } // Close frees resources used by the reader. // // It interrupts calls to Read. // // Calls to perf_event_output from eBPF programs will return // ENOENT after calling this method. 
func (pr *Reader) Close() error { if err := pr.poller.Close(); err != nil { if errors.Is(err, os.ErrClosed) { return nil } return fmt.Errorf("close poller: %w", err) } // Trying to poll will now fail, so Read() can't block anymore. Acquire the // locks so that we can clean up. pr.mu.Lock() defer pr.mu.Unlock() pr.pauseMu.Lock() defer pr.pauseMu.Unlock() for _, ring := range pr.rings { ring.Close() } for _, event := range pr.eventFds { event.Close() } pr.rings = nil pr.eventFds = nil pr.array.Close() return nil } // SetDeadline controls how long Read and ReadInto will block waiting for samples. // // Passing a zero time.Time will remove the deadline. Passing a deadline in the // past will prevent the reader from blocking if there are no records to be read. func (pr *Reader) SetDeadline(t time.Time) { pr.mu.Lock() defer pr.mu.Unlock() pr.deadline = t } // Read the next record from the perf ring buffer. // // The method blocks until there are at least Watermark bytes in one // of the per CPU buffers. Records from buffers below the Watermark // are not returned. // // Records can contain between 0 and 7 bytes of trailing garbage from the ring // depending on the input sample's length. // // Calling [Close] interrupts the method with [os.ErrClosed]. Calling [Flush] // makes it return all records currently in the ring buffer, followed by [ErrFlushed]. // // Returns [os.ErrDeadlineExceeded] if a deadline was set and after all records // have been read from the ring. // // See [Reader.ReadInto] for a more efficient version of this method. func (pr *Reader) Read() (Record, error) { var r Record return r, pr.ReadInto(&r) } var errMustBePaused = fmt.Errorf("perf ringbuffer: must have been paused before reading overwritable buffer") // ReadInto is like [Reader.Read] except that it allows reusing Record and associated buffers. 
//
// It returns errMustBePaused when the Reader is overwritable and not paused,
// and an error wrapping ErrClosed once the rings have been released. A
// deadline or flush error from the poller is held back until every ring has
// been drained.
func (pr *Reader) ReadInto(rec *Record) error {
	// Lock order: mu first, then pauseMu.
	pr.mu.Lock()
	defer pr.mu.Unlock()

	pr.pauseMu.Lock()
	defer pr.pauseMu.Unlock()

	if pr.overwritable && !pr.paused {
		return errMustBePaused
	}

	// Close sets rings to nil.
	if pr.rings == nil {
		return fmt.Errorf("perf ringbuffer: %w", ErrClosed)
	}

	for {
		// epollRings is the work list of rings that still need draining.
		if len(pr.epollRings) == 0 {
			if pe := pr.pendingErr; pe != nil {
				// All rings have been emptied since the error occurred, return
				// appropriate error.
				pr.pendingErr = nil
				return pe
			}

			// pauseMu is released while blocked in Wait so that Pause and
			// Resume can run concurrently; mu stays held.
			//
			// NB: The deferred pauseMu.Unlock will panic if Wait panics, which
			// might obscure the original panic.
			pr.pauseMu.Unlock()
			_, err := pr.poller.Wait(pr.epollEvents, pr.deadline)
			pr.pauseMu.Lock()

			if errors.Is(err, os.ErrDeadlineExceeded) || errors.Is(err, ErrFlushed) {
				// We've hit the deadline or were flushed, check whether there
				// is any data in the rings that we've not been woken up for.
				// The error is reported once the rings are empty.
				pr.pendingErr = err
			} else if err != nil {
				return err
			}

			// Re-validate pr.paused since we dropped pauseMu.
			if pr.overwritable && !pr.paused {
				return errMustBePaused
			}

			// Waking up userspace is expensive, make the most of it by checking
			// all rings.
			for _, ring := range pr.rings {
				ring.loadHead()
				pr.epollRings = append(pr.epollRings, ring)
			}
		}

		// Start at the last available event. The order in which we
		// process them doesn't matter, and starting at the back allows
		// resizing epollRings to keep track of processed rings.
		err := pr.readRecordFromRing(rec, pr.epollRings[len(pr.epollRings)-1])
		if err == errEOR {
			// We've emptied the current ring buffer, process
			// the next one.
			pr.epollRings = pr.epollRings[:len(pr.epollRings)-1]
			continue
		}

		return err
	}
}

// Pause stops all notifications from this Reader.
//
// While the Reader is paused, any attempts to write to the event buffer from
// BPF programs will return -ENOENT.
//
// Subsequent calls to Read will block until a call to Resume.
func (pr *Reader) Pause() error { pr.pauseMu.Lock() defer pr.pauseMu.Unlock() if pr.eventFds == nil { return fmt.Errorf("%w", ErrClosed) } for i := range pr.eventFds { if err := pr.array.Delete(uint32(i)); err != nil && !errors.Is(err, ebpf.ErrKeyNotExist) { return fmt.Errorf("could't delete event fd for CPU %d: %w", i, err) } } pr.paused = true return nil } // Resume allows this perf reader to emit notifications. // // Subsequent calls to Read will block until the next event notification. func (pr *Reader) Resume() error { pr.pauseMu.Lock() defer pr.pauseMu.Unlock() if pr.eventFds == nil { return fmt.Errorf("%w", ErrClosed) } for i, fd := range pr.eventFds { if fd == nil { continue } if err := pr.array.Put(uint32(i), fd.Uint()); err != nil { return fmt.Errorf("couldn't put event fd %d for CPU %d: %w", fd, i, err) } } pr.paused = false return nil } // BufferSize is the size in bytes of each per-CPU buffer func (pr *Reader) BufferSize() int { return pr.bufferSize } // Flush unblocks Read/ReadInto and successive Read/ReadInto calls will return pending samples at this point, // until you receive a [ErrFlushed] error. func (pr *Reader) Flush() error { return pr.poller.Flush() } // NB: Has to be preceded by a call to ring.loadHead. func (pr *Reader) readRecordFromRing(rec *Record, ring *perfEventRing) error { defer ring.writeTail() rec.CPU = ring.cpu err := readRecord(ring, rec, pr.eventHeader, pr.overwritable) if pr.overwritable && (errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)) { return errEOR } rec.Remaining = ring.remaining() return err } type unknownEventError struct { eventType uint32 } func (uev *unknownEventError) Error() string { return fmt.Sprintf("unknown event type: %d", uev.eventType) } // IsUnknownEvent returns true if the error occurred // because an unknown event was submitted to the perf event ring. 
func IsUnknownEvent(err error) bool {
	// errors.As also matches when the error has been wrapped.
	var uee *unknownEventError
	return errors.As(err, &uee)
}

================================================
FILE: perf/reader_test.go
================================================
//go:build !windows

package perf

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"math"
	"os"
	"syscall"
	"testing"
	"time"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/testutils"
	"github.com/cilium/ebpf/internal/testutils/testmain"

	"github.com/go-quicktest/qt"
)

var (
	// readTimeout bounds how long tests wait for a notification.
	readTimeout = 250 * time.Millisecond
)

func TestMain(m *testing.M) {
	testmain.Run(m)
}

// TestPerfReader reads two samples back, checks the Remaining counter and
// expects a deadline error once the ring is empty.
func TestPerfReader(t *testing.T) {
	events := perfEventArray(t)

	rd, err := NewReader(events, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	qt.Assert(t, qt.Equals(rd.BufferSize(), 4096))

	outputSamples(t, events, 5, 5)

	_, rem := checkRecord(t, rd)
	qt.Assert(t, qt.IsTrue(rem >= 5), qt.Commentf("expected at least 5 Remaining"))

	_, rem = checkRecord(t, rd)
	qt.Assert(t, qt.Equals(rem, 0), qt.Commentf("expected zero Remaining"))

	rd.SetDeadline(time.Now().Add(4 * time.Millisecond))
	_, err = rd.Read()
	qt.Assert(t, qt.ErrorIs(err, os.ErrDeadlineExceeded), qt.Commentf("expected os.ErrDeadlineExceeded"))
}

// TestReaderSetDeadline checks that Read on an empty ring reports
// os.ErrDeadlineExceeded for past and near-future deadlines.
func TestReaderSetDeadline(t *testing.T) {
	events := perfEventArray(t)

	rd, err := NewReader(events, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	rd.SetDeadline(time.Now().Add(-time.Second))
	if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) {
		t.Error("Expected os.ErrDeadlineExceeded from first Read, got:", err)
	}
	if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) {
		t.Error("Expected os.ErrDeadlineExceeded from second Read, got:", err)
	}

	rd.SetDeadline(time.Now().Add(10 * time.Millisecond))
	if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) {
		t.Error("Expected os.ErrDeadlineExceeded from third Read, got:", err)
	}
}

// TestReaderSetDeadlinePendingEvents checks that an expired deadline still
// returns samples that are below the wakeup threshold, followed by
// os.ErrDeadlineExceeded.
func TestReaderSetDeadlinePendingEvents(t *testing.T) {
	events := perfEventArray(t)

	rd, err := NewReaderWithOptions(events, 4096, ReaderOptions{WakeupEvents: 2})
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	outputSamples(t, events, 5)

	rd.SetDeadline(time.Now().Add(-time.Second))
	_, rem := checkRecord(t, rd)
	qt.Assert(t, qt.Equals(rem, 0), qt.Commentf("expected zero Remaining"))

	outputSamples(t, events, 5)

	// another sample should not be returned before we get os.ErrDeadlineExceeded
	// to indicate initial set of samples read
	_, err = rd.Read()
	if !errors.Is(err, os.ErrDeadlineExceeded) {
		t.Error("Expected os.ErrDeadlineExceeded from second Read, got:", err)
	}

	// the second sample should now be read
	_, _ = checkRecord(t, rd)
}

// TestReaderFlushPendingEvents checks that Flush unblocks a pending Read,
// which returns the buffered sample and then ErrFlushed.
func TestReaderFlushPendingEvents(t *testing.T) {
	testutils.LockOSThreadToSingleCPU(t)
	events := perfEventArray(t)

	rd, err := NewReaderWithOptions(events, 4096, ReaderOptions{WakeupEvents: 2})
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	outputSamples(t, events, 5)

	wait := make(chan int)
	go func() {
		// The first send signals that the goroutine is running.
		wait <- 0
		_, rem := checkRecord(t, rd)
		wait <- rem
	}()

	<-wait
	// Give the goroutine time to block in Read before flushing.
	time.Sleep(10 * time.Millisecond)
	err = rd.Flush()
	qt.Assert(t, qt.IsNil(err))

	rem := <-wait
	qt.Assert(t, qt.Equals(rem, 0), qt.Commentf("expected zero Remaining"))

	outputSamples(t, events, 5)

	// another sample should not be returned before we get ErrFlushed to indicate initial set of samples read
	_, err = rd.Read()
	if !errors.Is(err, ErrFlushed) {
		t.Error("Expected ErrFlushed from second Read, got:", err)
	}

	// the second sample should now be read
	_, _ = checkRecord(t, rd)
}

// outputSamples runs a program that submits one sample per entry of
// sampleSizes to events, and fails the test if it returns non-zero.
func outputSamples(tb testing.TB, events *ebpf.Map, sampleSizes ...byte) {
	prog := outputSamplesProg(tb, events, sampleSizes...)

	ret, _, err := prog.Test(internal.EmptyBPFContext)
	testutils.SkipIfNotSupported(tb, err)
	if err != nil {
		tb.Fatal(err)
	}

	// The program's return value is interpreted as a negated errno.
	if errno := syscall.Errno(-int32(ret)); errno != 0 {
		tb.Fatal("Expected 0 as return value, got", errno)
	}
}

// outputSamplesProg creates a program which submits a series of samples to a PerfEventArray.
//
// The format of each sample is:
//
//	index:   0    1  2    3    ... size - 1
//	content: size id 0xff 0xff ... 0xff     [padding]
//
// padding is an implementation detail of the perf buffer and 1-7 bytes long. The
// contents are undefined.
func outputSamplesProg(tb testing.TB, events *ebpf.Map, sampleSizes ...byte) *ebpf.Program {
	tb.Helper()

	// Requires at least 4.9 (0515e5999a46 "bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type")
	testutils.SkipOnOldKernel(tb, "4.9", "perf events support")

	const bpfFCurrentCPU = 0xffffffff

	// The stack buffer is sized for the largest requested sample.
	var maxSampleSize byte
	for _, sampleSize := range sampleSizes {
		if sampleSize < 2 {
			tb.Fatalf("Sample size %d is too small to contain size and counter", sampleSize)
		}
		if sampleSize > maxSampleSize {
			maxSampleSize = sampleSize
		}
	}

	// Fill a buffer on the stack, and stash context somewhere
	insns := asm.Instructions{
		asm.LoadImm(asm.R0, ^int64(0), asm.DWord),
		asm.Mov.Reg(asm.R9, asm.R1),
	}

	// Store R0 (all bits set) into every 8-byte slot of the buffer.
	bufDwords := int(maxSampleSize/8) + 1
	for i := 0; i < bufDwords; i++ {
		insns = append(insns,
			asm.StoreMem(asm.RFP, int16(i+1)*-8, asm.R0, asm.DWord),
		)
	}

	// Emit one perf_event_output call per requested sample.
	for i, sampleSize := range sampleSizes {
		insns = append(insns,
			// Restore stashed context.
			asm.Mov.Reg(asm.R1, asm.R9),
			// map
			asm.LoadMapPtr(asm.R2, events.FD()),
			// flags
			asm.LoadImm(asm.R3, bpfFCurrentCPU, asm.DWord),
			// buffer
			asm.Mov.Reg(asm.R4, asm.RFP),
			asm.Add.Imm(asm.R4, int32(bufDwords*-8)),
			// buffer[0] = size
			asm.StoreImm(asm.R4, 0, int64(sampleSize), asm.Byte),
			// buffer[1] = i
			asm.StoreImm(asm.R4, 1, int64(i&math.MaxUint8), asm.Byte),
			// size
			asm.Mov.Imm(asm.R5, int32(sampleSize)),
			asm.FnPerfEventOutput.Call(),
		)
	}

	insns = append(insns, asm.Return())

	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		License:      "GPL",
		Type:         ebpf.XDP,
		Instructions: insns,
	})
	if err != nil {
		tb.Fatal(err)
	}
	tb.Cleanup(func() { prog.Close() })

	return prog
}

// checkRecord reads one record from rd and validates it against the layout
// written by outputSamplesProg. It returns the sample id stored in byte 1 and
// the record's Remaining value.
func checkRecord(tb testing.TB, rd *Reader) (id int, remaining int) {
	tb.Helper()

	rec, err := rd.Read()
	qt.Assert(tb, qt.IsNil(err))

	qt.Assert(tb, qt.IsTrue(rec.CPU >= 0), qt.Commentf("Record has invalid CPU number"))

	// Byte 0 holds the sample size as written by the program.
	size := int(rec.RawSample[0])
	qt.Assert(tb, qt.IsTrue(len(rec.RawSample) >= size), qt.Commentf("RawSample is at least size bytes"))

	for i, v := range rec.RawSample[2:size] {
		qt.Assert(tb, qt.Equals(v, 0xff), qt.Commentf("filler at position %d should match", i+2))
	}

	// padding is ignored since its value is undefined.

	return int(rec.RawSample[1]), rec.Remaining
}

func TestPerfReaderLostSample(t *testing.T) {
	// To generate a lost sample perf record:
	//
	// 1. Fill the perf ring buffer almost completely, with the output_large program.
	//    The buffer is sized in number of pages, which are architecture dependent.
	//
	// 2. Write an extra event that doesn't fit in the space remaining.
	//
	// 3. Write a smaller event that does fit, with output_single program.
	//    Lost sample records are generated opportunistically, when the kernel
	//    is writing an event and realizes that there were events lost previously.
	//
	// The event size is hardcoded in the test BPF programs, there's no way
	// to parametrize it without rebuilding the programs.
	//
	// The event size needs to be selected so that, for any page size, there are at least
	// 48 bytes left in the perf ring page after filling it with a whole number of events:
	//
	//  - PERF_RECORD_LOST: 8 (perf_event_header) + 16 (PERF_RECORD_LOST)
	//
	//  - output_single: 8 (perf_event_header) + 4 (size) + 5 (payload) + 7 (padding to 64bits)
	//
	// By selecting an event size of the form 2^n + 2^(n+1) = 3 * 2^n, for any page
	// size 2^(n+m), m >= 0, the number of bytes left, x, after filling a page with
	// a whole number of events is:
	//
	//	x = 2^(n+m) mod (3 * 2^n)
	//	  = 2^n * (2^m mod 3)
	//
	// 2^m mod 3 is either 1 or 2, so:
	//
	//	x = 2^n  or  x = 2^(n+1)
	//
	// Selecting n = 6, we have:
	//
	//	x = 64  or  x = 128, no matter the page size 2^(6+m)
	//
	//	event size = 2^6 + 2^7 = 192
	//
	// Accounting for perf headers, output_large uses a 180 byte payload:
	//
	//	8 (perf_event_header) + 4 (size) + 180 (payload)
	const (
		eventSize = 192
	)

	var (
		pageSize  = os.Getpagesize()
		maxEvents = (pageSize / eventSize)
	)
	if remainder := pageSize % eventSize; remainder != 64 && remainder != 128 {
		// Page size isn't 2^(6+m), m >= 0
		t.Fatal("unsupported page size:", pageSize)
	}

	var sampleSizes []byte
	// Fill the ring with the maximum number of output_large events that will fit,
	// and generate a lost event by writing an additional event.
	for i := 0; i < maxEvents+1; i++ {
		sampleSizes = append(sampleSizes, 180)
	}

	// Generate a small event to trigger the lost record
	sampleSizes = append(sampleSizes, 5)

	events := perfEventArray(t)

	rd, err := NewReader(events, pageSize)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	outputSamples(t, events, sampleSizes...)

	for range sampleSizes {
		record, err := rd.Read()
		if err != nil {
			t.Fatal(err)
		}

		// A record without sample data must be the lost-samples record.
		if record.RawSample == nil && record.LostSamples != 1 {
			t.Fatal("Expected a record with LostSamples 1, got", record.LostSamples)
		}
	}
}

// TestPerfReaderOverwritable overfills an overwritable ring by one sample and
// checks that records are returned newest first once the Reader is paused.
func TestPerfReaderOverwritable(t *testing.T) {
	// Smallest buffer size.
	pageSize := os.Getpagesize()

	const sampleSize = math.MaxUint8

	// Account for perf header (8) and size (4), align to 8 bytes as perf does.
	realSampleSize := internal.Align(sampleSize+8+4, 8)
	maxEvents := pageSize / realSampleSize

	var sampleSizes []byte
	for i := 0; i < maxEvents; i++ {
		sampleSizes = append(sampleSizes, sampleSize)
	}
	// Append an extra sample that will overwrite the first sample.
	sampleSizes = append(sampleSizes, sampleSize)

	events := perfEventArray(t)

	rd, err := NewReaderWithOptions(events, pageSize, ReaderOptions{Overwritable: true})
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	// Reading an overwritable buffer requires the Reader to be paused.
	_, err = rd.Read()
	qt.Assert(t, qt.ErrorIs(err, errMustBePaused))

	outputSamples(t, events, sampleSizes...)

	qt.Assert(t, qt.IsNil(rd.Pause()))
	rd.SetDeadline(time.Now())

	// Sample ids are expected in descending order, starting at the last one.
	nextID := maxEvents
	for i := 0; i < maxEvents; i++ {
		id, rem := checkRecord(t, rd)
		qt.Assert(t, qt.Equals(id, nextID))
		qt.Assert(t, qt.Equals(rem, -1))
		nextID--
	}
}

// TestPerfReaderOverwritableEmpty checks that reading an empty, paused
// overwritable ring ends with os.ErrDeadlineExceeded.
func TestPerfReaderOverwritableEmpty(t *testing.T) {
	events := perfEventArray(t)
	rd, err := NewReaderWithOptions(events, os.Getpagesize(), ReaderOptions{Overwritable: true})
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	err = rd.Pause()
	if err != nil {
		t.Fatal(err)
	}

	rd.SetDeadline(time.Now().Add(4 * time.Millisecond))
	_, err = rd.Read()
	qt.Assert(t, qt.ErrorIs(err, os.ErrDeadlineExceeded), qt.Commentf("expected os.ErrDeadlineExceeded"))

	err = rd.Resume()
	if err != nil {
		t.Fatal(err)
	}
}

// TestPerfReaderClose checks that Close interrupts a blocked Read, can be
// called repeatedly, and makes later Reads fail.
func TestPerfReaderClose(t *testing.T) {
	events := perfEventArray(t)

	rd, err := NewReader(events, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	errs := make(chan error, 1)
	waiting := make(chan struct{})
	go func() {
		close(waiting)
		_, err := rd.Read()
		errs <- err
	}()

	<-waiting

	// Close should interrupt Read
	if err := rd.Close(); err != nil {
		t.Fatal(err)
	}

	select {
	case <-errs:
	case <-time.After(time.Second):
		t.Fatal("Close doesn't interrupt Read")
	}

	// And we should be able to call it multiple times
	if err := rd.Close(); err != nil {
		t.Fatal(err)
	}

	if _, err := rd.Read(); err == nil {
		t.Fatal("Read on a closed PerfReader doesn't return an error")
	}
}

// TestCreatePerfEvent checks that a perf event can be created for CPU 0.
func TestCreatePerfEvent(t *testing.T) {
	fd, err := createPerfEvent(0, ReaderOptions{Watermark: 1, Overwritable: false})
	if err != nil {
		t.Fatal("Can't create perf event:", err)
	}
	fd.Close()
}

// TestReadRecord feeds readRecord a zeroed header and expects an unknown
// event error.
func TestReadRecord(t *testing.T) {
	var buf bytes.Buffer

	err := binary.Write(&buf, internal.NativeEndian, &perfEventHeader{})
	if err != nil {
		t.Fatal(err)
	}

	var rec Record
	err = readRecord(&buf, &rec, make([]byte, perfEventHeaderSize), false)
	if !IsUnknownEvent(err) {
		t.Error("readRecord should return unknown event error, got", err)
	}
}

// TestPause exercises Pause and Resume, including their idempotence and their
// behaviour after Close.
func TestPause(t *testing.T) {
	t.Parallel()

	events := perfEventArray(t)

	rd, err := NewReader(events, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	// Reader is already unpaused by default. It should be idempotent.
	if err = rd.Resume(); err != nil {
		t.Fatal(err)
	}

	// Write a sample. The reader should read it.
	prog := outputSamplesProg(t, events, 5)
	ret, _, err := prog.Test(internal.EmptyBPFContext)
	testutils.SkipIfNotSupported(t, err)
	if err != nil || ret != 0 {
		t.Fatal("Can't write sample")
	}
	if _, err := rd.Read(); err != nil {
		t.Fatal(err)
	}

	// Pause. No notification should trigger.
	if err = rd.Pause(); err != nil {
		t.Fatal(err)
	}
	errChan := make(chan error, 1)
	go func() {
		// Read one notification then send any errors and exit.
		_, err := rd.Read()
		errChan <- err
	}()
	ret, _, err = prog.Test(internal.EmptyBPFContext)
	if err == nil && ret == 0 {
		t.Fatal("Unexpectedly wrote sample while paused")
	} // else Success
	select {
	case err := <-errChan:
		// Failure: Pause was unsuccessful.
		t.Fatalf("received notification on paused reader: %s", err)
	case <-time.After(readTimeout):
		// Success
	}

	// Pause should be idempotent.
	if err = rd.Pause(); err != nil {
		t.Fatal(err)
	}

	// Resume. Now notifications should continue.
	if err = rd.Resume(); err != nil {
		t.Fatal(err)
	}
	ret, _, err = prog.Test(internal.EmptyBPFContext)
	if err != nil || ret != 0 {
		t.Fatal("Can't write sample")
	}
	select {
	case err := <-errChan:
		if err != nil {
			t.Fatal(err)
		} // else Success
	case <-time.After(readTimeout):
		t.Fatal("timed out waiting for notification after resume")
	}

	if err = rd.Close(); err != nil {
		t.Fatal(err)
	}

	// Pause/Resume after close should return an error that wraps ErrClosed
	// without being ErrClosed itself.
	err = rd.Pause()
	qt.Assert(t, qt.Not(qt.Equals(err, ErrClosed)), qt.Commentf("returns unwrapped ErrClosed"))
	qt.Assert(t, qt.ErrorIs(err, ErrClosed), qt.Commentf("doesn't wrap ErrClosed"))
	err = rd.Resume()
	qt.Assert(t, qt.Not(qt.Equals(err, ErrClosed)), qt.Commentf("returns unwrapped ErrClosed"))
	qt.Assert(t, qt.ErrorIs(err, ErrClosed), qt.Commentf("doesn't wrap ErrClosed"))
}

// TestPerfReaderWakeupEvents checks that records become readable once
// WakeupEvents samples have been submitted.
func TestPerfReaderWakeupEvents(t *testing.T) {
	testutils.LockOSThreadToSingleCPU(t)

	events := perfEventArray(t)

	numEvents := 2
	rd, err := NewReaderWithOptions(events, 4096, ReaderOptions{WakeupEvents: numEvents})
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	prog := outputSamplesProg(t, events, 5)

	// Send enough events to trigger WakeupEvents.
	for i := 0; i < numEvents; i++ {
		_, _, err = prog.Test(internal.EmptyBPFContext)
		testutils.SkipIfNotSupported(t, err)
		qt.Assert(t, qt.IsNil(err))
	}

	time.AfterFunc(5*time.Second, func() {
		// Interrupt Read() in case the implementation is buggy.
		rd.Close()
	})

	for i := 0; i < numEvents; i++ {
		checkRecord(t, rd)
	}
}

// TestReadWithoutWakeup checks that a sample below the WakeupEvents threshold
// can still be read when the deadline has already passed.
func TestReadWithoutWakeup(t *testing.T) {
	t.Parallel()

	events := perfEventArray(t)

	rd, err := NewReaderWithOptions(events, 1, ReaderOptions{WakeupEvents: 2})
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	prog := outputSamplesProg(t, events, 5)
	ret, _, err := prog.Test(internal.EmptyBPFContext)
	testutils.SkipIfNotSupported(t, err)
	qt.Assert(t, qt.IsNil(err))
	qt.Assert(t, qt.Equals(ret, 0))

	rd.SetDeadline(time.Now())
	checkRecord(t, rd)
}

// BenchmarkReader measures submitting one sample and reading it with Read.
func BenchmarkReader(b *testing.B) {
	events := perfEventArray(b)
	prog := outputSamplesProg(b, events, 80)

	rd, err := NewReader(events, 4096)
	if err != nil {
		b.Fatal(err)
	}
	defer rd.Close()

	buf := internal.EmptyBPFContext

	b.ReportAllocs()

	for b.Loop() {
		ret, _, err := prog.Test(buf)
		if err != nil {
			b.Fatal(err)
		} else if errno := syscall.Errno(-int32(ret)); errno != 0 {
			b.Fatal("Expected 0 as return value, got", errno)
		}

		if _, err = rd.Read(); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkReadInto measures submitting one sample and reading it with
// ReadInto into a reused Record.
func BenchmarkReadInto(b *testing.B) {
	events := perfEventArray(b)
	prog := outputSamplesProg(b, events, 80)

	rd, err := NewReader(events, 4096)
	if err != nil {
		b.Fatal(err)
	}
	defer rd.Close()

	buf := internal.EmptyBPFContext

	b.ReportAllocs()

	var rec Record
	for b.Loop() {
		// NB: Submitting samples into the perf event ring dominates
		// the benchmark time unfortunately.
		ret, _, err := prog.Test(buf)
		if err != nil {
			b.Fatal(err)
		} else if errno := syscall.Errno(-int32(ret)); errno != 0 {
			b.Fatal("Expected 0 as return value, got", errno)
		}

		if err := rd.ReadInto(&rec); err != nil {
			b.Fatal(err)
		}
	}
}

// This exists just to make the example below nicer.
func bpfPerfEventOutputProgram() (*ebpf.Program, *ebpf.Map) {
	return nil, nil
}

// ExampleReader submits a perf event using BPF,
// and then reads it in user space.
// // The BPF will look something like this: // // struct map events __section("maps") = { // .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, // }; // // __section("xdp") int output_single(void *ctx) { // unsigned char buf[] = { // 1, 2, 3, 4, 5 // }; // // return perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &buf[0], 5); // } // // Also see BPF_F_CTXLEN_MASK if you want to sample packet data // from SKB or XDP programs. func ExampleReader() { prog, events := bpfPerfEventOutputProgram() defer prog.Close() defer events.Close() rd, err := NewReader(events, 4096) if err != nil { panic(err) } defer rd.Close() // Writes out a sample with content 1,2,3,4,4 ret, _, err := prog.Test(internal.EmptyBPFContext) if err != nil || ret != 0 { panic("Can't write sample") } record, err := rd.Read() if err != nil { panic(err) } // Data is padded with 0 for alignment fmt.Println("Sample:", record.RawSample) } // ReadRecord allows reducing memory allocations. func ExampleReader_ReadInto() { prog, events := bpfPerfEventOutputProgram() defer prog.Close() defer events.Close() rd, err := NewReader(events, 4096) if err != nil { panic(err) } defer rd.Close() for i := 0; i < 2; i++ { // Write out two samples ret, _, err := prog.Test(internal.EmptyBPFContext) if err != nil || ret != 0 { panic("Can't write sample") } } var rec Record for i := 0; i < 2; i++ { if err := rd.ReadInto(&rec); err != nil { panic(err) } fmt.Println("Sample:", rec.RawSample[:5]) } } func perfEventArray(tb testing.TB) *ebpf.Map { events, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.PerfEventArray, }) if err != nil { tb.Fatal(err) } tb.Cleanup(func() { events.Close() }) return events } ================================================ FILE: perf/ring.go ================================================ //go:build !windows package perf import ( "errors" "fmt" "io" "math" "os" "runtime" "sync/atomic" "unsafe" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) // perfEventRing is a page of metadata 
// perfEventRing is a page of metadata followed by
// a variable number of pages which form a ring buffer.
type perfEventRing struct {
	// cpu is the CPU index the backing perf event was created for.
	cpu int
	// mmap is the complete mapping of the perf event fd, that is the
	// metadata page plus the data pages. Close unmaps it and sets it to nil.
	mmap []byte
	// ringReader reads from the data area of mmap. It is a reverse reader
	// for overwritable rings and a forward reader otherwise.
	ringReader
	// cleanup unmaps mmap when the ring becomes unreachable without Close
	// having been called. Close stops it before unmapping explicitly.
	cleanup runtime.Cleanup
}
// perfBufferSize returns a valid mmap buffer size for use with perf_event_open (1+2^n pages)
//
// perCPUBuffer is rounded up to a whole number of pages and then to the next
// power of two number of pages; one more page is added for the metadata. A
// perCPUBuffer smaller than one byte yields the metadata page only.
func perfBufferSize(perCPUBuffer int) int {
	pageSize := os.Getpagesize()

	// Smallest whole number of pages. Dividing before rounding up avoids
	// overflowing perCPUBuffer+pageSize-1 for sizes close to math.MaxInt.
	nPages := perCPUBuffer / pageSize
	if perCPUBuffer%pageSize > 0 {
		nPages++
	}
	if nPages < 1 {
		// No data pages requested, only the metadata page remains.
		return pageSize
	}

	// Round up to nearest power of two number of pages. This is done with
	// integers only: going through floating point logarithms is inexact for
	// very large values. Doubling stops early if the total, including the
	// metadata page, would no longer fit into an int.
	maxDataPages := math.MaxInt/pageSize - 1
	dataPages := 1
	for dataPages < nPages && dataPages <= maxDataPages/2 {
		dataPages <<= 1
	}

	// Add one for metadata
	return (dataPages + 1) * pageSize
}
[]byte) *forwardReader { return &forwardReader{ meta: meta, head: atomic.LoadUint64(&meta.Data_head), tail: atomic.LoadUint64(&meta.Data_tail), // cap is always a power of two mask: uint64(cap(ring) - 1), ring: ring, } } func (rr *forwardReader) loadHead() { rr.head = atomic.LoadUint64(&rr.meta.Data_head) } func (rr *forwardReader) size() int { return len(rr.ring) } func (rr *forwardReader) remaining() int { return int((rr.head - rr.tail) & rr.mask) } func (rr *forwardReader) writeTail() { // Commit the new tail. This lets the kernel know that // the ring buffer has been consumed. atomic.StoreUint64(&rr.meta.Data_tail, rr.tail) } func (rr *forwardReader) Read(p []byte) (int, error) { start := int(rr.tail & rr.mask) n := len(p) // Truncate if the read wraps in the ring buffer if remainder := cap(rr.ring) - start; n > remainder { n = remainder } // Truncate if there isn't enough data if remainder := int(rr.head - rr.tail); n > remainder { n = remainder } copy(p, rr.ring[start:start+n]) rr.tail += uint64(n) if rr.tail == rr.head { return n, io.EOF } return n, nil } type reverseReader struct { meta *unix.PerfEventMmapPage // head is the position where the kernel last wrote data. head uint64 // read is the position we read the next data from. Updated as reads are made. read uint64 // tail is the end of the ring buffer. No reads must be made past it. 
// loadHead takes a new snapshot of the kernel's write position.
//
// The previous head becomes the new tail, head and read are both set to the
// current value of Data_head, and tail is clamped so that at most cap(ring)
// bytes can be read. On the call made by newReverseReader head is still zero,
// so tail starts out as zero.
func (rr *reverseReader) loadHead() {
	// The diagram below represents an overwritable perf ring buffer:
	//
	//    head     read            tail
	//     |        |               |
	//     V        V               V
	// +---+--------+------------+---------+--------+
	// |   |H-D....D|H-C........C|H-B.....B|H-A....A|
	// +---+--------+------------+---------+--------+
	// <--Write from right to left
	//                     Read from left to right-->
	// (H means header)
	//
	// The buffer is read left to right beginning from head to tail.
	// [head, read) is the read portion of the buffer, [read, tail) the unread one.
	// read is adjusted as we progress through the buffer.

	// Avoid reading sample D multiple times by discarding unread samples C, B, A.
	rr.tail = rr.head

	// Get the new head and start reading from it.
	rr.head = atomic.LoadUint64(&rr.meta.Data_head)
	rr.read = rr.head

	// head moves downwards as the kernel writes, so tail-head is the number
	// of bytes written since the previous snapshot.
	if rr.tail-rr.head > uint64(cap(rr.ring)) {
		// ring has been fully written, only permit at most cap(rr.ring)
		// bytes to be read.
		rr.tail = rr.head + uint64(cap(rr.ring))
	}
}
} func (rr *reverseReader) Read(p []byte) (int, error) { start := int(rr.read & rr.mask) n := len(p) // Truncate if the read wraps in the ring buffer if remainder := cap(rr.ring) - start; n > remainder { n = remainder } // Truncate if there isn't enough data if remainder := int(rr.tail - rr.read); n > remainder { n = remainder } copy(p, rr.ring[start:start+n]) rr.read += uint64(n) if rr.read == rr.tail { return n, io.EOF } return n, nil } ================================================ FILE: perf/ring_test.go ================================================ //go:build !windows package perf import ( "io" "os" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/unix" ) func TestRingBufferReader(t *testing.T) { ring := makeForwardRing(2, 0) checkRead(t, ring, []byte{0, 1}, io.EOF) checkRead(t, ring, []byte{}, io.EOF) // Wrapping read ring = makeForwardRing(2, 1) checkRead(t, ring, []byte{1}, nil) checkRead(t, ring, []byte{0}, io.EOF) checkRead(t, ring, []byte{}, io.EOF) } func TestRingBufferReverseReader(t *testing.T) { // First case: read 4, starting from offset 2. // The buffer should contain the following: // // [0 1 2 3] // ^ // | // head // // As we read from position 2, we should get [2, 3]. // Then, when we read it for the second time, we should get [0, 1] as we would // have looped around the buffer. ring := makeReverseRing(4, 2) checkRead(t, ring, []byte{2, 3}, nil) checkRead(t, ring, []byte{0, 1}, io.EOF) checkRead(t, ring, []byte{}, io.EOF) // Complicated case: read bytes until previous_head. // // [0 1 2 3] // ^ ^ // | | // | +---previous_head // head ring = makeReverseRing(4, 2) checkReadBuffer(t, ring, []byte{2}, nil, make([]byte, 1)) // Next read would be {3}, but we don't consume it. // Pretend the kernel wrote another 2 bytes. ring.meta.Data_head -= 2 ring.loadHead() // {3} is discarded. checkRead(t, ring, []byte{0, 1}, io.EOF) // Complicated case: read the whole buffer because it was "overwritten". 
// // [0 1 2 3] // ^ // | // +---previous_head // | // head // // So, we should first read [2, 3] then [0, 1]. ring = makeReverseRing(4, 2) ring.meta.Data_head -= ring.meta.Data_size ring.loadHead() checkRead(t, ring, []byte{2, 3}, nil) checkRead(t, ring, []byte{0, 1}, io.EOF) } // ensure that the next call to Read() yields the correct result. // // Read is called with a buffer that is larger than want so // that corner cases around wrapping can be checked. Use // checkReadBuffer if that is not desired. func checkRead(t *testing.T, r io.Reader, want []byte, wantErr error) { checkReadBuffer(t, r, want, wantErr, make([]byte, len(want)+1)) } func checkReadBuffer(t *testing.T, r io.Reader, want []byte, wantErr error, buf []byte) { t.Helper() n, err := r.Read(buf) buf = buf[:n] qt.Assert(t, qt.Equals(err, wantErr)) qt.Assert(t, qt.DeepEquals(buf, want)) } func makeBuffer(size int) []byte { buf := make([]byte, size) for i := range buf { buf[i] = byte(i) } return buf } func makeReverseRing(size, offset int) *reverseReader { if size != 0 && (size&(size-1)) != 0 { panic("size must be power of two") } meta := unix.PerfEventMmapPage{ Data_head: 0 - uint64(size) - uint64(offset), Data_tail: 0, // never written by the kernel Data_size: uint64(size), } return newReverseReader(&meta, makeBuffer(size)) } func makeForwardRing(size, offset int) *forwardReader { if size != 0 && (size&(size-1)) != 0 { panic("size must be power of two") } meta := unix.PerfEventMmapPage{ Data_head: uint64(size + offset), Data_tail: uint64(offset), Data_size: uint64(size), } return newForwardReader(&meta, makeBuffer(size)) } func TestPerfEventRing(t *testing.T) { check := func(buffer, watermark int, overwritable bool) { event, ring, err := newPerfEventRing(0, buffer, ReaderOptions{Watermark: watermark, Overwritable: overwritable}) if err != nil { t.Fatal(err) } defer event.Close() defer ring.Close() size := ring.size() // Ring size should be at least as big as buffer if size < buffer { t.Fatalf("ring 
size %d smaller than buffer %d", size, buffer) } // Ring size should be of the form 2^n pages (meta page has already been removed) if size%os.Getpagesize() != 0 { t.Fatalf("ring size %d not whole number of pages (pageSize %d)", size, os.Getpagesize()) } nPages := size / os.Getpagesize() if nPages&(nPages-1) != 0 { t.Fatalf("ring size %d (%d pages) not a power of two pages (pageSize %d)", size, nPages, os.Getpagesize()) } } // watermark > buffer _, _, err := newPerfEventRing(0, 8192, ReaderOptions{Watermark: 8193, Overwritable: false}) if err == nil { t.Fatal("watermark > buffer allowed") } _, _, err = newPerfEventRing(0, 8192, ReaderOptions{Watermark: 8193, Overwritable: true}) if err == nil { t.Fatal("watermark > buffer allowed") } // watermark == buffer _, _, err = newPerfEventRing(0, 8192, ReaderOptions{Watermark: 8192, Overwritable: false}) if err == nil { t.Fatal("watermark == buffer allowed") } _, _, err = newPerfEventRing(0, 8192, ReaderOptions{Watermark: 8193, Overwritable: true}) if err == nil { t.Fatal("watermark == buffer allowed") } // buffer not a power of two, watermark < buffer check(8193, 8192, false) check(8193, 8192, true) // large buffer not a multiple of page size at all (prime) check(65537, 8192, false) check(65537, 8192, true) } ================================================ FILE: pin/doc.go ================================================ // Package pin provides utility functions for working with pinned objects on bpffs. package pin ================================================ FILE: pin/load.go ================================================ package pin import ( "fmt" "io" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/link" ) // Pinner is an interface implemented by all eBPF objects that support pinning // to a bpf virtual filesystem. type Pinner interface { io.Closer Pin(string) error } // Load retrieves a pinned object from a bpf virtual filesystem. 
It returns one // of [ebpf.Map], [ebpf.Program], or [link.Link]. // // Trying to open anything other than a bpf object is an error. func Load(path string, opts *ebpf.LoadPinOptions) (Pinner, error) { fd, typ, err := sys.ObjGetTyped(&sys.ObjGetAttr{ Pathname: sys.NewStringPointer(path), FileFlags: opts.Marshal(), }) if err != nil { return nil, fmt.Errorf("opening pin %s: %w", path, err) } switch typ { case sys.BPF_TYPE_MAP: return ebpf.NewMapFromFD(fd.Disown()) case sys.BPF_TYPE_PROG: return ebpf.NewProgramFromFD(fd.Disown()) case sys.BPF_TYPE_LINK: return link.NewFromFD(fd.Disown()) } return nil, fmt.Errorf("unknown object type %d", typ) } ================================================ FILE: pin/load_test.go ================================================ package pin import ( "path/filepath" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/testutils/testmain" ) func mustPinnedProgram(t *testing.T, path string) *ebpf.Program { t.Helper() typ := ebpf.SocketFilter if platform.IsWindows { typ = ebpf.WindowsSample } spec := &ebpf.ProgramSpec{ Name: "test", Type: typ, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 2, asm.DWord), asm.Return(), }, License: "MIT", } p, err := ebpf.NewProgram(spec) if err != nil { t.Fatal(err) } t.Cleanup(func() { p.Close() }) if err := p.Pin(path); err != nil { t.Fatal(err) } return p } func mustPinnedMap(t *testing.T, path string) *ebpf.Map { t.Helper() typ := ebpf.Array if platform.IsWindows { typ = ebpf.WindowsArray } spec := &ebpf.MapSpec{ Name: "test", Type: typ, KeySize: 4, ValueSize: 4, MaxEntries: 1, } m, err := ebpf.NewMap(spec) if err != nil { t.Fatal(err) } t.Cleanup(func() { m.Close() }) if err := m.Pin(path); err != nil { t.Fatal(err) } return m } func TestLoad(t *testing.T) { testutils.SkipOnOldKernel(t, "4.10", "reading program fdinfo") tmp := 
// Pin represents an object and its pinned path.
type Pin struct {
	Path   string
	Object io.Closer
}

// close releases Object if the Pin still holds one.
func (p *Pin) close() {
	if p.Object == nil {
		return
	}
	p.Object.Close()
}

// Take ownership of Pin.Object.
//
// The caller is responsible for calling Close on [Pin.Object]. Retrieve the
// object before calling Take, since the field is nil afterwards.
func (p *Pin) Take() {
	p.Object = nil
}
func WalkDir(root string, opts *ebpf.LoadPinOptions) iter.Seq2[*Pin, error] { return func(yield func(*Pin, error) bool) { fsType, err := linux.FSType(root) if err != nil { yield(nil, err) return } if fsType != unix.BPF_FS_MAGIC { yield(nil, fmt.Errorf("%s is not on a bpf filesystem", root)) return } fn := func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { return nil } path = filepath.Join(root, path) obj, err := Load(path, opts) if err != nil { return err } pin := &Pin{Path: path, Object: obj} defer pin.close() if !yield(pin, nil) { return fs.SkipAll } return nil } err = fs.WalkDir(os.DirFS(root), ".", fn) if err != nil { yield(nil, fmt.Errorf("walk: %w", err)) return } } } ================================================ FILE: pin/walk_test.go ================================================ package pin import ( "iter" "os" "path/filepath" "reflect" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/testutils" ) func TestWalkDir(t *testing.T) { testutils.SkipOnOldKernel(t, "4.13", "reading program objinfo") tmp := testutils.TempBPFFS(t) dir := filepath.Join(tmp, "dir") if !platform.IsWindows { // Windows doesn't have a BPF file system, so mkdir below fails. 
qt.Assert(t, qt.IsNil(os.Mkdir(dir, 0755))) } progPath := filepath.Join(tmp, "pinned_prog") mustPinnedProgram(t, progPath) mapPath := filepath.Join(dir, "pinned_map") mustPinnedMap(t, mapPath) next, stop := iter.Pull2(WalkDir(tmp, nil)) defer stop() pin, err, ok := next() qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(reflect.TypeOf(pin.Object), reflect.TypeFor[*ebpf.Map]())) qt.Assert(t, qt.Equals(pin.Path, mapPath)) pin, err, ok = next() qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(reflect.TypeOf(pin.Object), reflect.TypeFor[*ebpf.Program]())) qt.Assert(t, qt.Equals(pin.Path, progPath)) _, _, ok = next() qt.Assert(t, qt.IsFalse(ok)) t.Run("Not BPFFS", func(t *testing.T) { if platform.IsWindows { t.Skip("Windows does not have BPFFS") } next, stop := iter.Pull2(WalkDir("/", nil)) defer stop() _, err, ok = next() qt.Assert(t, qt.IsTrue(ok)) qt.Assert(t, qt.IsNotNil(err)) _, _, ok = next() qt.Assert(t, qt.IsFalse(ok)) }) } ================================================ FILE: pin/walk_windows.go ================================================ package pin import ( "errors" "fmt" "iter" "strings" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/efw" ) // WalkDir walks the file tree rooted at path and yields a [Pin] for each // BPF object below the path. // // Callers must invoke [Pin.Take] if they wish to hold on to the object. 
func WalkDir(root string, opts *ebpf.LoadPinOptions) iter.Seq2[*Pin, error] { return func(yield func(*Pin, error) bool) { root, err := efw.EbpfCanonicalizePinPath(root) if err != nil { yield(nil, fmt.Errorf("failed to canonicalize pin path %q: %w", root, err)) return } cursor := root for { next, _, err := efw.EbpfGetNextPinnedObjectPath(cursor, efw.EBPF_OBJECT_UNKNOWN) if errors.Is(err, efw.EBPF_NO_MORE_KEYS) { break } else if err != nil { yield(nil, err) return } if !strings.HasPrefix(next, root) { break } obj, err := Load(next, opts) if err != nil { yield(nil, err) return } pin := &Pin{next, obj} ok := yield(pin, nil) pin.close() if !ok { return } cursor = next } } } ================================================ FILE: prog.go ================================================ package ebpf import ( "bytes" "encoding/binary" "errors" "fmt" "math" "path/filepath" "runtime" "slices" "strings" "time" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/linux" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/sysenc" "github.com/cilium/ebpf/internal/unix" ) // ErrNotSupported is returned whenever the kernel doesn't support a feature. var ErrNotSupported = internal.ErrNotSupported // ErrProgIncompatible is returned when a loaded Program is incompatible with a // given spec. var ErrProgIncompatible = errors.New("program is incompatible") // errBadRelocation is returned when the verifier rejects a program due to a // bad CO-RE relocation. // // This error is detected based on heuristics and therefore may not be reliable. var errBadRelocation = errors.New("bad CO-RE relocation") // errUnknownKfunc is returned when the verifier rejects a program due to an // unknown kfunc. // // This error is detected based on heuristics and therefore may not be reliable. 
// ProgramOptions control loading a program into the kernel.
type ProgramOptions struct {
	// Bitmap controlling the detail emitted by the kernel's eBPF verifier log.
	// LogLevel-type values can be ORed together to request specific kinds of
	// verifier output. See the documentation on [ebpf.LogLevel] for details.
	//
	//  opts.LogLevel = (ebpf.LogLevelBranch | ebpf.LogLevelStats)
	//
	// If left to its default value, the program will first be loaded without
	// verifier output enabled. Upon error, the program load will be repeated
	// with LogLevelBranch and the given (or default) LogSizeStart value.
	//
	// Unless LogDisabled is set, setting this to a non-zero value will enable the verifier
	// log, populating the [ebpf.Program.VerifierLog] field on successful loads
	// and including detailed verifier errors if the program is rejected. This
	// will always allocate an output buffer, but will result in only a single
	// attempt at loading the program.
	LogLevel LogLevel

	// Starting size of the verifier log buffer. If the verifier log is larger
	// than this size, the buffer will be grown to fit the entire log. Leave at
	// its default value unless troubleshooting.
	LogSizeStart uint32

	// Disables the verifier log completely, regardless of other options.
	LogDisabled bool

	// Type information used for CO-RE relocations.
	//
	// This is useful in environments where the kernel BTF is not available
	// (containers) or where it is in a non-standard location. Defaults to
	// use the kernel BTF from a well-known location if nil.
	KernelTypes *btf.Spec

	// Additional targets to consider for CO-RE relocations. This can be used to
	// pass BTF information for kernel modules when it's not present on
	// KernelTypes.
	ExtraRelocationTargets []*btf.Spec
}
SectionName string Instructions asm.Instructions // Flags is passed to the kernel and specifies additional program // load attributes. Flags uint32 // License of the program. Some helpers are only available if // the license is deemed compatible with the GPL. // // See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1 License string // Version used by Kprobe programs. // // Deprecated on kernels 5.0 and later. Leave empty to let the library // detect this value automatically. KernelVersion uint32 // The byte order this program was compiled for, may be nil. ByteOrder binary.ByteOrder } // Copy returns a copy of the spec. func (ps *ProgramSpec) Copy() *ProgramSpec { if ps == nil { return nil } cpy := *ps cpy.Instructions = make(asm.Instructions, len(ps.Instructions)) copy(cpy.Instructions, ps.Instructions) return &cpy } // Tag calculates the kernel tag for a series of instructions. // // Use asm.Instructions.Tag if you need to calculate for non-native endianness. // // Deprecated: The value produced by this method no longer matches tags produced // by the kernel since Linux 6.18. Use [ProgramSpec.Compatible] instead. func (ps *ProgramSpec) Tag() (string, error) { return ps.Instructions.Tag(internal.NativeEndian) } // Compatible returns nil if a loaded Program's kernel tag matches the one of // the ProgramSpec. // // Returns [ErrProgIncompatible] if the tags do not match. func (ps *ProgramSpec) Compatible(info *ProgramInfo) error { if platform.IsWindows { return fmt.Errorf("%w: Windows does not support tag readback from kernel", internal.ErrNotSupportedOnOS) } ok, err := ps.Instructions.HasTag(info.Tag, internal.NativeEndian) if err != nil { return err } if !ok { return fmt.Errorf("%w: ProgramSpec and Program tags do not match", ErrProgIncompatible) } return nil } // targetsKernelModule returns true if the program supports being attached to a // symbol provided by a kernel module. 
func (ps *ProgramSpec) targetsKernelModule() bool { if ps.AttachTo == "" { return false } switch ps.Type { case Tracing: switch ps.AttachType { case AttachTraceFEntry, AttachTraceFExit: return true } case Kprobe: return true } return false } // VerifierError is returned by [NewProgram] and [NewProgramWithOptions] if a // program is rejected by the verifier. // // Use [errors.As] to access the error. type VerifierError = internal.VerifierError // Program represents BPF program loaded into the kernel. // // It is not safe to close a Program which is used by other goroutines. type Program struct { // Contains the output of the kernel verifier if enabled, // otherwise it is empty. VerifierLog string fd *sys.FD name string pinnedPath string typ ProgramType } // NewProgram creates a new Program. // // See [NewProgramWithOptions] for details. // // Returns a [VerifierError] containing the full verifier log if the program is // rejected by the kernel. func NewProgram(spec *ProgramSpec) (*Program, error) { return NewProgramWithOptions(spec, ProgramOptions{}) } // NewProgramWithOptions creates a new Program. // // Loading a program for the first time will perform // feature detection by loading small, temporary programs. // // Returns a [VerifierError] containing the full verifier log if the program is // rejected by the kernel. 
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { if spec == nil { return nil, errors.New("can't load a program from a nil spec") } prog, err := newProgramWithOptions(spec, opts, btf.NewCache()) if errors.Is(err, asm.ErrUnsatisfiedMapReference) { return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err) } return prog, err } var ( coreBadLoad = []byte(fmt.Sprintf("(18) r10 = 0x%x\n", btf.COREBadRelocationSentinel)) // This log message was introduced by ebb676daa1a3 ("bpf: Print function name in // addition to function id") which first appeared in v4.10 and has remained // unchanged since. coreBadCall = []byte(fmt.Sprintf("invalid func unknown#%d\n", btf.COREBadRelocationSentinel)) kfuncBadCall = []byte(fmt.Sprintf("invalid func unknown#%d\n", kfuncCallPoisonBase)) ) func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, c *btf.Cache) (*Program, error) { if len(spec.Instructions) == 0 { return nil, errors.New("instructions cannot be empty") } if spec.Type == UnspecifiedProgram { return nil, errors.New("can't load program of unspecified type") } if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian { return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian) } // Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load") // require the version field to be set to the value of the KERNEL_VERSION // macro for kprobe-type programs. // Overwrite Kprobe program version if set to zero or the magic version constant. 
kv := spec.KernelVersion if spec.Type == Kprobe && (kv == 0 || kv == internal.MagicKernelVersion) { v, err := linux.KernelVersion() if err != nil { return nil, fmt.Errorf("detecting kernel version: %w", err) } kv = v.Kernel() } p, progType := platform.DecodeConstant(spec.Type) if p != platform.Native { return nil, fmt.Errorf("program type %s (%s): %w", spec.Type, p, internal.ErrNotSupportedOnOS) } attr := &sys.ProgLoadAttr{ ProgName: maybeFillObjName(spec.Name), ProgType: sys.ProgType(progType), ProgFlags: spec.Flags, ProgIfindex: spec.Ifindex, ExpectedAttachType: sys.AttachType(spec.AttachType), License: sys.NewStringPointer(spec.License), KernVersion: kv, } insns := make(asm.Instructions, len(spec.Instructions)) copy(insns, spec.Instructions) var b btf.Builder if err := applyRelocations(insns, spec.ByteOrder, &b, c, opts.KernelTypes, opts.ExtraRelocationTargets); err != nil { return nil, fmt.Errorf("apply CO-RE relocations: %w", err) } errExtInfos := haveProgramExtInfos() if !b.Empty() && errors.Is(errExtInfos, ErrNotSupported) { // There is at least one CO-RE relocation which relies on a stable local // type ID. // Return ErrNotSupported instead of E2BIG if there is no BTF support. return nil, errExtInfos } if errExtInfos == nil { // Only add func and line info if the kernel supports it. This allows // BPF compiled with modern toolchains to work on old kernels. 
fib, lib, err := btf.MarshalExtInfos(insns, &b) if err != nil { return nil, fmt.Errorf("marshal ext_infos: %w", err) } attr.FuncInfoRecSize = btf.FuncInfoSize attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize attr.FuncInfo = sys.SlicePointer(fib) attr.LineInfoRecSize = btf.LineInfoSize attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize attr.LineInfo = sys.SlicePointer(lib) } if !b.Empty() { handle, err := btf.NewHandle(&b) if err != nil { return nil, fmt.Errorf("load BTF: %w", err) } defer handle.Close() attr.ProgBtfFd = uint32(handle.FD()) } kconfig, err := resolveKconfigReferences(insns) if err != nil { return nil, fmt.Errorf("resolve .kconfig: %w", err) } defer kconfig.Close() if err := resolveKsymReferences(insns); err != nil { return nil, fmt.Errorf("resolve .ksyms: %w", err) } if err := fixupAndValidate(insns); err != nil { return nil, err } handles, err := fixupKfuncs(insns, c) if err != nil { return nil, fmt.Errorf("fixing up kfuncs: %w", err) } defer handles.Close() if len(handles) > 0 { fdArray := handles.fdArray() attr.FdArray = sys.SlicePointer(fdArray) } buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) err = insns.Marshal(buf, internal.NativeEndian) if err != nil { return nil, err } bytecode := buf.Bytes() attr.Insns = sys.SlicePointer(bytecode) attr.InsnCnt = uint32(len(bytecode) / asm.InstructionSize) if spec.AttachTarget != nil { targetID, err := findTargetInProgram(spec.AttachTarget, spec.AttachTo, spec.Type, spec.AttachType) if err != nil { return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err) } attr.AttachBtfId = targetID attr.AttachBtfObjFd = uint32(spec.AttachTarget.FD()) defer runtime.KeepAlive(spec.AttachTarget) } else if spec.AttachTo != "" { var targetMember string attachTo := spec.AttachTo if spec.Type == StructOps { attachTo, targetMember, _ = strings.Cut(attachTo, ":") if targetMember == "" { return nil, fmt.Errorf("struct_ops: AttachTo must be ':' (got %s)", spec.AttachTo) } } module, targetID, err := 
findProgramTargetInKernel(attachTo, spec.Type, spec.AttachType, c) if err != nil && !errors.Is(err, errUnrecognizedAttachType) { // We ignore errUnrecognizedAttachType since AttachTo may be non-empty // for programs that don't attach anywhere. return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err) } if spec.Type == StructOps { var s *btf.Spec target := btf.Type((*btf.Struct)(nil)) s, module, err = findTargetInKernel(attachTo, &target, c) if err != nil { return nil, fmt.Errorf("lookup struct_ops kern type %q: %w", attachTo, err) } kType := target.(*btf.Struct) targetID, err = s.TypeID(kType) if err != nil { return nil, fmt.Errorf("type id for %s: %w", kType.TypeName(), err) } idx := slices.IndexFunc(kType.Members, func(m btf.Member) bool { return m.Name == targetMember }) if idx < 0 { return nil, fmt.Errorf("member %q not found in %s", targetMember, kType.Name) } // ExpectedAttachType: index of the target member in the struct attr.ExpectedAttachType = sys.AttachType(idx) } attr.AttachBtfId = targetID if module != nil && attr.AttachBtfObjFd == 0 { attr.AttachBtfObjFd = uint32(module.FD()) defer module.Close() } } if platform.IsWindows && opts.LogLevel != 0 { return nil, fmt.Errorf("log level: %w", internal.ErrNotSupportedOnOS) } var logBuf []byte var fd *sys.FD if opts.LogDisabled { // Loading with logging disabled should never retry. fd, err = sys.ProgLoad(attr) if err == nil { return &Program{"", fd, spec.Name, "", spec.Type}, nil } } else { // Only specify log size if log level is also specified. Setting size // without level results in EINVAL. Level will be bumped to LogLevelBranch // if the first load fails. 
if opts.LogLevel != 0 { attr.LogLevel = opts.LogLevel attr.LogSize = internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize) } attempts := 1 for { if attr.LogLevel != 0 { logBuf = make([]byte, attr.LogSize) attr.LogBuf = sys.SlicePointer(logBuf) } fd, err = sys.ProgLoad(attr) if err == nil { return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil } if !retryLogAttrs(attr, opts.LogSizeStart, err) { break } if attempts >= maxVerifierAttempts { return nil, fmt.Errorf("load program: %w (bug: hit %d verifier attempts)", err, maxVerifierAttempts) } attempts++ } } end := bytes.IndexByte(logBuf, 0) if end < 0 { end = len(logBuf) } tail := logBuf[max(end-256, 0):end] switch { case errors.Is(err, unix.EPERM): if len(logBuf) > 0 && logBuf[0] == 0 { // EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can // check that the log is empty to reduce false positives. return nil, fmt.Errorf("load program: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) } case errors.Is(err, unix.EFAULT): // EFAULT is returned when the kernel hits a verifier bug, and always // overrides ENOSPC, defeating the buffer growth strategy. Warn the user // that they may need to increase the buffer size manually. return nil, fmt.Errorf("load program: %w (hit verifier bug, increase LogSizeStart to fit the log and check dmesg)", err) case errors.Is(err, unix.EINVAL): if bytes.Contains(tail, coreBadCall) { err = errBadRelocation break } else if bytes.Contains(tail, kfuncBadCall) { err = errUnknownKfunc break } case errors.Is(err, unix.EACCES): if bytes.Contains(tail, coreBadLoad) { err = errBadRelocation break } } // hasFunctionReferences may be expensive, so check it last. 
if (errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM)) && hasFunctionReferences(spec.Instructions) { if err := haveBPFToBPFCalls(); err != nil { return nil, fmt.Errorf("load program: %w", err) } } return nil, internal.ErrorWithLog("load program", err, logBuf) } func retryLogAttrs(attr *sys.ProgLoadAttr, startSize uint32, err error) bool { if attr.LogSize == maxVerifierLogSize { // Maximum buffer size reached, don't grow or retry. return false } // ENOSPC means the log was enabled on the previous iteration, so we only // need to grow the buffer. if errors.Is(err, unix.ENOSPC) { if attr.LogTrueSize != 0 { // Kernel supports LogTrueSize and previous iteration undershot the buffer // size. Try again with the given true size. attr.LogSize = attr.LogTrueSize return true } // Ensure the size doesn't overflow. const factor = 2 if attr.LogSize >= maxVerifierLogSize/factor { attr.LogSize = maxVerifierLogSize return true } // Make an educated guess how large the buffer should be by multiplying. Due // to int division, this rounds down odd sizes. attr.LogSize = internal.Between(attr.LogSize, minVerifierLogSize, maxVerifierLogSize/factor) attr.LogSize *= factor return true } if attr.LogLevel == 0 { // Loading the program failed, it wasn't a buffer-related error, and the log // was disabled the previous iteration. Enable basic logging and retry. attr.LogLevel = LogLevelBranch attr.LogSize = internal.Between(startSize, minVerifierLogSize, maxVerifierLogSize) return true } // Loading the program failed for a reason other than buffer size and the log // was already enabled the previous iteration. Don't retry. return false } // NewProgramFromFD creates a [Program] around a raw fd. // // You should not use fd after calling this function. // // Requires at least Linux 4.13. Returns an error on Windows. 
func NewProgramFromFD(fd int) (*Program, error) { f, err := sys.NewFD(fd) if err != nil { return nil, err } return newProgramFromFD(f) } // NewProgramFromID returns the [Program] for a given program id. Returns // [ErrNotExist] if there is no eBPF program with the given id. // // Requires at least Linux 4.13. func NewProgramFromID(id ProgramID) (*Program, error) { fd, err := sys.ProgGetFdById(&sys.ProgGetFdByIdAttr{ Id: uint32(id), }) if err != nil { return nil, fmt.Errorf("get program by id: %w", err) } return newProgramFromFD(fd) } func newProgramFromFD(fd *sys.FD) (*Program, error) { info, err := minimalProgramInfoFromFd(fd) if err != nil { fd.Close() return nil, fmt.Errorf("discover program type: %w", err) } return &Program{"", fd, info.Name, "", info.Type}, nil } func (p *Program) String() string { if p.name != "" { return fmt.Sprintf("%s(%s)#%v", p.typ, p.name, p.fd) } return fmt.Sprintf("%s(%v)", p.typ, p.fd) } // Type returns the underlying type of the program. func (p *Program) Type() ProgramType { return p.typ } // Info returns metadata about the program. // // Requires at least 4.10. func (p *Program) Info() (*ProgramInfo, error) { return newProgramInfoFromFd(p.fd) } // Stats returns runtime statistics about the Program. Requires BPF statistics // collection to be enabled, see [EnableStats]. // // Requires at least Linux 5.8. func (p *Program) Stats() (*ProgramStats, error) { return newProgramStatsFromFd(p.fd) } // Handle returns a reference to the program's type information in the kernel. // // Returns ErrNotSupported if the kernel has no BTF support, or if there is no // BTF associated with the program. func (p *Program) Handle() (*btf.Handle, error) { info, err := p.Info() if err != nil { return nil, err } id, ok := info.BTFID() if !ok { return nil, fmt.Errorf("program %s: retrieve BTF ID: %w", p, ErrNotSupported) } return btf.NewHandleFromID(id) } // FD gets the file descriptor of the Program. 
// // It is invalid to call this function after Close has been called. func (p *Program) FD() int { return p.fd.Int() } // Clone creates a duplicate of the Program. // // Closing the duplicate does not affect the original, and vice versa. // // Cloning a nil Program returns nil. func (p *Program) Clone() (*Program, error) { if p == nil { return nil, nil } dup, err := p.fd.Dup() if err != nil { return nil, fmt.Errorf("can't clone program: %w", err) } return &Program{p.VerifierLog, dup, p.name, "", p.typ}, nil } // Pin persists the Program on the BPF virtual file system past the lifetime of // the process that created it // // Calling Pin on a previously pinned program will overwrite the path, except when // the new path already exists. Re-pinning across filesystems is not supported. // // This requires bpffs to be mounted above fileName. // See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd func (p *Program) Pin(fileName string) error { if err := sys.Pin(p.pinnedPath, fileName, p.fd); err != nil { return err } p.pinnedPath = fileName return nil } // Unpin removes the persisted state for the Program from the BPF virtual filesystem. // // Failed calls to Unpin will not alter the state returned by IsPinned. // // Unpinning an unpinned Program returns nil. func (p *Program) Unpin() error { if err := sys.Unpin(p.pinnedPath); err != nil { return err } p.pinnedPath = "" return nil } // IsPinned returns true if the Program has a non-empty pinned path. func (p *Program) IsPinned() bool { return p.pinnedPath != "" } // Close the Program's underlying file descriptor, which could unload // the program from the kernel if it is not pinned or attached to a // kernel hook. func (p *Program) Close() error { if p == nil { return nil } return p.fd.Close() } // Various options for Run'ing a Program type RunOptions struct { // Program's data input. Required field. 
// // The kernel expects at least 14 bytes input for an ethernet header for // XDP and SKB programs. Data []byte // Program's data after Program has run. Caller must allocate. Optional field. DataOut []byte // Program's context input. Optional field. Context interface{} // Program's context after Program has run. Must be a pointer or slice. Optional field. ContextOut interface{} // Minimum number of times to run Program. Optional field. Defaults to 1. // // The program may be executed more often than this due to interruptions, e.g. // when runtime.AllThreadsSyscall is invoked. Repeat uint32 // Optional flags. Flags uint32 // CPU to run Program on. Optional field. // Note not all program types support this field. CPU uint32 // BatchSize (default 64) affects the kernel's packet buffer allocation behaviour when running // programs with BPF_F_TEST_XDP_LIVE_FRAMES and a non-zero [RunOptions.Repeat] value. // For more details, see the kernel documentation on BPF_PROG_RUN: // https://docs.kernel.org/bpf/bpf_prog_run.html#running-xdp-programs-in-live-frame-mode BatchSize uint32 // Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer // or similar. Typically used during benchmarking. Optional field. // // Deprecated: use [testing.B.ReportMetric] with unit "ns/op" instead. Reset func() } // Test runs the Program in the kernel with the given input and returns the // value returned by the eBPF program. // // Note: the kernel expects at least 14 bytes input for an ethernet header for // XDP and SKB programs. // // This function requires at least Linux 4.12. func (p *Program) Test(in []byte) (uint32, []byte, error) { // Older kernels ignore the dataSizeOut argument when copying to user space. // Combined with things like bpf_xdp_adjust_head() we don't really know what the final // size will be. Hence we allocate an output buffer which we hope will always be large // enough, and panic if the kernel wrote past the end of the allocation. 
// See https://patchwork.ozlabs.org/cover/1006822/ var out []byte if len(in) > 0 { out = make([]byte, len(in)+outputPad) } opts := RunOptions{ Data: in, DataOut: out, Repeat: 1, } ret, _, err := p.run(&opts) if err != nil { return ret, nil, fmt.Errorf("test program: %w", err) } return ret, opts.DataOut, nil } // Run runs the Program in kernel with given RunOptions. // // Note: the same restrictions from Test apply. func (p *Program) Run(opts *RunOptions) (uint32, error) { if opts == nil { opts = &RunOptions{} } ret, _, err := p.run(opts) if err != nil { return ret, fmt.Errorf("run program: %w", err) } return ret, nil } // Benchmark runs the Program with the given input for a number of times // and returns the time taken per iteration. // // Returns the result of the last execution of the program and the time per // run or an error. reset is called whenever the benchmark syscall is // interrupted, and should be set to testing.B.ResetTimer or similar. // // This function requires at least Linux 4.12. func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) { if uint(repeat) > math.MaxUint32 { return 0, 0, fmt.Errorf("repeat is too high") } opts := RunOptions{ Data: in, Repeat: uint32(repeat), Reset: reset, } ret, total, err := p.run(&opts) if err != nil { return ret, total, fmt.Errorf("benchmark program: %w", err) } return ret, total, nil } var haveProgRun = internal.NewFeatureTest("BPF_PROG_RUN", func() error { if platform.IsWindows { return nil } prog, err := NewProgram(&ProgramSpec{ // SocketFilter does not require privileges on newer kernels. Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", }) if err != nil { // This may be because we lack sufficient permissions, etc. 
return err } defer prog.Close() in := internal.EmptyBPFContext attr := sys.ProgRunAttr{ ProgFd: uint32(prog.FD()), DataSizeIn: uint32(len(in)), DataIn: sys.SlicePointer(in), } err = sys.ProgRun(&attr) switch { case errors.Is(err, unix.EINVAL): // Check for EINVAL specifically, rather than err != nil since we // otherwise misdetect due to insufficient permissions. return internal.ErrNotSupported case errors.Is(err, unix.EINTR): // We know that PROG_TEST_RUN is supported if we get EINTR. return nil case errors.Is(err, sys.ENOTSUPP): // The first PROG_TEST_RUN patches shipped in 4.12 didn't include // a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is // supported, but not for the program type used in the probe. return nil } return err }, "4.12", "windows:0.20") func (p *Program) run(opts *RunOptions) (uint32, time.Duration, error) { if uint(len(opts.Data)) > math.MaxUint32 { return 0, 0, fmt.Errorf("input is too long") } if err := haveProgRun(); err != nil { return 0, 0, err } var ctxIn []byte if opts.Context != nil { var err error ctxIn, err = binary.Append(nil, internal.NativeEndian, opts.Context) if err != nil { return 0, 0, fmt.Errorf("cannot serialize context: %v", err) } } var ctxOut []byte if opts.ContextOut != nil { ctxOut = make([]byte, binary.Size(opts.ContextOut)) } else if platform.IsWindows && len(ctxIn) > 0 { // Windows rejects a non-zero ctxIn with a nil ctxOut. 
ctxOut = make([]byte, len(ctxIn)) } attr := sys.ProgRunAttr{ ProgFd: p.fd.Uint(), DataSizeIn: uint32(len(opts.Data)), DataSizeOut: uint32(len(opts.DataOut)), DataIn: sys.SlicePointer(opts.Data), DataOut: sys.SlicePointer(opts.DataOut), Repeat: uint32(opts.Repeat), CtxSizeIn: uint32(len(ctxIn)), CtxSizeOut: uint32(len(ctxOut)), CtxIn: sys.SlicePointer(ctxIn), CtxOut: sys.SlicePointer(ctxOut), Flags: opts.Flags, Cpu: opts.CPU, BatchSize: opts.BatchSize, } if p.Type() == Syscall && ctxIn != nil && ctxOut != nil { // Linux syscall program errors on non-nil ctxOut, uses ctxIn // for both input and output. Shield the user from this wart. if len(ctxIn) != len(ctxOut) { return 0, 0, errors.New("length mismatch: Context and ContextOut") } attr.CtxOut, attr.CtxSizeOut = sys.TypedPointer[uint8]{}, 0 ctxOut = ctxIn } retry: for { err := sys.ProgRun(&attr) if err == nil { break retry } if errors.Is(err, unix.EINTR) { if attr.Repeat <= 1 { // Older kernels check whether enough repetitions have been // executed only after checking for pending signals. // // run signal? done? run ... // // As a result we can get EINTR for repeat==1 even though // the program was run exactly once. Treat this as a // successful run instead. // // Since commit 607b9cc92bd7 ("bpf: Consolidate shared test timing code") // the conditions are reversed: // run done? signal? ... break retry } if opts.Reset != nil { opts.Reset() } continue retry } if errors.Is(err, sys.ENOTSUPP) { return 0, 0, fmt.Errorf("kernel doesn't support running %s: %w", p.Type(), ErrNotSupported) } return 0, 0, err } if opts.DataOut != nil { if int(attr.DataSizeOut) > cap(opts.DataOut) { // Houston, we have a problem. The program created more data than we allocated, // and the kernel wrote past the end of our buffer. 
panic("kernel wrote past end of output buffer") } opts.DataOut = opts.DataOut[:int(attr.DataSizeOut)] } if opts.ContextOut != nil { b := bytes.NewReader(ctxOut) if err := binary.Read(b, internal.NativeEndian, opts.ContextOut); err != nil { return 0, 0, fmt.Errorf("failed to decode ContextOut: %v", err) } } total := time.Duration(attr.Duration) * time.Nanosecond return attr.Retval, total, nil } func unmarshalProgram(buf sysenc.Buffer) (*Program, error) { var id uint32 if err := buf.Unmarshal(&id); err != nil { return nil, err } // Looking up an entry in a nested map or prog array returns an id, // not an fd. return NewProgramFromID(ProgramID(id)) } func marshalProgram(p *Program, length int) ([]byte, error) { if p == nil { return nil, errors.New("can't marshal a nil Program") } if length != 4 { return nil, fmt.Errorf("can't marshal program to %d bytes", length) } buf := make([]byte, 4) internal.NativeEndian.PutUint32(buf, p.fd.Uint()) return buf, nil } // LoadPinnedProgram loads a Program from a pin (file) on the BPF virtual // filesystem. // // Requires at least Linux 4.11. func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) { fd, typ, err := sys.ObjGetTyped(&sys.ObjGetAttr{ Pathname: sys.NewStringPointer(fileName), FileFlags: opts.Marshal(), }) if err != nil { return nil, err } if typ != sys.BPF_TYPE_PROG { _ = fd.Close() return nil, fmt.Errorf("%s is not a Program", fileName) } p, err := newProgramFromFD(fd) if err == nil { p.pinnedPath = fileName if haveObjName() != nil { p.name = filepath.Base(fileName) } } return p, err } // ProgramGetNextID returns the ID of the next eBPF program. // // Returns ErrNotExist, if there is no next eBPF program. func ProgramGetNextID(startID ProgramID) (ProgramID, error) { attr := &sys.ProgGetNextIdAttr{Id: uint32(startID)} return ProgramID(attr.NextId), sys.ProgGetNextId(attr) } // BindMap binds map to the program and is only released once program is released. 
// // This may be used in cases where metadata should be associated with the program // which otherwise does not contain any references to the map. func (p *Program) BindMap(m *Map) error { attr := &sys.ProgBindMapAttr{ ProgFd: uint32(p.FD()), MapFd: uint32(m.FD()), } return sys.ProgBindMap(attr) } var errUnrecognizedAttachType = errors.New("unrecognized attach type") // find an attach target type in the kernel. // // name, progType and attachType determine which type we need to attach to. // // The attach target may be in a loaded kernel module. // In that case the returned handle will be non-nil. // The caller is responsible for closing the handle. // // Returns errUnrecognizedAttachType if the combination of progType and attachType // is not recognised. func findProgramTargetInKernel(name string, progType ProgramType, attachType AttachType, cache *btf.Cache) (*btf.Handle, btf.TypeID, error) { type match struct { p ProgramType a AttachType } var ( typeName, featureName string target btf.Type ) switch (match{progType, attachType}) { case match{StructOps, AttachStructOps}: typeName = name featureName = "struct_ops " + name target = (*btf.Struct)(nil) case match{LSM, AttachLSMMac}: typeName = "bpf_lsm_" + name featureName = name + " LSM hook" target = (*btf.Func)(nil) case match{Tracing, AttachTraceIter}: typeName = "bpf_iter_" + name featureName = name + " iterator" target = (*btf.Func)(nil) case match{Tracing, AttachTraceFEntry}: typeName = name featureName = fmt.Sprintf("fentry %s", name) target = (*btf.Func)(nil) case match{Tracing, AttachTraceFExit}: typeName = name featureName = fmt.Sprintf("fexit %s", name) target = (*btf.Func)(nil) case match{Tracing, AttachModifyReturn}: typeName = name featureName = fmt.Sprintf("fmod_ret %s", name) target = (*btf.Func)(nil) case match{Tracing, AttachTraceRawTp}: typeName = fmt.Sprintf("btf_trace_%s", name) featureName = fmt.Sprintf("raw_tp %s", name) target = (*btf.Typedef)(nil) default: return nil, 0, 
errUnrecognizedAttachType } spec, module, err := findTargetInKernel(typeName, &target, cache) if errors.Is(err, btf.ErrNotFound) { return nil, 0, &internal.UnsupportedFeatureError{Name: featureName} } // See cilium/ebpf#894. Until we can disambiguate between equally-named kernel // symbols, we should explicitly refuse program loads. They will not reliably // do what the caller intended. if errors.Is(err, btf.ErrMultipleMatches) { return nil, 0, fmt.Errorf("attaching to ambiguous kernel symbol is not supported: %w", err) } if err != nil { return nil, 0, fmt.Errorf("find target for %s: %w", featureName, err) } id, err := spec.TypeID(target) if err != nil { module.Close() return nil, 0, err } return module, id, nil } // findTargetInKernel attempts to find a named type in the current kernel. // // target will point at the found type after a successful call. Searches both // vmlinux and any loaded modules. // // Returns a non-nil handle if the type was found in a module, or btf.ErrNotFound // if the type wasn't found at all. func findTargetInKernel(typeName string, target *btf.Type, cache *btf.Cache) (*btf.Spec, *btf.Handle, error) { kernelSpec, err := cache.Kernel() if err != nil { return nil, nil, err } err = kernelSpec.TypeByName(typeName, target) if errors.Is(err, btf.ErrNotFound) { spec, module, err := findTargetInModule(typeName, target, cache) if err != nil { // EPERM may be returned when we do not have CAP_SYS_ADMIN. // Wrap error with btf.ErrNotFound so callers can handle it accordingly. if errors.Is(err, unix.EPERM) { return spec, nil, fmt.Errorf("find target in modules: %w (%w)", btf.ErrNotFound, err) } return nil, nil, fmt.Errorf("find target in modules: %w", err) } return spec, module, nil } if err != nil { return nil, nil, fmt.Errorf("find target in vmlinux: %w", err) } return kernelSpec, nil, err } // findTargetInModule attempts to find a named type in any loaded module. // // base must contain the kernel's types and is used to parse kmod BTF. 
Modules // are searched in the order they were loaded. // // Returns btf.ErrNotFound if the target can't be found in any module. func findTargetInModule(typeName string, target *btf.Type, cache *btf.Cache) (*btf.Spec, *btf.Handle, error) { it := new(btf.HandleIterator) defer it.Handle.Close() for it.Next() { info, err := it.Handle.Info() if err != nil { return nil, nil, fmt.Errorf("get info for BTF ID %d: %w", it.ID, err) } if !info.IsModule() { continue } spec, err := cache.Module(info.Name) if err != nil { return nil, nil, fmt.Errorf("parse types for module %s: %w", info.Name, err) } err = spec.TypeByName(typeName, target) if errors.Is(err, btf.ErrNotFound) { continue } if err != nil { return nil, nil, fmt.Errorf("lookup type in module %s: %w", info.Name, err) } return spec, it.Take(), nil } if err := it.Err(); err != nil { return nil, nil, fmt.Errorf("iterate modules: %w", err) } return nil, nil, btf.ErrNotFound } // find an attach target type in a program. // // Returns errUnrecognizedAttachType. 
func findTargetInProgram(prog *Program, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) { type match struct { p ProgramType a AttachType } var typeName string switch (match{progType, attachType}) { case match{Extension, AttachNone}, match{Tracing, AttachTraceFEntry}, match{Tracing, AttachTraceFExit}: typeName = name default: return 0, errUnrecognizedAttachType } btfHandle, err := prog.Handle() if err != nil { return 0, fmt.Errorf("load target BTF: %w", err) } defer btfHandle.Close() spec, err := btfHandle.Spec(nil) if err != nil { return 0, err } var targetFunc *btf.Func err = spec.TypeByName(typeName, &targetFunc) if err != nil { return 0, fmt.Errorf("find target %s: %w", typeName, err) } return spec.TypeID(targetFunc) } ================================================ FILE: prog_linux_test.go ================================================ package ebpf import ( "fmt" "math" "runtime" "slices" "testing" "time" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/unix" ) func TestProgramTestRunInterrupt(t *testing.T) { testutils.SkipOnOldKernel(t, "5.0", "EINTR from BPF_PROG_TEST_RUN") prog := createBasicProgram(t) var ( tgid = unix.Getpid() tidChan = make(chan int, 1) exit = make(chan struct{}) errs = make(chan error, 1) timeout = time.After(5 * time.Second) ) defer close(exit) go func() { runtime.LockOSThread() defer func() { // Wait for the test to allow us to unlock the OS thread, to // ensure that we don't send SIGUSR1 to the wrong thread. <-exit runtime.UnlockOSThread() }() tidChan <- unix.Gettid() // Block this thread in the BPF syscall, so that we can // trigger EINTR by sending a signal. 
opts := RunOptions{ Data: internal.EmptyBPFContext, Repeat: math.MaxInt32, Reset: func() { // We don't know how long finishing the // test run would take, so flag that we've seen // an interruption and abort the goroutine. close(errs) runtime.Goexit() }, } _, _, err := prog.run(&opts) errs <- err }() tid := <-tidChan for { err := unix.Tgkill(tgid, tid, unix.SIGUSR1) if err != nil { t.Fatal("Can't send signal to goroutine thread:", err) } select { case err, ok := <-errs: if !ok { return } testutils.SkipIfNotSupported(t, err) if err == nil { t.Fatal("testRun wasn't interrupted") } t.Fatal("testRun returned an error:", err) case <-timeout: t.Fatal("Timed out trying to interrupt the goroutine") default: } } } func TestProgramVerifierLogLinux(t *testing.T) { check := func(t *testing.T, err error) { t.Helper() var ve *internal.VerifierError qt.Assert(t, qt.ErrorAs(err, &ve)) loglen := len(fmt.Sprintf("%+v", ve)) qt.Assert(t, qt.IsTrue(loglen > minVerifierLogSize), qt.Commentf("Log buffer didn't grow past minimum, got %d bytes", loglen)) } // Generate a base program of sufficient size whose verifier log does not fit // in the minimum buffer size. Stay under 4096 insn limit of older kernels. var base asm.Instructions for i := 0; i < 4093; i++ { base = append(base, asm.Mov.Reg(asm.R0, asm.R1)) } // Touch R10 (read-only frame pointer) to reliably force a verifier error. invalid := slices.Clone(base) invalid = append(invalid, asm.Mov.Reg(asm.R10, asm.R0)) invalid = append(invalid, asm.Return()) valid := slices.Clone(base) valid = append(valid, asm.Return()) // Start out with testing against the invalid program. spec := &ProgramSpec{ Type: SocketFilter, License: "MIT", Instructions: invalid, } _, err := newProgram(t, spec, nil) check(t, err) // Run tests against a valid program from here on out. spec.Instructions = valid // Explicitly request verifier log for a valid program and a start size. 
prog := mustNewProgram(t, spec, &ProgramOptions{ LogLevel: LogLevelInstruction, LogSizeStart: minVerifierLogSize * 2, }) qt.Assert(t, qt.IsTrue(len(prog.VerifierLog) > minVerifierLogSize)) } func TestProgramTestRunSyscall(t *testing.T) { testutils.SkipOnOldKernel(t, "5.14", "BPF_PROG_TYPE_SYSCALL") prog := mustNewProgram(t, &ProgramSpec{ Type: Syscall, Flags: sys.BPF_F_SLEEPABLE, License: "MIT", Instructions: []asm.Instruction{ // fn (ctx *u64) { *ctx++; return *ctx } asm.LoadMem(asm.R0, asm.R1, 0, asm.DWord), asm.Add.Imm(asm.R0, 1), asm.StoreMem(asm.R1, 0, asm.R0, asm.DWord), asm.Return(), }, }, nil) // only Context rc, err := prog.Run(&RunOptions{Context: uint64(42)}) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(rc, 43)) // Context and ContextOut out := uint64(0) rc, err = prog.Run(&RunOptions{Context: uint64(99), ContextOut: &out}) if err != nil { t.Fatal(err) } qt.Assert(t, qt.Equals(rc, 100)) qt.Assert(t, qt.Equals(out, 100)) } ================================================ FILE: prog_test.go ================================================ package ebpf import ( "bytes" "encoding/binary" "errors" "fmt" "math" "os" "path/filepath" "runtime" "strings" "syscall" "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/unix" ) func TestProgramRun(t *testing.T) { pat := []byte{0xDE, 0xAD, 0xBE, 0xEF} buf := internal.EmptyBPFContext // r1 : ctx_start // r1+4: ctx_end ins := asm.Instructions{ // r2 = *(r1+4) asm.LoadMem(asm.R2, asm.R1, 4, asm.Word), // r1 = *(r1+0) asm.LoadMem(asm.R1, asm.R1, 0, asm.Word), // r3 = r1 asm.Mov.Reg(asm.R3, asm.R1), // r3 += len(buf) asm.Add.Imm(asm.R3, int32(len(buf))), // if r3 > r2 goto +len(pat) asm.JGT.Reg(asm.R3, asm.R2, "out"), } for i, b := range pat { 
ins = append(ins, asm.StoreImm(asm.R1, int16(i), int64(b), asm.Byte)) } ins = append(ins, // return 42 asm.LoadImm(asm.R0, 42, asm.DWord).WithSymbol("out"), asm.Return(), ) if platform.IsWindows { // Windows uses an incompatible context for XDP. Pointers are // 64 bit. // See https://github.com/microsoft/ebpf-for-windows/issues/3873 // r2 = *(r1+8) ins[0] = asm.LoadMem(asm.R2, asm.R1, 8, asm.DWord) // r1 = *(r1+0) ins[1] = asm.LoadMem(asm.R1, asm.R1, 0, asm.DWord) } t.Log(ins) prog := mustNewProgram(t, &ProgramSpec{ Name: "test", Type: XDP, Instructions: ins, License: "MIT", }, nil) p2, err := prog.Clone() if err != nil { t.Fatal("Can't clone program") } defer p2.Close() prog.Close() prog = p2 out := make([]byte, len(buf)) ret := mustRun(t, prog, &RunOptions{Data: buf, DataOut: out}) qt.Assert(t, qt.Equals(ret, 42)) qt.Assert(t, qt.DeepEquals(out[:len(pat)], pat)) } func TestProgramRunWithOptions(t *testing.T) { testutils.SkipOnOldKernel(t, "5.15", "XDP ctx_in/ctx_out") buf := internal.EmptyBPFContext var prog *Program var in, out any if platform.IsWindows { type winSampleProgramContext struct { _ uint64 // data_start (currently leaks kernel pointer) _ uint64 // data_end (currently leaks kernel pointer) Uint32Data uint32 Uint16Data uint16 _ uint16 HelperData1 uint32 HelperData2 uint32 } prog = createProgram(t, WindowsSample, 0) in = &winSampleProgramContext{Uint32Data: 23, HelperData2: 42} out = &winSampleProgramContext{Uint32Data: 23, HelperData2: 42} } else { prog = createProgram(t, XDP, int64(sys.XDP_ABORTED)) in = &sys.XdpMd{Data: 0, DataEnd: uint32(len(buf))} out = &sys.XdpMd{} } opts := RunOptions{ Data: buf, Context: in, ContextOut: out, } ret, err := prog.Run(&opts) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } if ret != 0 { t.Error("Expected return value to be 0, got", ret) } qt.Assert(t, qt.DeepEquals(out, in)) } func TestProgramRunRawTracepoint(t *testing.T) { testutils.SkipOnOldKernel(t, "5.10", "RawTracepoint test run") prog := 
createProgram(t, RawTracepoint, 0) ret, err := prog.Run(&RunOptions{}) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } if ret != 0 { t.Error("Expected return value to be 0, got", ret) } } func TestProgramRunEmptyData(t *testing.T) { prog := createProgram(t, SocketFilter, 0) _, err := prog.Run(nil) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.ErrorIs(err, unix.EINVAL)) } func TestProgramBenchmark(t *testing.T) { if platform.IsWindows { t.Skip("BPF_PROG_TEST_RUN requires providing context on Windows") } prog := createBasicProgram(t) ret, duration, err := prog.Benchmark(internal.EmptyBPFContext, 1, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Error from Benchmark:", err) } if ret != 2 { t.Error("Expected return value 2, got", ret) } if duration == 0 { t.Error("Expected non-zero duration") } } func TestProgramClose(t *testing.T) { prog := createBasicProgram(t) if err := prog.Close(); err != nil { t.Fatal("Can't close program:", err) } } func TestProgramPin(t *testing.T) { spec := fixupProgramSpec(basicProgramSpec) prog := mustNewProgram(t, spec, nil) tmp := testutils.TempBPFFS(t) path := filepath.Join(tmp, "program") if err := prog.Pin(path); err != nil { t.Fatal(err) } pinned := prog.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) prog.Close() prog, err := LoadPinnedProgram(path, nil) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer prog.Close() qt.Assert(t, qt.Equals(prog.Type(), spec.Type)) if haveObjName() == nil { qt.Assert(t, qt.Equals(prog.name, "test")) } else { qt.Assert(t, qt.Equals(prog.name, "program")) } if !prog.IsPinned() { t.Error("Expected IsPinned to be true") } } func TestProgramUnpin(t *testing.T) { prog := createBasicProgram(t) tmp := testutils.TempBPFFS(t) path := filepath.Join(tmp, "program") if err := prog.Pin(path); err != nil { t.Fatal(err) } pinned := prog.IsPinned() qt.Assert(t, qt.IsTrue(pinned)) if err := prog.Unpin(); err != nil { t.Fatal("Failed to unpin program:", 
err) } if _, err := os.Stat(path); err == nil { t.Fatal("Pinned program path still exists after unpinning:", err) } } func TestProgramLoadPinnedWithFlags(t *testing.T) { // Introduced in commit 6e71b04a8224. testutils.SkipOnOldKernel(t, "4.14", "file_flags in BPF_OBJ_GET") prog := createBasicProgram(t) tmp := testutils.TempBPFFS(t) path := filepath.Join(tmp, "program") if err := prog.Pin(path); err != nil { t.Fatal(err) } prog.Close() _, err := LoadPinnedProgram(path, &LoadPinOptions{ Flags: math.MaxUint32, }) testutils.SkipIfNotSupported(t, err) if !errors.Is(err, unix.EINVAL) { t.Fatal("Invalid flags don't trigger an error:", err) } } func TestProgramVerifierOutputOnError(t *testing.T) { _, err := newProgram(t, &ProgramSpec{ Type: SocketFilter, Instructions: asm.Instructions{ asm.Return(), }, License: "MIT", }, nil) if err == nil { t.Fatal("Expected program to be invalid") } ve, ok := err.(*VerifierError) if !ok { t.Fatal("NewProgram does return an unwrapped VerifierError") } switch { case platform.IsLinux: if !strings.Contains(ve.Error(), "R0 !read_ok") { t.Logf("%+v", ve) t.Error("Missing verifier log in error summary") } case platform.IsWindows: if !strings.Contains(ve.Error(), "r0.type == number") { t.Logf("%+v", ve) t.Error("Missing verifier log in error summary") } default: t.Error("Unsupported platform", runtime.GOOS) } } func TestProgramKernelVersion(t *testing.T) { testutils.SkipOnOldKernel(t, "4.20", "KernelVersion") _ = mustNewProgram(t, &ProgramSpec{ Type: Kprobe, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, KernelVersion: 42, License: "MIT", }, nil) } func TestProgramVerifierLog(t *testing.T) { check := func(t *testing.T, err error) { t.Helper() var ve *internal.VerifierError qt.Assert(t, qt.ErrorAs(err, &ve)) loglen := 0 for _, line := range ve.Log { loglen += len(line) } qt.Assert(t, qt.IsTrue(loglen > 0)) } // Touch R10 (read-only frame pointer) to reliably force a verifier error. 
invalid := asm.Instructions{ asm.Mov.Reg(asm.R10, asm.R0), asm.Return(), } valid := asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), } // Start out with testing against the invalid program. spec := &ProgramSpec{ Type: SocketFilter, License: "MIT", Instructions: invalid, } // Don't explicitly request a verifier log for an invalid program. _, err := newProgram(t, spec, nil) check(t, err) // Disabling the verifier log should result in a VerifierError without a log. _, err = newProgram(t, spec, &ProgramOptions{ LogDisabled: true, }) var ve *internal.VerifierError qt.Assert(t, qt.ErrorAs(err, &ve)) qt.Assert(t, qt.HasLen(ve.Log, 0)) // Explicitly request a verifier log for an invalid program. _, err = newProgram(t, spec, &ProgramOptions{ LogLevel: LogLevelInstruction, }) check(t, err) // Run tests against a valid program from here on out. spec.Instructions = valid // Don't request a verifier log, expect the valid program to be created // without errors. prog := mustNewProgram(t, spec, nil) qt.Assert(t, qt.HasLen(prog.VerifierLog, 0)) // Explicitly request verifier log for a valid program. prog = mustNewProgram(t, spec, &ProgramOptions{ LogLevel: LogLevelInstruction, }) qt.Assert(t, qt.Not(qt.HasLen(prog.VerifierLog, 0))) } func TestProgramVerifierLogRetry(t *testing.T) { someError := errors.New("not a buffer error") t.Run("retry with oversized buffer, no log_true_size", func(t *testing.T) { // First load failure, without logging enabled. Retry with logging enabled. attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch)) qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize)) // Second failure with logging enabled. No buffer error, don't retry. 
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError))) qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch)) qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize)) }) t.Run("retry with oversized buffer, with log_true_size", func(t *testing.T) { // First load failure, without logging enabled. Retry with larger buffer. attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) // Buffer was sufficiently large and log_true_size was set. Don't retry and // don't modify LogSize to LogTrueSize. attr.LogTrueSize = 123 qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError))) qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize)) }) t.Run("retry with undersized buffer, no log_true_size", func(t *testing.T) { // First load failure, without logging enabled. Retry with larger buffer. attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) // Second failure, this time the kernel signals an undersized buffer. Retry // with double the size. qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize*2)) }) t.Run("retry with undersized buffer, with log_true_size", func(t *testing.T) { // First load failure, without logging enabled. Retry with larger buffer. attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError))) // Second failure, the kernel signals undersized buffer and also sets // log_true_size. Retry with the exact size required. attr.LogTrueSize = 123 qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) qt.Assert(t, qt.Equals(attr.LogSize, 123)) }) t.Run("grow to maximum buffer size", func(t *testing.T) { // Previous loads pushed the log size to (or above) half of the maximum, // which would make it overflow on the next retry. Make sure the log size // actually hits the maximum so we can bail out. 
attr := &sys.ProgLoadAttr{LogLevel: LogLevelBranch, LogSize: maxVerifierLogSize / 2} qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC))) qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize)) // Don't retry if the buffer is already at the maximum size, no matter // the return code. qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError))) qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC))) }) t.Run("start at maximum buffer size", func(t *testing.T) { // The user requested a log buffer exceeding the maximum size, but no log // level. Retry with the maximum size and default log level. attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, math.MaxUint32, unix.EINVAL))) qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch)) qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize)) // Log still doesn't fit maximum-size buffer. Don't retry. qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC))) }) t.Run("ensure growth terminates within max attempts", func(t *testing.T) { attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0} var terminated bool for i := 1; i <= maxVerifierAttempts; i++ { if !retryLogAttrs(attr, 0, syscall.ENOSPC) { terminated = true } } qt.Assert(t, qt.IsTrue(terminated)) }) } func TestProgramWithUnsatisfiedMap(t *testing.T) { coll, err := LoadCollectionSpec("testdata/loader-el.elf") if err != nil { t.Fatal(err) } // The program will have at least one map reference. 
progSpec := coll.Programs["xdp_prog"] progSpec.ByteOrder = nil _, err = newProgram(t, progSpec, nil) if !errors.Is(err, asm.ErrUnsatisfiedMapReference) { t.Fatal("Expected an error wrapping asm.ErrUnsatisfiedMapReference, got", err) } t.Log(err) } func TestProgramName(t *testing.T) { testutils.SkipIfNotSupported(t, haveObjName()) prog := mustNewProgram(t, &ProgramSpec{ Name: "test*123", Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 1, asm.DWord), asm.Return(), }, License: "MIT", }, nil) var info sys.ProgInfo if err := sys.ObjInfo(prog.fd, &info); err != nil { t.Fatal(err) } name := unix.ByteSliceToString(info.Name[:]) qt.Assert(t, qt.Equals(name, "test123")) } func TestProgramCloneNil(t *testing.T) { p, err := (*Program)(nil).Clone() if err != nil { t.Fatal(err) } if p != nil { t.Fatal("Cloning a nil Program doesn't return nil") } } func TestProgramMarshaling(t *testing.T) { const idx = uint32(0) arr := createMap(t, ProgramArray, 1) defer arr.Close() if err := arr.Put(idx, (*Program)(nil)); err == nil { t.Fatal("Put accepted a nil Program") } prog := createBasicProgram(t) if err := arr.Put(idx, prog); err != nil { t.Fatal("Can't put program:", err) } if err := arr.Lookup(idx, Program{}); err == nil { t.Fatal("Lookup accepts non-pointer Program") } var prog2 *Program defer prog2.Close() if err := arr.Lookup(idx, prog2); err == nil { t.Fatal("Get accepts *Program") } testutils.SkipOnOldKernel(t, "4.12", "lookup for ProgramArray") if err := arr.Lookup(idx, &prog2); err != nil { t.Fatal("Can't unmarshal program:", err) } defer prog2.Close() if prog2 == nil { t.Fatal("Unmarshalling set program to nil") } } func TestProgramFromFD(t *testing.T) { spec := fixupProgramSpec(basicProgramSpec) prog := mustNewProgram(t, spec, nil) // If you're thinking about copying this, don't. Use // Clone() instead. 
prog2, err := NewProgramFromFD(testutils.DupFD(t, prog.FD())) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } defer prog2.Close() // Name and type are supposed to be copied from program info. if haveObjName() == nil && prog2.name != "test" { t.Errorf("Expected program to have name test, got '%s'", prog2.name) } qt.Assert(t, qt.Equals(prog2.Type(), spec.Type)) } func TestHaveProgTestRun(t *testing.T) { testutils.CheckFeatureTest(t, haveProgRun) } func TestProgramGetNextID(t *testing.T) { testutils.SkipOnOldKernel(t, "4.13", "bpf_prog_get_next_id") // Ensure there is at least one program loaded _ = createBasicProgram(t) // As there can be multiple eBPF programs, we loop over all of them and // make sure, the IDs increase and the last call will return ErrNotExist last := ProgramID(0) for { next, err := ProgramGetNextID(last) if errors.Is(err, os.ErrNotExist) { if last == 0 { t.Fatal("Got ErrNotExist on the first iteration") } break } if err != nil { t.Fatal("Unexpected error:", err) } if next <= last { t.Fatalf("Expected next ID (%d) to be higher than the last ID (%d)", next, last) } last = next } } func TestNewProgramFromID(t *testing.T) { prog := createBasicProgram(t) info, err := prog.Info() testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal("Could not get program info:", err) } id, ok := info.ID() if !ok { t.Skip("Program ID not supported") } prog2, err := NewProgramFromID(id) if err != nil { t.Fatalf("Can't get FD for program ID %d: %v", id, err) } prog2.Close() // As there can be multiple programs, we use max(uint32) as ProgramID to trigger an expected error. 
_, err = NewProgramFromID(ProgramID(math.MaxUint32)) if !errors.Is(err, os.ErrNotExist) { t.Fatal("Expected ErrNotExist, got:", err) } } func TestProgramRejectIncorrectByteOrder(t *testing.T) { spec := basicProgramSpec.Copy() spec.ByteOrder = binary.BigEndian if spec.ByteOrder == internal.NativeEndian { spec.ByteOrder = binary.LittleEndian } _, err := newProgram(t, spec, nil) if err == nil { t.Error("Incorrect ByteOrder should be rejected at load time") } } // This uses unkeyed fields on purpose to force setting a non-zero value when // a new field is added. func TestProgramSpecCopy(t *testing.T) { a := &ProgramSpec{ "test", 1, 1, 1, "attach", nil, // Can't copy Program "section", asm.Instructions{ asm.Return(), }, 1, "license", 1, binary.LittleEndian, } qt.Check(t, qt.IsNil((*ProgramSpec)(nil).Copy())) qt.Assert(t, testutils.IsDeepCopy(a.Copy(), a)) } func TestProgramSpecCompatible(t *testing.T) { arr := createMap(t, Array, 2) spec := &ProgramSpec{ Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, -1, asm.DWord), asm.LoadMapPtr(asm.R1, arr.FD()), asm.Mov.Imm32(asm.R0, 0), asm.Return(), }, License: "MIT", } prog := mustNewProgram(t, spec, nil) info, err := prog.Info() testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) err = spec.Compatible(info) testutils.SkipIfNotSupportedOnOS(t, err) qt.Assert(t, qt.IsNil(err)) } func TestProgramAttachToKernel(t *testing.T) { // See https://github.com/torvalds/linux/commit/290248a5b7d829871b3ea3c62578613a580a1744 testutils.SkipOnOldKernel(t, "5.5", "attach_btf_id") tests := []struct { attachTo string programType ProgramType attachType AttachType flags uint32 }{ { attachTo: "task_getpgid", programType: LSM, attachType: AttachLSMMac, }, { attachTo: "inet_dgram_connect", programType: Tracing, attachType: AttachTraceFEntry, }, { attachTo: "inet_dgram_connect", programType: Tracing, attachType: AttachTraceFExit, }, { attachTo: "bpf_modify_return_test", programType: Tracing, attachType: 
AttachModifyReturn, }, { attachTo: "kfree_skb", programType: Tracing, attachType: AttachTraceRawTp, }, { attachTo: "bpf_testmod_test_read", programType: Tracing, attachType: AttachTraceFEntry, }, { attachTo: "bpf_testmod_test_read", programType: Tracing, attachType: AttachTraceFExit, }, { attachTo: "bpf_testmod_test_read", programType: Tracing, attachType: AttachModifyReturn, }, { attachTo: "bpf_testmod_test_read", programType: Tracing, attachType: AttachTraceRawTp, }, } for _, test := range tests { name := fmt.Sprintf("%s:%s", test.attachType, test.attachTo) t.Run(name, func(t *testing.T) { if strings.HasPrefix(test.attachTo, "bpf_testmod_") { requireTestmod(t) } _ = mustNewProgram(t, &ProgramSpec{ AttachTo: test.attachTo, AttachType: test.attachType, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "GPL", Type: test.programType, Flags: test.flags, }, nil) }) } } func TestProgramKernelTypes(t *testing.T) { if _, err := os.Stat("/sys/kernel/btf/vmlinux"); os.IsNotExist(err) { t.Skip("/sys/kernel/btf/vmlinux not present") } btfSpec, err := btf.LoadSpec("/sys/kernel/btf/vmlinux") if err != nil { t.Fatal(err) } _, err = newProgram(t, &ProgramSpec{ Type: Tracing, AttachType: AttachTraceIter, AttachTo: "bpf_map", Instructions: asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), }, License: "MIT", }, &ProgramOptions{ KernelTypes: btfSpec, }) testutils.SkipIfNotSupported(t, err) qt.Assert(t, qt.IsNil(err)) } func TestProgramBindMap(t *testing.T) { testutils.SkipOnOldKernel(t, "5.10", "BPF_PROG_BIND_MAP") arr := createMap(t, Array, 2) prog := createBasicProgram(t) // The attached map does not contain BTF information. So // the metadata part of the program will be empty. This // test just makes sure that we can bind a map to a program. 
if err := prog.BindMap(arr); err != nil { t.Errorf("Failed to bind map to program: %v", err) } } func TestProgramInstructions(t *testing.T) { name := "test_prog" spec := &ProgramSpec{ Type: SocketFilter, Name: name, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, -1, asm.DWord).WithSymbol(name), asm.Return(), }, License: "MIT", } prog := mustNewProgram(t, spec, nil) pi, err := prog.Info() testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatal(err) } if platform.IsWindows { t.Skip("prog.Info() does not return a valid Tag on Windows") } ok, err := spec.Instructions.HasTag(pi.Tag, internal.NativeEndian) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.IsTrue(ok), qt.Commentf("ProgramSpec tag differs from xlated instructions")) } func TestProgramLoadErrors(t *testing.T) { testutils.SkipOnOldKernel(t, "4.10", "stable verifier log output") spec, err := LoadCollectionSpec(testutils.NativeFile(t, "testdata/errors-%s.elf")) qt.Assert(t, qt.IsNil(err)) var b btf.Builder raw, err := b.Marshal(nil, nil) qt.Assert(t, qt.IsNil(err)) empty, err := btf.LoadSpecFromReader(bytes.NewReader(raw)) qt.Assert(t, qt.IsNil(err)) for _, test := range []struct { name string want error }{ {"poisoned_single", errBadRelocation}, {"poisoned_double", errBadRelocation}, {"poisoned_kfunc", errUnknownKfunc}, } { progSpec := spec.Programs[test.name] qt.Assert(t, qt.IsNotNil(progSpec)) t.Run(test.name, func(t *testing.T) { t.Log(progSpec.Instructions) _, err := newProgram(t, progSpec, &ProgramOptions{ KernelTypes: empty, }) testutils.SkipIfNotSupported(t, err) var ve *VerifierError qt.Assert(t, qt.ErrorAs(err, &ve)) t.Logf("%-5v", ve) qt.Assert(t, qt.ErrorIs(err, test.want)) }) } } func TestProgramTargetsKernelModule(t *testing.T) { ps := ProgramSpec{Type: Kprobe} qt.Assert(t, qt.IsFalse(ps.targetsKernelModule())) ps.AttachTo = "bpf_testmod_test_read" qt.Assert(t, qt.IsTrue(ps.targetsKernelModule())) } func TestProgramLoadBoundToDevice(t *testing.T) { testutils.SkipOnOldKernel(t, "6.3", 
"device-bound XDP programs") ins := asm.Instructions{ asm.LoadImm(asm.R0, 2, asm.DWord).WithSymbol("out"), asm.Return(), } _, err := NewProgram(&ProgramSpec{ Type: XDP, Ifindex: math.MaxUint32, AttachType: AttachXDP, Instructions: ins, Flags: sys.BPF_F_XDP_DEV_BOUND_ONLY, License: "MIT", }) testutils.SkipIfNotSupportedOnOS(t, err) // Binding to loopback leads to crashes, yet is only explicitly disallowed // since 3595599fa836 ("net: xdp: Disallow attaching device-bound programs in // generic mode"). This only landed in 6.14 and returns EOPNOTSUPP. // // However, since attaching to loopback quietly succeeds on older kernels, use // a non-existent ifindex to trigger EINVAL on all kernels. Without specifying // ifindex, loading the program succeeds if the kernel knows the // DEV_BOUND_ONLY flag. qt.Assert(t, qt.ErrorIs(err, unix.EINVAL)) } func BenchmarkNewProgram(b *testing.B) { testutils.SkipOnOldKernel(b, "5.18", "kfunc support") spec, err := LoadCollectionSpec(testutils.NativeFile(b, "testdata/kfunc-%s.elf")) qt.Assert(b, qt.IsNil(err)) b.ReportAllocs() for b.Loop() { _, err := NewProgram(spec.Programs["benchmark"]) if !errors.Is(err, unix.EACCES) { b.Fatal("Unexpected error:", err) } } } // Print the full verifier log when loading a program fails. func ExampleVerifierError_retrieveFullLog() { _, err := NewProgram(&ProgramSpec{ Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), // Missing Return }, License: "MIT", }) var ve *VerifierError if errors.As(err, &ve) { // Using %+v will print the whole verifier error, not just the last // few lines. fmt.Printf("Verifier error: %+v\n", ve) } } // VerifierLog understands a variety of formatting flags. 
func ExampleVerifierError() { err := internal.ErrorWithLog( "catastrophe", syscall.ENOSPC, []byte("first\nsecond\nthird"), ) fmt.Printf("With %%s: %s\n", err) fmt.Printf("All log lines: %+v\n", err) fmt.Printf("First line: %+1v\n", err) fmt.Printf("Last two lines: %-2v\n", err) // Output: With %s: catastrophe: no space left on device: third (2 line(s) omitted) // All log lines: catastrophe: no space left on device: // first // second // third // First line: catastrophe: no space left on device: // first // (2 line(s) omitted) // Last two lines: catastrophe: no space left on device: // (1 line(s) omitted) // second // third } // Use NewProgramWithOptions if you'd like to get the verifier output // for a program, or if you want to change the buffer size used when // generating error messages. func ExampleProgram_retrieveVerifierLog() { spec := &ProgramSpec{ Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", } prog, err := NewProgramWithOptions(spec, ProgramOptions{ LogLevel: LogLevelInstruction, }) if err != nil { panic(err) } defer prog.Close() fmt.Println("The verifier output is:") fmt.Println(prog.VerifierLog) } // It's possible to read a program directly from a ProgramArray. 
func ExampleProgram_unmarshalFromMap() { progArray, err := LoadPinnedMap("/path/to/map", nil) if err != nil { panic(err) } defer progArray.Close() // Load a single program var prog *Program if err := progArray.Lookup(uint32(0), &prog); err != nil { panic(err) } defer prog.Close() fmt.Println("first prog:", prog) // Iterate all programs var ( key uint32 entries = progArray.Iterate() ) for entries.Next(&key, &prog) { fmt.Println(key, "is", prog) } if err := entries.Err(); err != nil { panic(err) } } func ExampleProgramSpec_Compatible() { spec := &ProgramSpec{ Type: SocketFilter, Instructions: asm.Instructions{ asm.LoadImm(asm.R0, 0, asm.DWord), asm.Return(), }, License: "MIT", } prog, _ := NewProgram(spec) info, _ := prog.Info() if err := spec.Compatible(info); err != nil { fmt.Printf("The programs are incompatible: %s\n", err) } else { fmt.Println("The programs are compatible") } } ================================================ FILE: ringbuf/doc.go ================================================ // Package ringbuf allows interacting with the BPF ring buffer. // // BPF allows submitting custom events to a BPF ring buffer map set up // by userspace. This is very useful to push things like packet samples // from BPF to a daemon running in user space. 
package ringbuf

================================================
FILE: ringbuf/helper_other_test.go
================================================
//go:build !windows

package ringbuf

import (
	"testing"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/asm"
)

// mustOutputSamplesProg creates a 4096-entry RingBuf map and an XDP program
// which, for every entry of sampleMessages, reserves size bytes in the ring,
// fills them byte by byte from the stack and then submits or discards the
// reservation using the message's flags.
//
// The map and the program are closed via tb.Cleanup. Any error fails tb.
func mustOutputSamplesProg(tb testing.TB, sampleMessages ...sampleMessage) (*ebpf.Program, *ebpf.Map) {
	tb.Helper()

	events, err := ebpf.NewMap(&ebpf.MapSpec{
		Type:       ebpf.RingBuf,
		MaxEntries: 4096,
	})
	qt.Assert(tb, qt.IsNil(err))
	tb.Cleanup(func() {
		events.Close()
	})

	// The stack buffer has to be able to hold the largest sample.
	var maxSampleSize int
	for _, msg := range sampleMessages {
		maxSampleSize = max(maxSampleSize, msg.size)
	}

	insns := asm.Instructions{
		asm.LoadImm(asm.R0, 0x0102030404030201, asm.DWord),
		asm.Mov.Reg(asm.R9, asm.R1),
	}

	// Fill the stack with copies of the pattern held in R0, one dword at a time.
	bufDwords := (maxSampleSize / 8) + 1
	for dword := range bufDwords {
		insns = append(insns,
			asm.StoreMem(asm.RFP, int16(dword+1)*-8, asm.R0, asm.DWord),
		)
	}

	for _, msg := range sampleMessages {
		// Reserve space for the sample; a zero result jumps to "exit".
		insns = append(insns,
			asm.LoadMapPtr(asm.R1, events.FD()),
			asm.Mov.Imm(asm.R2, int32(msg.size)),
			asm.Mov.Imm(asm.R3, int32(0)),
			asm.FnRingbufReserve.Call(),
			asm.JEq.Imm(asm.R0, 0, "exit"),
			asm.Mov.Reg(asm.R5, asm.R0),
		)

		// Copy the sample from the stack into the reservation byte by byte.
		for off := range msg.size {
			insns = append(insns,
				asm.LoadMem(asm.R4, asm.RFP, -int16(off+1), asm.Byte),
				asm.StoreMem(asm.R5, int16(off), asm.R4, asm.Byte),
			)
		}

		// Submit and discard take the same arguments, only the helper differs.
		finish := asm.FnRingbufSubmit
		if msg.discard {
			finish = asm.FnRingbufDiscard
		}
		insns = append(insns,
			asm.Mov.Reg(asm.R1, asm.R5),
			asm.Mov.Imm(asm.R2, msg.flags),
			finish.Call(),
		)
	}

	insns = append(insns,
		asm.Mov.Imm(asm.R0, int32(0)).WithSymbol("exit"),
		asm.Return(),
	)

	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		License:      "MIT",
		Type:         ebpf.XDP,
		Instructions: insns,
	})
	qt.Assert(tb, qt.IsNil(err))
	tb.Cleanup(func() {
		prog.Close()
	})

	return prog, events
}
================================================ FILE: ringbuf/helper_test.go ================================================ package ringbuf import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/testutils" ) func mustRun(tb testing.TB, prog *ebpf.Program) { tb.Helper() opts := &ebpf.RunOptions{ Data: internal.EmptyBPFContext, } if platform.IsWindows { opts.Context = make([]byte, 32) } ret, err := prog.Run(opts) testutils.SkipIfNotSupported(tb, err) qt.Assert(tb, qt.IsNil(err)) qt.Assert(tb, qt.Equals(ret, uint32(0))) } ================================================ FILE: ringbuf/helper_windows_test.go ================================================ package ringbuf import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" ) func mustOutputSamplesProg(tb testing.TB, sampleMessages ...sampleMessage) (*ebpf.Program, *ebpf.Map) { tb.Helper() events, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.WindowsRingBuf, MaxEntries: 4096, }) qt.Assert(tb, qt.IsNil(err)) tb.Cleanup(func() { events.Close() }) var maxSampleSize int for _, sampleMessage := range sampleMessages { if sampleMessage.size > maxSampleSize { maxSampleSize = sampleMessage.size } } insns := asm.Instructions{ asm.LoadImm(asm.R0, 0x0102030404030201, asm.DWord), asm.Mov.Reg(asm.R9, asm.R1), } bufDwords := (maxSampleSize / 8) + 1 for i := range bufDwords { insns = append(insns, asm.StoreMem(asm.RFP, int16(i+1)*-8, asm.R0, asm.DWord), ) } for _, sampleMessage := range sampleMessages { if sampleMessage.discard { tb.Skip("discard is not supported on Windows") } insns = append(insns, asm.LoadMapPtr(asm.R1, events.FD()), asm.Mov.Reg(asm.R2, asm.RFP), asm.Add.Imm(asm.R2, -int32(8*bufDwords)), asm.Mov.Imm(asm.R3, int32(sampleMessage.size)), asm.Mov.Imm(asm.R4, sampleMessage.flags), asm.WindowsFnRingbufOutput.Call(), asm.JNE.Imm(asm.R0, 0, "exit"), 
) } insns = append(insns, asm.Mov.Imm(asm.R0, int32(0)), asm.Return().WithSymbol("exit"), ) prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ License: "MIT", Type: ebpf.WindowsSample, Instructions: insns, }) qt.Assert(tb, qt.IsNil(err)) tb.Cleanup(func() { prog.Close() }) return prog, events } ================================================ FILE: ringbuf/reader.go ================================================ package ringbuf import ( "errors" "fmt" "os" "sync" "time" "unsafe" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" ) var ( ErrClosed = os.ErrClosed errEOR = errors.New("end of ring") errBusy = errors.New("sample not committed yet") ) // poller abstracts platform-specific event notification. type poller interface { Wait(deadline time.Time) error Flush() error Close() error } // eventRing abstracts platform-specific ring buffer memory access. type eventRing interface { size() int AvailableBytes() uint64 readRecord(rec *Record) error Close() error } // ringbufHeader from 'struct bpf_ringbuf_hdr' in kernel/bpf/ringbuf.c type ringbufHeader struct { Len uint32 _ uint32 // pg_off, only used by kernel internals } const ringbufHeaderSize = int(unsafe.Sizeof(ringbufHeader{})) func (rh *ringbufHeader) isBusy() bool { return rh.Len&sys.BPF_RINGBUF_BUSY_BIT != 0 } func (rh *ringbufHeader) isDiscard() bool { return rh.Len&sys.BPF_RINGBUF_DISCARD_BIT != 0 } func (rh *ringbufHeader) dataLen() int { return int(rh.Len & ^uint32(sys.BPF_RINGBUF_BUSY_BIT|sys.BPF_RINGBUF_DISCARD_BIT)) } type Record struct { RawSample []byte // The minimum number of bytes remaining in the ring buffer after this Record has been read. Remaining int } // Reader allows reading bpf_ringbuf_output // from user space. 
type Reader struct { poller poller // mu protects read/write access to the Reader structure mu sync.Mutex ring eventRing haveData bool deadline time.Time bufferSize int pendingErr error } // NewReader creates a new BPF ringbuf reader. func NewReader(ringbufMap *ebpf.Map) (*Reader, error) { if ringbufMap.Type() != ebpf.RingBuf && ringbufMap.Type() != ebpf.WindowsRingBuf { return nil, fmt.Errorf("invalid Map type: %s", ringbufMap.Type()) } maxEntries := int(ringbufMap.MaxEntries()) if maxEntries == 0 || (maxEntries&(maxEntries-1)) != 0 { return nil, fmt.Errorf("ringbuffer map size %d is zero or not a power of two", maxEntries) } poller, err := newPoller(ringbufMap.FD()) if err != nil { return nil, err } ring, err := newRingBufEventRing(ringbufMap.FD(), maxEntries) if err != nil { poller.Close() return nil, fmt.Errorf("failed to create ringbuf ring: %w", err) } return &Reader{ poller: poller, ring: ring, bufferSize: ring.size(), // On Windows, the wait handle is only set when the reader is created, // so we miss any wakeups that happened before. // Do an opportunistic read to get any pending samples. haveData: platform.IsWindows, }, nil } // Close frees resources used by the reader. // // It interrupts calls to Read. func (r *Reader) Close() error { if err := r.poller.Close(); err != nil { if errors.Is(err, os.ErrClosed) { return nil } return err } // Acquire the lock. This ensures that Read isn't running. r.mu.Lock() defer r.mu.Unlock() var err error if r.ring != nil { err = r.ring.Close() r.ring = nil } return err } // SetDeadline controls how long Read and ReadInto will block waiting for samples. // // Passing a zero time.Time will remove the deadline. func (r *Reader) SetDeadline(t time.Time) { r.mu.Lock() defer r.mu.Unlock() r.deadline = t } // Read the next record from the BPF ringbuf. // // Calling [Close] interrupts the method with [os.ErrClosed]. Calling [Flush] // makes it return all records currently in the ring buffer, followed by [ErrFlushed]. 
// // Returns [os.ErrDeadlineExceeded] if a deadline was set and after all records // have been read from the ring. // // See [ReadInto] for a more efficient version of this method. func (r *Reader) Read() (Record, error) { var rec Record err := r.ReadInto(&rec) return rec, err } // ReadInto is like Read except that it allows reusing Record and associated buffers. func (r *Reader) ReadInto(rec *Record) error { r.mu.Lock() defer r.mu.Unlock() if r.ring == nil { return fmt.Errorf("ringbuffer: %w", ErrClosed) } for { if !r.haveData { if pe := r.pendingErr; pe != nil { r.pendingErr = nil return pe } err := r.poller.Wait(r.deadline) if errors.Is(err, os.ErrDeadlineExceeded) || errors.Is(err, ErrFlushed) { // Ignoring this for reading a valid entry after timeout or flush. // This can occur if the producer submitted to the ring buffer // with BPF_RB_NO_WAKEUP. r.pendingErr = err } else if err != nil { return err } r.haveData = true } for { err := r.ring.readRecord(rec) // Not using errors.Is which is quite a bit slower // For a tight loop it might make a difference if err == errBusy { continue } if err == errEOR { r.haveData = false break } return err } } } // BufferSize returns the size in bytes of the ring buffer func (r *Reader) BufferSize() int { return r.bufferSize } // Flush unblocks Read/ReadInto and successive Read/ReadInto calls will return pending samples at this point, // until you receive a ErrFlushed error. func (r *Reader) Flush() error { return r.poller.Flush() } // AvailableBytes returns the amount of data available to read in the ring buffer in bytes. func (r *Reader) AvailableBytes() int { // Don't need to acquire the lock here since the implementation of AvailableBytes // performs atomic loads on the producer and consumer positions. 
return int(r.ring.AvailableBytes()) } ================================================ FILE: ringbuf/reader_other.go ================================================ //go:build !windows package ringbuf import ( "time" "github.com/cilium/ebpf/internal/epoll" "github.com/cilium/ebpf/internal/unix" ) var ErrFlushed = epoll.ErrFlushed var _ poller = (*epollPoller)(nil) type epollPoller struct { *epoll.Poller events []unix.EpollEvent } func newPoller(fd int) (*epollPoller, error) { ep, err := epoll.New() if err != nil { return nil, err } if err := ep.Add(fd, 0); err != nil { ep.Close() return nil, err } return &epollPoller{ Poller: ep, events: make([]unix.EpollEvent, 1), }, nil } // Wait blocks until data is available or the deadline is reached. // Returns [os.ErrDeadlineExceeded] if a deadline was set and no wakeup was received. // Returns [ErrFlushed] if the ring buffer was flushed manually. func (p *epollPoller) Wait(deadline time.Time) error { _, err := p.Poller.Wait(p.events, deadline) return err } ================================================ FILE: ringbuf/reader_test.go ================================================ package ringbuf import ( "errors" "os" "testing" "time" "github.com/go-quicktest/qt" "github.com/google/go-cmp/cmp" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/testutils/testmain" ) type sampleMessage struct { size int flags int32 discard bool } func TestMain(m *testing.M) { testmain.Run(m) } func TestRingbufReader(t *testing.T) { testutils.SkipOnOldKernel(t, "5.8", "BPF ring buffer") readerTests := []struct { name string messages []sampleMessage want map[int][]byte }{ { name: "send one short sample", messages: []sampleMessage{{size: 5}}, want: map[int][]byte{ 5: {1, 2, 3, 4, 4}, }, }, { name: "send three short samples, the second is discarded", messages: 
[]sampleMessage{{size: 5}, {size: 10, discard: true}, {size: 15}}, want: map[int][]byte{ 5: {1, 2, 3, 4, 4}, 15: {1, 2, 3, 4, 4, 3, 2, 1, 1, 2, 3, 4, 4, 3, 2}, }, }, { name: "send five samples, every even is discarded", messages: []sampleMessage{{size: 5}, {size: 10, discard: true}, {size: 15}, {size: 20, discard: true}, {size: 25}}, want: map[int][]byte{ 5: {1, 2, 3, 4, 4}, 15: {1, 2, 3, 4, 4, 3, 2, 1, 1, 2, 3, 4, 4, 3, 2}, 25: {1, 2, 3, 4, 4, 3, 2, 1, 1, 2, 3, 4, 4, 3, 2, 1, 1, 2, 3, 4, 4, 3, 2, 1, 1}, }, }, } for _, tt := range readerTests { t.Run(tt.name, func(t *testing.T) { prog, events := mustOutputSamplesProg(t, tt.messages...) rd, err := NewReader(events) if err != nil { t.Fatal(err) } defer rd.Close() qt.Assert(t, qt.Equals(rd.AvailableBytes(), 0)) if uint32(rd.BufferSize()) != events.MaxEntries() { t.Errorf("expected %d BufferSize, got %d", events.MaxEntries(), rd.BufferSize()) } opts := &ebpf.RunOptions{ Data: internal.EmptyBPFContext, } if platform.IsWindows { opts.Context = make([]byte, 32) } mustRun(t, prog) var avail int for _, m := range tt.messages { avail += ringbufHeaderSize + internal.Align(m.size, 8) } qt.Assert(t, qt.Equals(rd.AvailableBytes(), avail)) raw := make(map[int][]byte) for len(raw) < len(tt.want) { record, err := rd.Read() if err != nil { t.Fatal("Can't read samples:", err) } raw[len(record.RawSample)] = record.RawSample if len(raw) == len(tt.want) { if record.Remaining != 0 { t.Errorf("expected 0 Remaining, got %d", record.Remaining) } } else { if record.Remaining == 0 { t.Error("expected non-zero Remaining, got 0") } } } if diff := cmp.Diff(tt.want, raw); diff != "" { t.Errorf("Read samples mismatch (-want +got):\n%s", diff) } }) } } func TestReaderBlocking(t *testing.T) { testutils.SkipOnOldKernel(t, "5.8", "BPF ring buffer") prog, events := mustOutputSamplesProg(t, sampleMessage{size: 5, flags: 0}) mustRun(t, prog) rd, err := NewReader(events) if err != nil { t.Fatal(err) } defer rd.Close() if _, err := rd.Read(); err != nil { 
t.Fatal("Can't read first sample:", err) } errs := make(chan error, 1) go func() { _, err := rd.Read() errs <- err }() select { case err := <-errs: t.Fatal("Read returns error instead of blocking:", err) case <-time.After(100 * time.Millisecond): } // Close should interrupt blocking Read if err := rd.Close(); err != nil { t.Fatal(err) } select { case err := <-errs: if !errors.Is(err, ErrClosed) { t.Fatal("Expected os.ErrClosed from interrupted Read, got:", err) } case <-time.After(time.Second): t.Fatal("Close doesn't interrupt Read") } // And we should be able to call it multiple times if err := rd.Close(); err != nil { t.Fatal(err) } if _, err := rd.Read(); !errors.Is(err, ErrClosed) { t.Fatal("Second Read on a closed RingbufReader doesn't return ErrClosed") } } func TestReaderNoWakeup(t *testing.T) { testutils.SkipOnOldKernel(t, "5.8", "BPF ring buffer") prog, events := mustOutputSamplesProg(t, sampleMessage{size: 5, flags: sys.BPF_RB_NO_WAKEUP}, // Read after timeout sampleMessage{size: 7, flags: sys.BPF_RB_NO_WAKEUP}, // Read won't block ) rd, err := NewReader(events) if err != nil { t.Fatal(err) } defer rd.Close() qt.Assert(t, qt.Equals(rd.AvailableBytes(), 0)) mustRun(t, prog) qt.Assert(t, qt.Equals(rd.AvailableBytes(), 2*16)) rd.SetDeadline(time.Now()) record, err := rd.Read() if err != nil { t.Error("Expected no error from first Read, got:", err) } if len(record.RawSample) != 5 { t.Errorf("Expected to read 5 bytes but got %d", len(record.RawSample)) } qt.Assert(t, qt.Equals(rd.AvailableBytes(), 1*16)) record, err = rd.Read() if err != nil { t.Error("Expected no error from second Read, got:", err) } if len(record.RawSample) != 7 { t.Errorf("Expected to read 7 bytes but got %d", len(record.RawSample)) } qt.Assert(t, qt.Equals(rd.AvailableBytes(), 0)) _, err = rd.Read() if !errors.Is(err, os.ErrDeadlineExceeded) { t.Errorf("Expected os.ErrDeadlineExceeded from third Read but got %v", err) } } func TestReaderFlushPendingEvents(t *testing.T) { 
testutils.SkipOnOldKernel(t, "5.8", "BPF ring buffer") prog, events := mustOutputSamplesProg(t, sampleMessage{size: 5, flags: sys.BPF_RB_NO_WAKEUP}, // Read after Flush sampleMessage{size: 7, flags: sys.BPF_RB_NO_WAKEUP}, // Read won't block ) rd, err := NewReader(events) if err != nil { t.Fatal(err) } defer rd.Close() mustRun(t, prog) wait := make(chan *Record) go func() { wait <- nil record, err := rd.Read() qt.Assert(t, qt.IsNil(err)) wait <- &record }() <-wait time.Sleep(10 * time.Millisecond) err = rd.Flush() qt.Assert(t, qt.IsNil(err)) waitRec := <-wait if waitRec == nil { t.Error("Expected to read record but got nil") } if waitRec != nil && len(waitRec.RawSample) != 5 { t.Errorf("Expected to read 5 bytes but got %d", len(waitRec.RawSample)) } record, err := rd.Read() if err != nil { t.Error("Expected no error from second Read, got:", err) } if len(record.RawSample) != 7 { t.Errorf("Expected to read 7 bytes but got %d", len(record.RawSample)) } _, err = rd.Read() if !errors.Is(err, ErrFlushed) { t.Errorf("Expected ErrFlushed from third Read but got %v", err) } } func TestReaderSetDeadline(t *testing.T) { testutils.SkipOnOldKernel(t, "5.8", "BPF ring buffer") _, events := mustOutputSamplesProg(t, sampleMessage{size: 5, flags: 0}) rd, err := NewReader(events) if err != nil { t.Fatal(err) } defer rd.Close() rd.SetDeadline(time.Now().Add(-time.Second)) if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) { t.Error("Expected os.ErrDeadlineExceeded from first Read, got:", err) } if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) { t.Error("Expected os.ErrDeadlineExceeded from second Read, got:", err) } } func TestReadAfterClose(t *testing.T) { testutils.SkipOnOldKernel(t, "5.8", "BPF ring buffer") prog, events := mustOutputSamplesProg(t, sampleMessage{size: 5, flags: 0}, sampleMessage{size: 5, flags: 0}, ) mustRun(t, prog) rd, err := NewReader(events) if err != nil { t.Fatal(err) } _, err = rd.Read() if err != nil { t.Error("Expected no 
error after first Read, got:", err) } err = rd.Close() if err != nil { t.Error("Expected no error from Close, got: ", err) } _, err = rd.Read() if err == nil || !errors.Is(err, ErrClosed) { t.Error("Expected ErrClosed but got: ", err) } } func BenchmarkReader(b *testing.B) { testutils.SkipOnOldKernel(b, "5.8", "BPF ring buffer") readerBenchmarks := []struct { name string flags int32 }{ { name: "normal epoll with timeout -1", }, } for _, bm := range readerBenchmarks { b.Run(bm.name, func(b *testing.B) { prog, events := mustOutputSamplesProg(b, sampleMessage{size: 80, flags: bm.flags}) rd, err := NewReader(events) if err != nil { b.Fatal(err) } defer rd.Close() b.ReportAllocs() for b.Loop() { b.StopTimer() mustRun(b, prog) b.StartTimer() _, err = rd.Read() if err != nil { b.Fatal("Can't read samples:", err) } } }) } } func BenchmarkReadInto(b *testing.B) { testutils.SkipOnOldKernel(b, "5.8", "BPF ring buffer") prog, events := mustOutputSamplesProg(b, sampleMessage{size: 80, flags: 0}) rd, err := NewReader(events) if err != nil { b.Fatal(err) } defer rd.Close() b.ReportAllocs() var rec Record for b.Loop() { b.StopTimer() mustRun(b, prog) b.StartTimer() if err := rd.ReadInto(&rec); err != nil { b.Fatal("Can't read samples:", err) } } } ================================================ FILE: ringbuf/reader_windows.go ================================================ package ringbuf import ( "errors" "fmt" "os" "sync/atomic" "time" "golang.org/x/sys/windows" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/efw" ) var ErrFlushed = errors.New("ring buffer flushed") var _ poller = (*windowsPoller)(nil) type windowsPoller struct { closed atomic.Bool handle windows.Handle flushHandle windows.Handle handles []windows.Handle } func newPoller(fd int) (*windowsPoller, error) { handle, err := windows.CreateEvent(nil, 0, 0, nil) if err != nil { return nil, err } flushHandle, err := windows.CreateEvent(nil, 0, 0, nil) if err != nil { windows.CloseHandle(handle) 
return nil, err } if err := efw.EbpfMapSetWaitHandle(fd, 0, handle); err != nil { windows.CloseHandle(handle) windows.CloseHandle(flushHandle) return nil, err } return &windowsPoller{ handle: handle, flushHandle: flushHandle, handles: []windows.Handle{handle, flushHandle}, }, nil } // Wait blocks until data is available or the deadline is reached. // Returns [os.ErrDeadlineExceeded] if a deadline was set and no wakeup was received. // Returns [ErrFlushed] if the ring buffer was flushed manually. // Returns [os.ErrClosed] if the poller was closed. func (p *windowsPoller) Wait(deadline time.Time) error { if p.closed.Load() { return os.ErrClosed } timeout := uint32(windows.INFINITE) if !deadline.IsZero() { timeout = uint32(internal.Between(time.Until(deadline).Milliseconds(), 0, windows.INFINITE-1)) } // Wait for either the ring buffer handle or the flush handle to be signaled result, err := windows.WaitForMultipleObjects(p.handles, false, timeout) switch result { case windows.WAIT_OBJECT_0: // Ring buffer event return nil case windows.WAIT_OBJECT_0 + 1: if p.closed.Load() { return os.ErrClosed } // Flush event return ErrFlushed case uint32(windows.WAIT_TIMEOUT): return os.ErrDeadlineExceeded case windows.WAIT_FAILED: return err default: return fmt.Errorf("unexpected wait result 0x%x: %w", result, err) } } // Flush interrupts [Wait] with [ErrFlushed]. 
func (p *windowsPoller) Flush() error { // Signal the handle to wake up any waiting threads if err := windows.SetEvent(p.flushHandle); err != nil { if errors.Is(err, windows.ERROR_INVALID_HANDLE) { return os.ErrClosed } return err } return nil } func (p *windowsPoller) Close() error { p.closed.Store(true) if err := p.Flush(); err != nil { return err } return errors.Join(windows.CloseHandle(p.handle), windows.CloseHandle(p.flushHandle)) } ================================================ FILE: ringbuf/ring.go ================================================ package ringbuf import ( "fmt" "io" "sync/atomic" "unsafe" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" ) type ringReader struct { // These point into mmap'ed memory and must be accessed atomically. prod_pos, cons_pos *uintptr mask uintptr ring []byte } func newRingReader(cons_ptr, prod_ptr *uintptr, ring []byte) *ringReader { return &ringReader{ prod_pos: prod_ptr, cons_pos: cons_ptr, // cap is always a power of two mask: uintptr(cap(ring)/2 - 1), ring: ring, } } // To be able to wrap around data, data pages in ring buffers are mapped twice in // a single contiguous virtual region. // Therefore the returned usable size is half the size of the mmaped region. func (rr *ringReader) size() int { return cap(rr.ring) / 2 } // The amount of data available to read in the ring buffer. func (rr *ringReader) AvailableBytes() uint64 { prod := atomic.LoadUintptr(rr.prod_pos) cons := atomic.LoadUintptr(rr.cons_pos) return uint64(prod - cons) } // Read a record from an event ring. 
func (rr *ringReader) readRecord(rec *Record) error { prod := atomic.LoadUintptr(rr.prod_pos) cons := atomic.LoadUintptr(rr.cons_pos) for { if remaining := prod - cons; remaining == 0 { return errEOR } else if remaining < sys.BPF_RINGBUF_HDR_SZ { return fmt.Errorf("read record header: %w", io.ErrUnexpectedEOF) } // read the len field of the header atomically to ensure a happens before // relationship with the xchg in the kernel. Without this we may see len // without BPF_RINGBUF_BUSY_BIT before the written data is visible. // See https://github.com/torvalds/linux/blob/v6.8/kernel/bpf/ringbuf.c#L484 start := cons & rr.mask len := atomic.LoadUint32((*uint32)((unsafe.Pointer)(&rr.ring[start]))) header := ringbufHeader{Len: len} if header.isBusy() { // the next sample in the ring is not committed yet so we // exit without storing the reader/consumer position // and start again from the same position. return errBusy } cons += sys.BPF_RINGBUF_HDR_SZ // Data is always padded to 8 byte alignment. dataLenAligned := uintptr(internal.Align(header.dataLen(), 8)) if remaining := prod - cons; remaining < dataLenAligned { return fmt.Errorf("read sample data: %w", io.ErrUnexpectedEOF) } start = cons & rr.mask cons += dataLenAligned if header.isDiscard() { // when the record header indicates that the data should be // discarded, we skip it by just updating the consumer position // to the next record. 
atomic.StoreUintptr(rr.cons_pos, cons) continue } if n := header.dataLen(); cap(rec.RawSample) < n { rec.RawSample = make([]byte, n) } else { rec.RawSample = rec.RawSample[:n] } copy(rec.RawSample, rr.ring[start:]) rec.Remaining = int(prod - cons) atomic.StoreUintptr(rr.cons_pos, cons) return nil } } ================================================ FILE: ringbuf/ring_other.go ================================================ //go:build !windows package ringbuf import ( "errors" "fmt" "os" "runtime" "unsafe" "github.com/cilium/ebpf/internal/unix" ) var _ eventRing = (*mmapEventRing)(nil) type mmapEventRing struct { prod []byte cons []byte *ringReader cleanup runtime.Cleanup } func newRingBufEventRing(mapFD, size int) (*mmapEventRing, error) { cons, err := unix.Mmap(mapFD, 0, os.Getpagesize(), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) if err != nil { return nil, fmt.Errorf("mmap consumer page: %w", err) } prod, err := unix.Mmap(mapFD, (int64)(os.Getpagesize()), os.Getpagesize()+2*size, unix.PROT_READ, unix.MAP_SHARED) if err != nil { _ = unix.Munmap(cons) return nil, fmt.Errorf("mmap data pages: %w", err) } cons_pos := (*uintptr)(unsafe.Pointer(&cons[0])) prod_pos := (*uintptr)(unsafe.Pointer(&prod[0])) ring := &mmapEventRing{ prod: prod, cons: cons, ringReader: newRingReader(cons_pos, prod_pos, prod[os.Getpagesize():]), } ring.cleanup = runtime.AddCleanup(ring, func(*byte) { _ = unix.Munmap(prod) _ = unix.Munmap(cons) }, nil) return ring, nil } func (ring *mmapEventRing) Close() error { ring.cleanup.Stop() prod, cons := ring.prod, ring.cons ring.prod, ring.cons = nil, nil return errors.Join( unix.Munmap(prod), unix.Munmap(cons), ) } ================================================ FILE: ringbuf/ring_windows.go ================================================ package ringbuf import ( "errors" "fmt" "runtime" "unsafe" "github.com/cilium/ebpf/internal/efw" "github.com/cilium/ebpf/internal/sys" ) var _ eventRing = (*windowsEventRing)(nil) type windowsEventRing 
struct { mapFd *sys.FD cons, prod, data *uint8 *ringReader cleanup runtime.Cleanup } func newRingBufEventRing(mapFD, size int) (*windowsEventRing, error) { dupFd, err := efw.EbpfDuplicateFd(mapFD) if err != nil { return nil, fmt.Errorf("duplicate map fd: %w", err) } fd, err := sys.NewFD(dupFd) if err != nil { _ = efw.EbpfCloseFd(dupFd) return nil, err } consPtr, prodPtr, dataPtr, dataLen, err := efw.EbpfRingBufferMapMapBuffer(dupFd) if err != nil { _ = fd.Close() return nil, fmt.Errorf("map consumer page: %w", err) } if dataLen != efw.Size(size) { _ = fd.Close() return nil, fmt.Errorf("map data length mismatch: %d != %d", dataLen, size) } // consPtr and prodPtr are guaranteed to be page size aligned. consPos := (*uintptr)(unsafe.Pointer(consPtr)) prodPos := (*uintptr)(unsafe.Pointer(prodPtr)) data := unsafe.Slice(dataPtr, dataLen*2) ring := &windowsEventRing{ mapFd: fd, cons: consPtr, prod: prodPtr, data: dataPtr, ringReader: newRingReader(consPos, prodPos, data), } ring.cleanup = runtime.AddCleanup(ring, func(*byte) { efw.EbpfRingBufferMapUnmapBuffer(fd.Int(), consPtr, prodPtr, dataPtr) }, nil) return ring, nil } func (ring *windowsEventRing) Close() error { ring.cleanup.Stop() return errors.Join( efw.EbpfRingBufferMapUnmapBuffer(ring.mapFd.Int(), ring.cons, ring.prod, ring.data), ring.mapFd.Close(), ) } ================================================ FILE: rlimit/doc.go ================================================ // Package rlimit allows raising RLIMIT_MEMLOCK if necessary for the use of BPF. 
package rlimit

================================================
FILE: rlimit/rlimit_linux.go
================================================
package rlimit

import (
	"errors"
	"fmt"
	"sync"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/unix"
)

var (
	// unsupportedMemcgAccounting is returned by detectMemcgAccounting when
	// creating a map with a zero memlock rlimit fails with EPERM.
	unsupportedMemcgAccounting = &internal.UnsupportedFeatureError{
		MinimumVersion: internal.Version{5, 11, 0},
		Name:           "memcg-based accounting for BPF memory",
	}
	// haveMemcgAccounting holds the result of detectMemcgAccounting, which is
	// run once from init. nil means memcg accounting is supported.
	haveMemcgAccounting error

	// rlimitMu is held by RemoveMemlock while it raises the rlimit.
	rlimitMu sync.Mutex
)

func init() {
	// We have to run this feature test at init, since it relies on changing
	// RLIMIT_MEMLOCK. Doing so is not safe in a concurrent program. Instead,
	// we rely on the initialization order guaranteed by the Go runtime to
	// execute the test in a safe environment:
	//
	//    the invocation of init functions happens in a single goroutine,
	//    sequentially, one package at a time.
	//
	// This is also the reason why RemoveMemlock is in its own package:
	// we only want to run the initializer if RemoveMemlock is called
	// from somewhere.
	haveMemcgAccounting = detectMemcgAccounting()
}

// detectMemcgAccounting probes whether BPF memory is accounted against the
// memory cgroup instead of RLIMIT_MEMLOCK. It does so by temporarily lowering
// the current memlock rlimit to zero and creating a small array map.
//
// Returns nil if the map could be created, unsupportedMemcgAccounting if
// creating it failed with EPERM, and a different error if changing the rlimit
// or creating the map failed in another way.
func detectMemcgAccounting() error {
	// Retrieve the original limit to prevent lowering Max, since
	// doing so is a permanent operation when running unprivileged.
	var oldLimit unix.Rlimit
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &oldLimit); err != nil {
		return fmt.Errorf("getting original memlock rlimit: %s", err)
	}

	// Drop the current limit to zero, maintaining the old Max value.
	// This is always permitted by the kernel for unprivileged users.
	// Retrieve a new copy of the old limit tuple to minimize the chances
	// of failing the restore operation below.
	zeroLimit := unix.Rlimit{Cur: 0, Max: oldLimit.Max}
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &zeroLimit, &oldLimit); err != nil {
		return fmt.Errorf("lowering memlock rlimit: %s", err)
	}

	attr := sys.MapCreateAttr{
		MapType:    2, /* Array */
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 1,
	}

	// Creating a map allocates shared (and locked) memory that counts against
	// the rlimit on pre-5.11 kernels, but against the memory cgroup budget on
	// kernels 5.11 and over. If this call succeeds with the process' memlock
	// rlimit set to 0, we can reasonably assume memcg accounting is supported.
	fd, mapErr := sys.MapCreate(&attr)

	// Restore old limits regardless of what happened.
	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &oldLimit, nil); err != nil {
		return fmt.Errorf("restoring old memlock rlimit: %s", err)
	}

	// Map creation successful, memcg accounting supported.
	if mapErr == nil {
		fd.Close()
		return nil
	}

	// EPERM shows up when map creation would exceed the memory budget.
	if errors.Is(mapErr, unix.EPERM) {
		return unsupportedMemcgAccounting
	}

	// This shouldn't happen really.
	return fmt.Errorf("unexpected error detecting memory cgroup accounting: %s", mapErr)
}

// RemoveMemlock removes the limit on the amount of memory the current
// process can lock into RAM, if necessary.
//
// This is not required to load eBPF resources on kernel versions 5.11+
// due to the introduction of cgroup-based memory accounting. On such kernels
// the function is a no-op.
//
// Since the function may change global per-process limits it should be invoked
// at program start up, in main() or init().
//
// This function exists as a convenience and should only be used when
// permanently raising RLIMIT_MEMLOCK to infinite is appropriate. Consider
// invoking prlimit(2) directly with a more reasonable limit if desired.
//
// Requires CAP_SYS_RESOURCE on kernels < 5.11.
func RemoveMemlock() error { if haveMemcgAccounting == nil { return nil } if !errors.Is(haveMemcgAccounting, unsupportedMemcgAccounting) { return haveMemcgAccounting } rlimitMu.Lock() defer rlimitMu.Unlock() // pid 0 affects the current process. Requires CAP_SYS_RESOURCE. newLimit := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY} if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &newLimit, nil); err != nil { return fmt.Errorf("failed to set memlock rlimit: %w", err) } return nil } ================================================ FILE: rlimit/rlimit_linux_test.go ================================================ package rlimit import ( "testing" "github.com/cilium/ebpf/internal/linux" "github.com/cilium/ebpf/internal/unix" "github.com/go-quicktest/qt" ) func TestRemoveMemlock(t *testing.T) { var before unix.Rlimit qt.Assert(t, qt.IsNil(unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &before))) err := RemoveMemlock() qt.Assert(t, qt.IsNil(err)) var after unix.Rlimit qt.Assert(t, qt.IsNil(unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &after))) // We can't use testutils here due to an import cycle. version, err := linux.KernelVersion() qt.Assert(t, qt.IsNil(err)) if version.Less(unsupportedMemcgAccounting.MinimumVersion) { qt.Assert(t, qt.Equals(after.Cur, unix.RLIM_INFINITY), qt.Commentf("cur should be INFINITY")) qt.Assert(t, qt.Equals(after.Max, unix.RLIM_INFINITY), qt.Commentf("max should be INFINITY")) } else { qt.Assert(t, qt.Equals(after.Cur, before.Cur), qt.Commentf("cur should be unchanged")) qt.Assert(t, qt.Equals(after.Max, before.Max), qt.Commentf("max should be unchanged")) } } ================================================ FILE: rlimit/rlimit_other.go ================================================ //go:build !linux package rlimit // RemoveMemlock is a no-op on platforms other than Linux. 
func RemoveMemlock() error { return nil } ================================================ FILE: scripts/update-efw-deps.sh ================================================ #!/bin/bash set -euo pipefail # Extract EFW version from CI workflow file efw_version=$(awk -F': ' '/CI_MAX_EFW_VERSION:/ {gsub(/['\''"]/, "", $2); print $2}' .github/workflows/ci.yml) if [ -z "$efw_version" ]; then echo "Error: Could not extract CI_MAX_EFW_VERSION from .github/workflows/ci.yml" >&2 exit 1 fi echo "Using EFW version: $efw_version" tmp=$(mktemp -d) cleanup() { rm -r "$tmp" } trap cleanup EXIT # Download and process ebpf_structs.h curl -fL "https://github.com/microsoft/ebpf-for-windows/raw/refs/tags/Release-v${efw_version}/include/ebpf_structs.h" -o "$tmp/ebpf_structs.h" "./internal/cmd/genwinfunctions.awk" "$tmp/ebpf_structs.h" | gofmt > "./asm/func_win.go" ================================================ FILE: scripts/update-kernel-deps.sh ================================================ #!/bin/bash set -euo pipefail # Extract kernel version from CI workflow file kernel_version=$(awk -F': ' '/CI_MAX_KERNEL_VERSION:/ {gsub(/['\''"]/, "", $2); print $2}' .github/workflows/ci.yml) if [ -z "$kernel_version" ]; then echo "Error: Could not extract CI_MAX_KERNEL_VERSION from .github/workflows/ci.yml" >&2 exit 1 fi echo "Using kernel version: $kernel_version" tmp=$(mktemp -d) cleanup() { rm -r "$tmp" } trap cleanup EXIT # Download and process libbpf.c # Truncate .0 patch versions (e.g., 6.16.0 -> 6.16, but leave 7.0 as 7.0) kernel_version_for_url="$kernel_version" if [[ $kernel_version =~ ^([0-9]+\.[0-9]+)\.0$ ]]; then kernel_version_for_url="${BASH_REMATCH[1]}" fi curl -fL "https://raw.githubusercontent.com/gregkh/linux/refs/tags/v$kernel_version_for_url/tools/lib/bpf/libbpf.c" -o "$tmp/libbpf.c" "./internal/cmd/gensections.awk" "$tmp/libbpf.c" | gofmt > "./elf_sections.go" # Download and process vmlinux and btf_testmod go tool crane export "ghcr.io/cilium/ci-kernels:$kernel_version" | 
tar -x -C "$tmp" extract-vmlinux "$tmp/boot/vmlinuz" > "$tmp/vmlinux" objcopy --dump-section .BTF=/dev/stdout "$tmp/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz" find "$tmp/lib/modules" -type f -name bpf_testmod.ko -exec objcopy --dump-section .BTF="btf/testdata/btf_testmod.btf" {} /dev/null \; find "$tmp/lib/modules" -type f -name bpf_testmod.ko -exec objcopy --dump-section .BTF.base="btf/testdata/btf_testmod.btf.base" {} /dev/null \; ================================================ FILE: scripts/windows/README.md ================================================ # Windows Development Setup You will need access to a Windows environment to work on ebpf-go for Windows. This repository contains a script which (mostly) automatically installs a Windows VM. It then proceeds to install dependencies necessary to compile and install eBPF for Windows. ```shell ./setup.sh path-to-windows.iso ``` Obtain the ISO by choosing "Download Windows 11 Disk Image (ISO)" on the [download page](https://www.microsoft.com/en-gb/software-download/windows11/) and then following the instructions. __Choose "English (United States)" as product language for a fully automated installation.__ ## SSH The setup script adds a public key from `~/.ssh` to the VM, so you should be able to ssh into it by simply executing `ssh $IP`. ## Requirements * Only tested with Windows 11 * `libvirt` using qemu backend * `genisoimage` * `curl` * `envsubst` * `fzf` (optional, to select an ssh key) ================================================ FILE: scripts/windows/Setup.ps1 ================================================ # Configure a fresh installation of Windows via SSH. param ( [switch] $RunOnce = $false ) if ($RunOnce) { # Visual Studio really doesn't seem to like being installed via SSH, so # we invoke from a RunOnce script. 
Invoke-WebRequest 'https://raw.githubusercontent.com/microsoft/ebpf-for-windows/main/scripts/Setup-DevEnv.ps1' -OutFile $env:TEMP\Setup-DevEnv.ps1 &"$env:TEMP\Setup-DevEnv.ps1" # sshd needs to be restarted to pick up new environment variables. Restart-Service sshd return } # Enable Developer Mode (so that symlinks work) # See https://stackoverflow.com/a/40033638 $RegistryKeyPath = "HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\AppModelUnlock" New-Item -Path $RegistryKeyPath -ItemType Directory -Force New-ItemProperty -Path $RegistryKeyPath -Name AllowDevelopmentWithoutDevLicense -PropertyType DWORD -Value 1 -Force # Ensure we have a PROFILE. # # This also allows chocolatey to add its hooks. if (!(Test-Path -Path $PROFILE)) { New-Item -ItemType File -Path $PROFILE -Force } # Add VS tools to PATH $addVsToolsToPath = @' # Add VS to PATH function Import-VsEnv { $vswherePath = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" $vspath = & $vswherePath -property installationPath $vsDevShell = "${vspath}\Common7\Tools\Launch-VsDevShell.ps1" & $vsDevShell -SkipAutomaticLocation } '@ if (-not (Get-Content $PROFILE | Select-String "Add VS to PATH")) { $addVsToolsToPath | Add-Content -Path $PROFILE } # Enable git symlink support globally. 
$gitConfig = "${HOME}/.gitconfig" if (!(Test-Path -Path $gitConfig)) { New-Item -ItemType File -Path $gitConfig -Force } if (-not (Get-Content $gitConfig | Select-String "symlinks = true")) { @' [core] symlinks = true '@ | Add-Content -Path $gitConfig } # Install winget version which supports configure -f # if ([version]($(winget --version).substring(1)) -lt [version]'1.6.0') { # echo "Updating winget" # # From https://andrewstaylor.com/2023/11/28/winget-powershell-module/ # Get-PackageProvider NuGet -ForceBootstrap | Out-Null # install-module microsoft.winget.client -Force -AllowClobber # import-module microsoft.winget.client # repair-wingetpackagemanager -Force -Latest -AllUsers # } echo "Scheduling installation of eBPF for Windows dependencies for next reboot." Set-ItemProperty "HKCU:\Software\Microsoft\Windows\CurrentVersion\RunOnce" -Name 'InstallEFWDependencies' -Value "powershell.exe -command `"start -verb runas powershell.exe -argumentlist \`"-file $PSCommandPath -RunOnce\`"" echo "Rebooting." Restart-Computer -Force ================================================ FILE: scripts/windows/autounattend.xml ================================================ en-US 0409:00000409 en-US en-US en-US 0 3 2B87N-8KFHP-DKV6R-Y2C8J-PKCKT OnError true false 1 cmd.exe /c ">>"X:\diskpart.txt" (echo SELECT DISK=0&echo CLEAN&echo CONVERT GPT&echo CREATE PARTITION EFI SIZE=300&echo FORMAT QUICK FS=FAT32 LABEL="System"&echo CREATE PARTITION MSR SIZE=16)" 2 cmd.exe /c ">>"X:\diskpart.txt" (echo CREATE PARTITION PRIMARY&echo SHRINK MINIMUM=1000&echo FORMAT QUICK FS=NTFS LABEL="Windows"&echo CREATE PARTITION PRIMARY&echo FORMAT QUICK FS=NTFS LABEL="Recovery")" 3 cmd.exe /c ">>"X:\diskpart.txt" (echo SET ID="de94bba4-06d1-4d40-a16a-bfd50179d6ac"&echo GPT ATTRIBUTES=0x8000000000000001)" 4 cmd.exe /c "diskpart.exe /s "X:\diskpart.txt" >>"X:\diskpart.log" || ( type "X:\diskpart.log" & echo diskpart encountered an error. 
& pause & exit /b 1 )" 5 cmd.exe /c ">>"X:\defender.vbs" (echo WScript.Echo "Scanning for newly created SYSTEM registry hive file to disable Windows Defender services..."&echo Set fso = CreateObject("Scripting.FileSystemObject"^))" 6 cmd.exe /c ">>"X:\defender.vbs" (echo Set existing = CreateObject("Scripting.Dictionary"^)&echo Function Execute(command^)&echo WScript.Echo "Running command '" + command + "'"&echo Set shell = CreateObject("WScript.Shell"^))" 7 cmd.exe /c ">>"X:\defender.vbs" (echo Set exec = shell.Exec(command^)&echo Do While exec.Status = 0&echo WScript.Sleep 100&echo Loop&echo WScript.Echo exec.StdOut.ReadAll&echo WScript.Echo exec.StdErr.ReadAll)" 8 cmd.exe /c ">>"X:\defender.vbs" (echo Execute = exec.ExitCode&echo End Function&echo Function FindHiveFiles&echo Set FindHiveFiles = CreateObject("Scripting.Dictionary"^)&echo For Each drive In fso.Drives)" 9 cmd.exe /c ">>"X:\defender.vbs" (echo If drive.IsReady And drive.DriveLetter ^<^> "X" Then&echo For Each folder In Array("$Windows.~BT\NewOS\Windows", "Windows"^))" 10 cmd.exe /c ">>"X:\defender.vbs" (echo file = fso.BuildPath(fso.BuildPath(drive.RootFolder, folder^), "System32\config\SYSTEM"^)&echo If fso.FileExists(file^) And fso.FileExists(file + ".LOG1"^) And fso.FileExists(file + ".LOG2"^) Then)" 11 cmd.exe /c ">>"X:\defender.vbs" (echo FindHiveFiles.Add file, Nothing&echo End If&echo Next&echo End If&echo Next&echo End Function&echo For Each file In FindHiveFiles)" 12 cmd.exe /c ">>"X:\defender.vbs" (echo WScript.Echo "Will ignore file at '" + file + "' because it was already present when Windows Setup started."&echo existing.Add file, Nothing&echo Next&echo Do)" 13 cmd.exe /c ">>"X:\defender.vbs" (echo For Each file In FindHiveFiles&echo If Not existing.Exists(file^) Then&echo ret = 1&echo While ret ^> 0&echo WScript.Sleep 500&echo ret = Execute("reg.exe LOAD HKLM\mount " + file^))" 14 cmd.exe /c ">>"X:\defender.vbs" (echo Wend&echo For Each service In Array("Sense", "WdBoot", "WdFilter", 
"WdNisDrv", "WdNisSvc", "WinDefend"^))" 15 cmd.exe /c ">>"X:\defender.vbs" (echo ret = Execute("reg.exe ADD HKLM\mount\ControlSet001\Services\" + service + " /v Start /t REG_DWORD /d 4 /f"^)&echo Next&echo ret = Execute("reg.exe UNLOAD HKLM\mount"^))" 16 cmd.exe /c ">>"X:\defender.vbs" (echo WScript.Echo "Found and successfully modified SYSTEM registry hive file at '" + file + "'. This window will now close."&echo WScript.Sleep 5000&echo Exit Do&echo End If)" 17 cmd.exe /c ">>"X:\defender.vbs" (echo WScript.Sleep 1000&echo Next&echo Loop)" 18 cmd.exe /c "start /MIN cscript.exe //E:vbscript X:\defender.vbs" 1 powershell.exe -WindowStyle Normal -NoProfile -Command "$xml = [xml]::new(); $xml.Load('C:\Windows\Panther\unattend.xml'); $sb = [scriptblock]::Create( $xml.unattend.Extensions.ExtractScript ); Invoke-Command -ScriptBlock $sb -ArgumentList $xml;" 2 powershell.exe -WindowStyle Normal -NoProfile -Command "Get-Content -LiteralPath 'C:\Windows\Setup\Scripts\Specialize.ps1' -Raw | Invoke-Expression;" 3 reg.exe load "HKU\DefaultUser" "C:\Users\Default\NTUSER.DAT" 4 powershell.exe -WindowStyle Normal -NoProfile -Command "Get-Content -LiteralPath 'C:\Windows\Setup\Scripts\DefaultUser.ps1' -Raw | Invoke-Expression;" 5 reg.exe unload "HKU\DefaultUser" 0409:00000409 en-US en-US en-US $USER Administrators true</PlainText> </Password> </LocalAccount> </LocalAccounts> </UserAccounts> <AutoLogon> <Username>$USER</Username> <Enabled>true</Enabled> <LogonCount>1</LogonCount> <Password> <Value/> <PlainText>true</PlainText> </Password> </AutoLogon> <OOBE> <ProtectYourPC>3</ProtectYourPC> <HideEULAPage>true</HideEULAPage> <HideWirelessSetupInOOBE>true</HideWirelessSetupInOOBE> <HideOnlineAccountScreens>false</HideOnlineAccountScreens> </OOBE> <FirstLogonCommands> <SynchronousCommand wcm:action="add"> <Order>1</Order> <CommandLine>powershell.exe -WindowStyle Normal -NoProfile -Command "Get-Content -LiteralPath 'C:\Windows\Setup\Scripts\FirstLogon.ps1' -Raw | 
Invoke-Expression;"</CommandLine> </SynchronousCommand> </FirstLogonCommands> </component> </settings> <Extensions xmlns="https://schneegans.de/windows/unattend-generator/"> <ExtractScript> param( [xml] $Document ); foreach( $file in $Document.unattend.Extensions.File ) { $path = [System.Environment]::ExpandEnvironmentVariables( $file.GetAttribute( 'path' ) ); mkdir -Path( $path | Split-Path -Parent ) -ErrorAction 'SilentlyContinue'; $encoding = switch( [System.IO.Path]::GetExtension( $path ) ) { { $_ -in '.ps1', '.xml' } { [System.Text.Encoding]::UTF8; } { $_ -in '.reg', '.vbs', '.js' } { [System.Text.UnicodeEncoding]::new( $false, $true ); } default { [System.Text.Encoding]::Default; } }; $bytes = $encoding.GetPreamble() + $encoding.GetBytes( $file.InnerText.Trim() ); [System.IO.File]::WriteAllBytes( $path, $bytes ); } </ExtractScript> <File path="C:\Windows\Setup\Scripts\VirtIoGuestTools.ps1"> &amp; { foreach( $letter in 'DEFGHIJKLMNOPQRSTUVWXYZ'.ToCharArray() ) { $exe = "${letter}:\virtio-win-guest-tools.exe"; if( Test-Path -LiteralPath $exe ) { Start-Process -FilePath $exe -ArgumentList '/passive', '/norestart' -Wait; return; } } 'VirtIO Guest Tools image (virtio-win-*.iso) is not attached to this VM.'; } *&gt;&amp;1 &gt;&gt; 'C:\Windows\Setup\Scripts\VirtIoGuestTools.log'; </File> <File path="C:\Windows\Setup\Scripts\unattend-01.ps1"> bcdedit.exe -set TESTSIGNING ON; </File> <File path="C:\Windows\Setup\Scripts\unattend-02.ps1"> # https://learn.microsoft.com/en-us/windows-server/administration/openssh/openssh_server_configuration Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0; Start-Service sshd; Set-Service -Name sshd -StartupType 'Automatic'; New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value "C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe" -PropertyType String -Force; icacls.exe "C:\ProgramData\ssh\administrators_authorized_keys" /inheritance:r /grant "Administrators:F" /grant "SYSTEM:F"; # Allow inbound 
connections Set-NetFirewallProfile -Profile Domain,Public,Private -DefaultInboundAction Allow; # Set up authorized keys echo "$AUTHORIZED_KEYS" | Out-File -Encoding utf8 -FilePath "C:\ProgramData\ssh\administrators_authorized_keys" </File> <File path="C:\Windows\Setup\Scripts\Specialize.ps1"> $scripts = @( { net.exe accounts /lockoutthreshold:0; }; { net.exe accounts /maxpwage:UNLIMITED; }; { reg.exe add "HKLM\SOFTWARE\Policies\Microsoft\Windows Defender Security Center\Notifications" /v DisableNotifications /t REG_DWORD /d 1 /f; }; { Set-ExecutionPolicy -Scope 'LocalMachine' -ExecutionPolicy 'RemoteSigned' -Force; }; { reg.exe add "HKLM\Software\Policies\Microsoft\Windows\CloudContent" /v "DisableWindowsConsumerFeatures" /t REG_DWORD /d 1 /f; }; { Get-Content -LiteralPath 'C:\Windows\Setup\Scripts\VirtIoGuestTools.ps1' -Raw | Invoke-Expression; }; { reg.exe add "HKLM\SYSTEM\CurrentControlSet\Control\BitLocker" /v "PreventDeviceEncryption" /t REG_DWORD /d 1 /f; }; { reg.exe add "HKLM\SOFTWARE\Policies\Microsoft\Edge" /v HideFirstRunExperience /t REG_DWORD /d 1 /f; }; { Get-Content -LiteralPath 'C:\Windows\Setup\Scripts\unattend-01.ps1' -Raw | Invoke-Expression; }; ); &amp; { [float] $complete = 0; [float] $increment = 100 / $scripts.Count; foreach( $script in $scripts ) { Write-Progress -Activity 'Running scripts to customize your Windows installation. Do not close this window.' -PercentComplete $complete; '*** Will now execute command «{0}».' -f $( $str = $script.ToString().Trim() -replace '\s+', ' '; $max = 100; if( $str.Length -le $max ) { $str; } else { $str.Substring( 0, $max - 1 ) + '…'; } ); $start = [datetime]::Now; &amp; $script; '*** Finished executing command after {0:0} ms.' 
-f [datetime]::Now.Subtract( $start ).TotalMilliseconds; "`r`n" * 3; $complete += $increment; } } *&gt;&amp;1 &gt;&gt; "C:\Windows\Setup\Scripts\Specialize.log"; </File> <File path="C:\Windows\Setup\Scripts\DefaultUser.ps1"> $scripts = @( { $names = @( 'ContentDeliveryAllowed'; 'FeatureManagementEnabled'; 'OEMPreInstalledAppsEnabled'; 'PreInstalledAppsEnabled'; 'PreInstalledAppsEverEnabled'; 'SilentInstalledAppsEnabled'; 'SoftLandingEnabled'; 'SubscribedContentEnabled'; 'SubscribedContent-310093Enabled'; 'SubscribedContent-338387Enabled'; 'SubscribedContent-338388Enabled'; 'SubscribedContent-338389Enabled'; 'SubscribedContent-338393Enabled'; 'SubscribedContent-353698Enabled'; 'SystemPaneSuggestionsEnabled'; ); foreach( $name in $names ) { reg.exe add "HKU\DefaultUser\Software\Microsoft\Windows\CurrentVersion\ContentDeliveryManager" /v $name /t REG_DWORD /d 0 /f; } }; ); &amp; { [float] $complete = 0; [float] $increment = 100 / $scripts.Count; foreach( $script in $scripts ) { Write-Progress -Activity 'Running scripts to modify the default user’’s registry hive. Do not close this window.' -PercentComplete $complete; '*** Will now execute command «{0}».' -f $( $str = $script.ToString().Trim() -replace '\s+', ' '; $max = 100; if( $str.Length -le $max ) { $str; } else { $str.Substring( 0, $max - 1 ) + '…'; } ); $start = [datetime]::Now; &amp; $script; '*** Finished executing command after {0:0} ms.' 
-f [datetime]::Now.Subtract( $start ).TotalMilliseconds; "`r`n" * 3; $complete += $increment; } } *&gt;&amp;1 &gt;&gt; "C:\Windows\Setup\Scripts\DefaultUser.log"; </File> <File path="C:\Windows\Setup\Scripts\FirstLogon.ps1"> $scripts = @( { Set-ItemProperty -LiteralPath 'Registry::HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon' -Name 'AutoLogonCount' -Type 'DWord' -Force -Value 0; }; { Disable-ComputerRestore -Drive 'C:\'; }; { Get-Content -LiteralPath 'C:\Windows\Setup\Scripts\unattend-02.ps1' -Raw | Invoke-Expression; }; ); &amp; { [float] $complete = 0; [float] $increment = 100 / $scripts.Count; foreach( $script in $scripts ) { Write-Progress -Activity 'Running scripts to finalize your Windows installation. Do not close this window.' -PercentComplete $complete; '*** Will now execute command «{0}».' -f $( $str = $script.ToString().Trim() -replace '\s+', ' '; $max = 100; if( $str.Length -le $max ) { $str; } else { $str.Substring( 0, $max - 1 ) + '…'; } ); $start = [datetime]::Now; &amp; $script; '*** Finished executing command after {0:0} ms.' -f [datetime]::Now.Subtract( $start ).TotalMilliseconds; "`r`n" * 3; $complete += $increment; } } *&gt;&amp;1 &gt;&gt; "C:\Windows\Setup\Scripts\FirstLogon.log"; </File> </Extensions> </unattend> ================================================ FILE: scripts/windows/setup-efw.sh ================================================ #!/usr/bin/env bash # Install dependencies required by eBPF for Windows. 
set -euo pipefail VM_NAME="$1" ip=$(virsh --connect qemu:///system domifaddr "$VM_NAME" | gawk 'match($0, /([[:digit:]\.]+)\//, a) { print a[1] }') if [ -z "$ip" ]; then echo "Can't figure out IP address of VM, giving up" exit 1 fi echo "VM IP is $ip" echo Installing eBPF for Windows dependencies scp ./*.ps1 "$ip": ssh -t "$ip" ".\\Setup.ps1" ================================================ FILE: scripts/windows/setup.sh ================================================ #!/usr/bin/env bash set -euo pipefail # Variables VIRTIO_ISO_URL="https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/archive-virtio/virtio-win-0.1.266-1/virtio-win-0.1.266.iso" VIRTIO_ISO="/tmp/virtio-win.iso" # Check if ISO path is provided if [ -z "$1" ]; then echo "Usage: $0 <path_to_windows_iso>" exit 1 else ISO_PATH=$1 fi if [ "$(whoami)" = "root" ]; then echo "Do not run this script as root: it prevents detecting the correct user name" exit 1 fi # Prompt settings read -p "Enter the name of the VM (default is vm): " VM_NAME VM_NAME=${VM_NAME:-vm} read -p "Enter the amount of RAM in MB (default is 8192): " RAM_MB RAM_MB=${RAM_MB:-8192} read -p "Enter the disk size in GB (default is 100): " DISK_SIZE DISK_SIZE=${DISK_SIZE:-100} SSH_PUBKEYS=("$HOME/.ssh"/*.pub) if [ ${#SSH_PUBKEYS[@]} -eq 0 ]; then echo "No .pub files found in ~/.ssh directory." exit 1 elif [ ${#SSH_PUBKEYS[@]} -eq 1 ]; then SSH_PUBKEY=${SSH_PUBKEYS[0]} else SSH_PUBKEY=$(printf "%s\n" "${SSH_PUBKEYS[@]}" | fzf --prompt="Select a .pub file: ") fi if [ -z "$SSH_PUBKEY" ]; then echo "No SSH pubkey selected." exit 1 fi # Check disk before starting download VM_DISK="/var/lib/libvirt/images/${VM_NAME}.qcow2" if [ -f "$VM_DISK" ]; then echo "Error: $VM_DISK already exists" exit 1 fi # Download Virtio Drivers ISO echo "Downloading Virtio drivers ISO..." 
curl -L -o "$VIRTIO_ISO" --etag-save "$VIRTIO_ISO.tmp" --etag-compare "$VIRTIO_ISO.etag" "$VIRTIO_ISO_URL" mv "$VIRTIO_ISO.tmp" "$VIRTIO_ISO.etag" # Create autounattend temp="$(mktemp -d)" cleanup() { sudo umount "$temp/mount" 2> /dev/null rm -rf "$temp" } trap cleanup EXIT chmod 0755 "$temp" mkdir -p "$temp/mount" "$temp/modifications" # Prepare an installation file automatically installs Windows. # # Allows ssh authentication with all public keys found in ~/.ssh. AUTHORIZED_KEYS="$(cat "$SSH_PUBKEY")" envsubst '$USER $AUTHORIZED_KEYS' < autounattend.xml > "$temp/modifications/autounattend.xml" # Generate bootable ISO. # # This ISO contains the autounattend.xml and doesn't require pressing a button # to start the installation. See: # * https://palant.info/2023/02/13/automating-windows-installation-in-a-vm/ sudo mount -o loop "$ISO_PATH" "$temp/mount" genisoimage \ -iso-level 4 -rock -udf \ -disable-deep-relocation \ -untranslated-filenames \ -allow-limited-size \ -no-emul-boot \ -boot-load-size 8 \ -eltorito-boot boot/etfsboot.com \ -eltorito-alt-boot \ -eltorito-boot efi/microsoft/boot/efisys_noprompt.bin \ -o "$temp/win.iso" \ "$temp/mount" "$temp/modifications" # Create VM Disk sudo qemu-img create -f qcow2 "$VM_DISK" "${DISK_SIZE}G" # Define and create the VM using virt-install. # This will start the VM. 
sudo virt-install \ --connect qemu:///system \ --name "$VM_NAME" \ --ram "$RAM_MB" \ --vcpus "$(nproc),cores=$(nproc)" \ --cpu "host-passthrough,check=none,migratable=off,feature.vmx=require,feature.hle=disable,feature.rtm=disable" \ --os-variant win11 \ --network network=default,model=e1000 \ --channel type=unix,source.mode=bind,target.type=virtio,target.name=org.qemu.guest_agent.0 \ --graphics spice \ --disk path="$VM_DISK",format=qcow2,bus=sata,size="$DISK_SIZE",boot.order=1 \ --disk path="$temp/win.iso",device=cdrom,bus=sata,boot.order=2 \ --disk path="$VIRTIO_ISO",device=cdrom,bus=sata \ --install bootdev=cdrom \ --boot uefi,firmware.feature0.name=enrolled-keys,firmware.feature0.enabled=no \ --noautoconsole echo "Windows VM setup initiated, click through the installer." echo "You may have to manually start the VM a couple of times." # Show the graphical output so that the user can follow along. virt-manager --connect qemu:///system --show-domain-console "$VM_NAME" echo "Waiting for VM to receive an IP." ip="" while [ -z "$ip" ]; do sleep 10 ip="$(virsh --connect qemu:///system domifaddr "$VM_NAME" | gawk 'match($0, /([[:digit:]\.]+)\//, a) { print a[1] }')" echo -n . done echo echo "Waiting for SSH to become available to continue installation." while ! ssh -o ConnectTimeout=10 -T "$ip" '$true' &> /dev/null; do echo -n . done echo ./setup-efw.sh "$VM_NAME" ================================================ FILE: struct_ops.go ================================================ package ebpf import ( "errors" "fmt" "reflect" "strings" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" ) const structOpsValuePrefix = "bpf_struct_ops_" const structOpsLinkSec = ".struct_ops.link" const structOpsSec = ".struct_ops" const structOpsKeySize = 4 // structOpsFindInnerType returns the "inner" struct inside a value struct_ops type. 
// // Given a value like: // // struct bpf_struct_ops_bpf_testmod_ops { // struct bpf_struct_ops_common common; // struct bpf_testmod_ops data; // }; // // this function returns the *btf.Struct for "bpf_testmod_ops" along with the // byte offset of the "data" member inside the value type. // // The inner struct name is derived by trimming the "bpf_struct_ops_" prefix // from the value's name. func structOpsFindInnerType(vType *btf.Struct) (*btf.Struct, uint32, error) { innerName := strings.TrimPrefix(vType.Name, structOpsValuePrefix) for _, m := range vType.Members { if st, ok := btf.As[*btf.Struct](m.Type); ok && st.Name == innerName { return st, m.Offset.Bytes(), nil } } return nil, 0, fmt.Errorf("inner struct %q not found in %s", innerName, vType.Name) } // structOpsFindTarget resolves the kernel-side "value struct" for a struct_ops map. func structOpsFindTarget(userType *btf.Struct, cache *btf.Cache) (vType *btf.Struct, id btf.TypeID, module *btf.Handle, err error) { // the kernel value type name, e.g. "bpf_struct_ops_<name>" vTypeName := structOpsValuePrefix + userType.Name target := btf.Type((*btf.Struct)(nil)) spec, module, err := findTargetInKernel(vTypeName, &target, cache) if errors.Is(err, btf.ErrNotFound) { return nil, 0, nil, fmt.Errorf("%q doesn't exist in kernel: %w", vTypeName, ErrNotSupported) } if err != nil { return nil, 0, nil, fmt.Errorf("lookup value type %q: %w", vTypeName, err) } id, err = spec.TypeID(target) if err != nil { return nil, 0, nil, err } return target.(*btf.Struct), id, module, nil } // structOpsPopulateValue writes a `prog FD` which references to `p` into the // struct_ops value buffer `kernVData` at byte offset `dstOff` corresponding to // the member `km`. 
func structOpsPopulateValue(km btf.Member, kernVData []byte, p *Program) error { kmPtr, ok := btf.As[*btf.Pointer](km.Type) if !ok { return fmt.Errorf("member %s is not a func pointer", km.Name) } if _, isFuncProto := btf.As[*btf.FuncProto](kmPtr.Target); !isFuncProto { return fmt.Errorf("member %s is not a func pointer", km.Name) } dstOff := int(km.Offset.Bytes()) if dstOff < 0 || dstOff+8 > len(kernVData) { return fmt.Errorf("member %q: value buffer too small for func ptr", km.Name) } internal.NativeEndian.PutUint64(kernVData[dstOff:dstOff+8], uint64(p.FD())) return nil } // structOpsCopyMember copies a single member from the user struct (m) // into the kernel value struct (km) for struct_ops. func structOpsCopyMember(m, km btf.Member, data []byte, kernVData []byte) error { mSize, err := btf.Sizeof(m.Type) if err != nil { return fmt.Errorf("sizeof(user.%s): %w", m.Name, err) } kSize, err := btf.Sizeof(km.Type) if err != nil { return fmt.Errorf("sizeof(kernel.%s): %w", km.Name, err) } if mSize != kSize { return fmt.Errorf("size mismatch for %s: user=%d kernel=%d", m.Name, mSize, kSize) } if km.BitfieldSize > 0 || m.BitfieldSize > 0 { return fmt.Errorf("bitfield %s not supported", m.Name) } srcOff := int(m.Offset.Bytes()) dstOff := int(km.Offset.Bytes()) if srcOff < 0 || srcOff+mSize > len(data) { return fmt.Errorf("member %q: userdata is too small", m.Name) } if dstOff < 0 || dstOff+mSize > len(kernVData) { return fmt.Errorf("member %q: value type is too small", m.Name) } // skip mods(const, restrict, volatile and typetag) // and typedef to check type compatibility mType := btf.UnderlyingType(m.Type) kernMType := btf.UnderlyingType(km.Type) if reflect.TypeOf(mType) != reflect.TypeOf(kernMType) { return fmt.Errorf("unmatched member type %s != %s (kernel)", m.Name, km.Name) } switch mType.(type) { case *btf.Struct, *btf.Union: if !structOpsIsMemZeroed(data[srcOff : srcOff+mSize]) { return fmt.Errorf("non-zero nested struct %s: %w", m.Name, ErrNotSupported) } // the 
bytes has zeroed value, we simply skip the copy. return nil } copy(kernVData[dstOff:dstOff+mSize], data[srcOff:srcOff+mSize]) return nil } // structOpsIsMemZeroed() checks whether all bytes in data are zero. func structOpsIsMemZeroed(data []byte) bool { for _, b := range data { if b != 0 { return false } } return true } // structOpsSetAttachTo sets p.AttachTo in the expected "struct_name:memberName" format // based on the struct definition. // // this relies on the assumption that each member in the // `.struct_ops` section has a relocation at its starting byte offset. func structOpsSetAttachTo( sec *elfSection, baseOff uint32, userSt *btf.Struct, progs map[string]*ProgramSpec) error { for _, m := range userSt.Members { memberOff := m.Offset sym, ok := sec.relocations[uint64(baseOff+memberOff.Bytes())] if !ok { continue } p, ok := progs[sym.Name] if !ok || p == nil { return fmt.Errorf("program %s not found", sym.Name) } if p.Type != StructOps { return fmt.Errorf("program %s is not StructOps", sym.Name) } p.AttachTo = userSt.Name + ":" + m.Name } return nil } ================================================ FILE: struct_ops_test.go ================================================ package ebpf import ( "testing" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/testutils" ) func TestCreateStructOpsMapSpecSimple(t *testing.T) { requireTestmodOps(t) ms := &MapSpec{ Name: "testmod_ops", Type: StructOpsMap, Flags: sys.BPF_F_LINK, Key: &btf.Int{Size: 4}, KeySize: 4, Value: &btf.Struct{Name: "bpf_testmod_ops"}, MaxEntries: 1, Contents: []MapKV{ { Key: uint32(0), Value: make([]byte, 448), }, }, } m, err := NewMap(ms) testutils.SkipIfNotSupported(t, err) if err != nil { t.Fatalf("creating struct_ops map failed: %v", err) } t.Cleanup(func() { _ = m.Close() }) } ================================================ FILE: syscalls.go ================================================ package ebpf import ( "bytes" "errors" "fmt" "math" 
"os" "runtime" "strings" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/linux" "github.com/cilium/ebpf/internal/platform" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) var ( // pre-allocating these here since they may // get called in hot code paths and cause // unnecessary memory allocations sysErrKeyNotExist = sys.Error(ErrKeyNotExist, unix.ENOENT) sysErrKeyExist = sys.Error(ErrKeyExist, unix.EEXIST) sysErrNotSupported = sys.Error(ErrNotSupported, sys.ENOTSUPP) ) // sanitizeName replaces all invalid characters in name with replacement. // Passing a negative value for replacement will delete characters instead // of replacing them. // // The set of allowed characters may change over time. func sanitizeName(name string, replacement rune) string { return strings.Map(func(char rune) rune { switch { case char >= 'A' && char <= 'Z': return char case char >= 'a' && char <= 'z': return char case char >= '0' && char <= '9': return char case char == '.': return char case char == '_': return char default: return replacement } }, name) } func maybeFillObjName(name string) sys.ObjName { if errors.Is(haveObjName(), ErrNotSupported) { return sys.ObjName{} } name = sanitizeName(name, -1) if errors.Is(objNameAllowsDot(), ErrNotSupported) { name = strings.ReplaceAll(name, ".", "") } return sys.NewObjName(name) } func progLoad(insns asm.Instructions, typ ProgramType, license string) (*sys.FD, error) { buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) if err := insns.Marshal(buf, internal.NativeEndian); err != nil { return nil, err } bytecode := buf.Bytes() return sys.ProgLoad(&sys.ProgLoadAttr{ ProgType: sys.ProgType(typ), License: sys.NewStringPointer(license), Insns: sys.SlicePointer(bytecode), InsnCnt: uint32(len(bytecode) / asm.InstructionSize), }) } var haveNestedMaps = internal.NewFeatureTest("nested maps", func() error { if platform.IsWindows { // 
We only support efW versions which have this feature, no need to probe. return nil } _, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(ArrayOfMaps), KeySize: 4, ValueSize: 4, MaxEntries: 1, // Invalid file descriptor. InnerMapFd: ^uint32(0), }) if errors.Is(err, unix.EINVAL) { return internal.ErrNotSupported } if errors.Is(err, unix.EBADF) { return nil } return err }, "4.12", "windows:0.21.0") var haveMapMutabilityModifiers = internal.NewFeatureTest("read- and write-only maps", func() error { // This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since // BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check. m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Array), KeySize: 4, ValueSize: 4, MaxEntries: 1, MapFlags: sys.BPF_F_RDONLY_PROG, }) if err != nil { return internal.ErrNotSupported } _ = m.Close() return nil }, "5.2") var haveMmapableMaps = internal.NewFeatureTest("mmapable maps", func() error { // This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps. m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Array), KeySize: 4, ValueSize: 4, MaxEntries: 1, MapFlags: sys.BPF_F_MMAPABLE, }) if err != nil { return internal.ErrNotSupported } _ = m.Close() return nil }, "5.5") var haveInnerMaps = internal.NewFeatureTest("inner maps", func() error { // This checks BPF_F_INNER_MAP, which appeared in 5.10. m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Array), KeySize: 4, ValueSize: 4, MaxEntries: 1, MapFlags: sys.BPF_F_INNER_MAP, }) if err != nil { return internal.ErrNotSupported } _ = m.Close() return nil }, "5.10") var haveNoPreallocMaps = internal.NewFeatureTest("prealloc maps", func() error { // This checks BPF_F_NO_PREALLOC, which appeared in 4.6. 
m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Hash), KeySize: 4, ValueSize: 4, MaxEntries: 1, MapFlags: sys.BPF_F_NO_PREALLOC, }) if err != nil { return internal.ErrNotSupported } _ = m.Close() return nil }, "4.6") func wrapMapError(err error) error { if err == nil { return nil } if errors.Is(err, unix.ENOENT) { return sysErrKeyNotExist } if errors.Is(err, unix.EEXIST) { return sysErrKeyExist } if errors.Is(err, sys.ENOTSUPP) { return sysErrNotSupported } if errors.Is(err, unix.E2BIG) { return fmt.Errorf("key too big for map: %w", err) } return err } var haveObjName = internal.NewFeatureTest("object names", func() error { if platform.IsWindows { // We only support efW versions which have this feature, no need to probe. return nil } attr := sys.MapCreateAttr{ MapType: sys.MapType(Array), KeySize: 4, ValueSize: 4, MaxEntries: 1, MapName: sys.NewObjName("feature_test"), } fd, err := sys.MapCreate(&attr) if err != nil { return internal.ErrNotSupported } _ = fd.Close() return nil }, "4.15", "windows:0.21.0") var objNameAllowsDot = internal.NewFeatureTest("dot in object names", func() error { if platform.IsWindows { // We only support efW versions which have this feature, no need to probe. 
return nil } if err := haveObjName(); err != nil { return err } attr := sys.MapCreateAttr{ MapType: sys.MapType(Array), KeySize: 4, ValueSize: 4, MaxEntries: 1, MapName: sys.NewObjName(".test"), } fd, err := sys.MapCreate(&attr) if err != nil { return internal.ErrNotSupported } _ = fd.Close() return nil }, "5.2", "windows:0.21.0") var haveBatchAPI = internal.NewFeatureTest("map batch api", func() error { var maxEntries uint32 = 2 attr := sys.MapCreateAttr{ MapType: sys.MapType(Hash), KeySize: 4, ValueSize: 4, MaxEntries: maxEntries, } fd, err := sys.MapCreate(&attr) if err != nil { return internal.ErrNotSupported } defer fd.Close() keys := []uint32{1, 2} values := []uint32{3, 4} kp, _ := marshalMapSyscallInput(keys, 8) vp, _ := marshalMapSyscallInput(values, 8) err = sys.MapUpdateBatch(&sys.MapUpdateBatchAttr{ MapFd: fd.Uint(), Keys: kp, Values: vp, Count: maxEntries, }) if err != nil { return internal.ErrNotSupported } return nil }, "5.6") var haveProbeReadKernel = internal.NewFeatureTest("bpf_probe_read_kernel", func() error { insns := asm.Instructions{ asm.Mov.Reg(asm.R1, asm.R10), asm.Add.Imm(asm.R1, -8), asm.Mov.Imm(asm.R2, 8), asm.Mov.Imm(asm.R3, 0), asm.FnProbeReadKernel.Call(), asm.Return(), } fd, err := progLoad(insns, Kprobe, "GPL") if err != nil { return internal.ErrNotSupported } _ = fd.Close() return nil }, "5.5") var haveBPFToBPFCalls = internal.NewFeatureTest("bpf2bpf calls", func() error { insns := asm.Instructions{ asm.Call.Label("prog2").WithSymbol("prog1"), asm.Return(), asm.Mov.Imm(asm.R0, 0).WithSymbol("prog2"), asm.Return(), } fd, err := progLoad(insns, SocketFilter, "MIT") if err != nil { return internal.ErrNotSupported } _ = fd.Close() return nil }, "4.16") var haveSyscallWrapper = internal.NewFeatureTest("syscall wrapper", func() error { prefix := linux.PlatformPrefix() if prefix == "" { return fmt.Errorf("unable to find the platform prefix for (%s)", runtime.GOARCH) } args := tracefs.ProbeArgs{ Type: tracefs.Kprobe, Symbol: prefix + 
"sys_bpf", Pid: -1, } var err error args.Group, err = tracefs.RandomGroup("ebpf_probe") if err != nil { return err } evt, err := tracefs.NewEvent(args) if errors.Is(err, os.ErrNotExist) { return internal.ErrNotSupported } if err != nil { return err } return evt.Close() }, "4.17") var haveProgramExtInfos = internal.NewFeatureTest("program ext_infos", func() error { insns := asm.Instructions{ asm.Mov.Imm(asm.R0, 0), asm.Return(), } buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) if err := insns.Marshal(buf, internal.NativeEndian); err != nil { return err } bytecode := buf.Bytes() _, err := sys.ProgLoad(&sys.ProgLoadAttr{ ProgType: sys.ProgType(SocketFilter), License: sys.NewStringPointer("MIT"), Insns: sys.SlicePointer(bytecode), InsnCnt: uint32(len(bytecode) / asm.InstructionSize), FuncInfoCnt: 1, ProgBtfFd: math.MaxUint32, }) if errors.Is(err, unix.EBADF) { return nil } if errors.Is(err, unix.E2BIG) { return ErrNotSupported } return err }, "5.0") ================================================ FILE: syscalls_test.go ================================================ package ebpf import ( "testing" "github.com/go-quicktest/qt" "github.com/cilium/ebpf/internal/testutils" ) func TestSanitizeName(t *testing.T) { for input, want := range map[string]string{ "test": "test", "": "", "a-b": "ab", "yeah so": "yeahso", "dot.": "dot.", "Capital": "Capital", "t_est": "t_est", "hörnchen": "hrnchen", } { qt.Assert(t, qt.Equals(sanitizeName(input, -1), want), qt.Commentf("input: %s", input)) } } func TestHaveBatchAPI(t *testing.T) { testutils.CheckFeatureTest(t, haveBatchAPI) } func TestHaveObjName(t *testing.T) { testutils.CheckFeatureTest(t, haveObjName) } func TestObjNameAllowsDot(t *testing.T) { testutils.CheckFeatureTest(t, objNameAllowsDot) } func TestHaveNestedMaps(t *testing.T) { testutils.CheckFeatureTest(t, haveNestedMaps) } func TestHaveMapMutabilityModifiers(t *testing.T) { testutils.CheckFeatureTest(t, haveMapMutabilityModifiers) } func TestHaveMmapableMaps(t 
*testing.T) { testutils.CheckFeatureTest(t, haveMmapableMaps) } func TestHaveInnerMaps(t *testing.T) { testutils.CheckFeatureTest(t, haveInnerMaps) } func TestHaveProbeReadKernel(t *testing.T) { testutils.CheckFeatureTest(t, haveProbeReadKernel) } func TestHaveBPFToBPFCalls(t *testing.T) { testutils.CheckFeatureTest(t, haveBPFToBPFCalls) } func TestHaveSyscallWrapper(t *testing.T) { testutils.CheckFeatureTest(t, haveSyscallWrapper) } func TestHaveProgramExtInfos(t *testing.T) { testutils.CheckFeatureTest(t, haveProgramExtInfos) } ================================================ FILE: testdata/arena.c ================================================ /* This file excercises the ELF loader. It is not a valid BPF program. */ #include "common.h" struct { __uint(type, BPF_MAP_TYPE_ARENA); __uint(map_flags, BPF_F_MMAPABLE); __uint(max_entries, 100); /* number of pages */ __ulong(map_extra, 0x1ull << 44); /* start of mmap region */ } arena __section(".maps"); ================================================ FILE: testdata/btf_map_init.c ================================================ /* This file excercises the ELF loader. It is not a valid BPF program. */ #include "common.h" int __section("socket/tail") tail_1() { return 42; } // Tail call map (program array) initialized with program pointers. struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __type(key, uint32_t); __type(value, uint32_t); __uint(max_entries, 2); __array(values, int()); } prog_array_init __section(".maps") = { .values = { // Skip index 0 to exercise empty array slots. [1] = &tail_1, }, }; int __section("socket/main") tail_main(void *ctx) { // If prog_array_init is correctly populated, the tail call // will succeed and the program will continue in tail_1 and // not return here. bpf_tail_call(ctx, &prog_array_init, 1); return 0; } // Inner map with a single possible entry. 
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, uint32_t);
	__type(value, uint32_t);
} inner_map __section(".maps");

// Outer map carrying a reference to the inner map.
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 2);
	__type(key, uint32_t);
	__type(value, uint32_t);
	__array(values, typeof(inner_map));
} outer_map_init __section(".maps") = {
	.values =
		{
			// Skip index 0 to exercise empty array slots.
			[1] = &inner_map,
		},
};

================================================
FILE: testdata/common.h
================================================
#pragma once

/* Minimal fixed-width integer types for the test programs.
 * NOTE(review): uint64_t is 'unsigned long', which assumes a target where
 * long is 64 bits wide - confirm for the targets these files are built for.
 */
typedef _Bool bool;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef unsigned long uint64_t;

enum libbpf_tristate {
	TRI_NO = 0,
	TRI_YES = 1,
	TRI_MODULE = 2,
};

/* Two-level token pasting so that macro arguments are expanded first. */
#define ___bpf_concat(a, b) ____bpf_concat(a, b)
#define ____bpf_concat(a, b) a ## b

/* Places a symbol in ELF section NAME and marks it as used. */
#define __section(NAME) __attribute__((section(NAME), used))

/* Map definition helpers:
 * __uint declares 'name' as a pointer to an int array of 'val' elements,
 * __type declares 'name' as a pointer to typeof(val),
 * __array declares 'name' as an unsized array of pointers to typeof(val),
 * __ulong declares 'name' as an anonymous enum whose single, uniquely named
 * enumerator equals 'val'.
 */
#define __uint(name, val) int(*name)[val]
#define __type(name, val) typeof(val) *name
#define __array(name, val) typeof(val) *name[]
#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name

#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
#define __noinline __attribute__((noinline))
#define __weak __attribute__((weak))
#define __hidden __attribute__((visibility("hidden")))

/* Evaluates to !!sym. The static assertion fails to compile when !!sym is a
 * compile-time constant, with a message asking for sym to be marked __weak.
 */
#define bpf_ksym_exists(sym) \
	({ \
		_Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \
		!!sym; \
	})

#define core_access __builtin_preserve_access_index

#define BPF_MAP_TYPE_HASH (1)
#define BPF_MAP_TYPE_ARRAY (2)
#define BPF_MAP_TYPE_PROG_ARRAY (3)
#define BPF_MAP_TYPE_PERF_EVENT_ARRAY (4)
#define BPF_MAP_TYPE_ARRAY_OF_MAPS (12)
#define BPF_MAP_TYPE_HASH_OF_MAPS (13)
#define BPF_MAP_TYPE_ARENA (33)

#define BPF_F_NO_PREALLOC (1U << 0)
#define BPF_F_MMAPABLE (1U << 10)
#define BPF_F_CURRENT_CPU (0xffffffffULL)

/* From tools/lib/bpf/libbpf.h */
struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

/* Helper declarations: each function pointer is initialised to a small
 * integer constant instead of a real address.
 * NOTE(review): presumably the constant is the BPF helper ID from the
 * kernel's uapi bpf.h - confirm before adding new helpers.
 */
static void *(*bpf_map_lookup_elem)(const void *map, const void *key) = (void *)1;
static long (*bpf_map_update_elem)(const void *map, const void *key, const void *value, uint64_t flags) = (void *)2;
static long (*bpf_trace_printk)(const char *fmt, uint32_t fmt_size, ...) = (void *)6;
static uint32_t (*bpf_get_smp_processor_id)(void) = (void *)8;
static long (*bpf_tail_call)(void *ctx, void *prog_array_map, uint32_t index) = (void *)12;
static int (*bpf_perf_event_output)(const void *ctx, const void *map, uint64_t index, const void *data, uint64_t size) = (void *)25;
static void *(*bpf_get_current_task)() = (void *)35;
static long (*bpf_probe_read_kernel)(void *dst, uint32_t size, const void *unsafe_ptr) = (void *)113;
static long (*bpf_for_each_map_elem)(const void *map, void *callback_fn, void *callback_ctx, uint64_t flags) = (void *)164;

================================================
FILE: testdata/constants.c
================================================
/* This file exercises the ELF loader. It is not a valid BPF program. */

#include "common.h"

char __license[] __section("license") = "MIT";

/*
 * Maps with the Freeze flag set (like .rodata) must be frozen before sending
 * programs to the verifier so constants can be used during verification. If
 * done incorrectly, the following sk_lookup program will fail to verify since
 * the only valid return code is 1. See bpf/verifier.c:check_return_code().
 */
volatile const uint32_t ret = -1;

__section("sk_lookup/") int freeze_rodata() {
	return ret;
}

================================================
FILE: testdata/docker/Dockerfile
================================================
# This Dockerfile generates a build environment for generating ELFs
# of testdata programs. Run `make build` in this directory to build it.
FROM golang:1.25-bookworm COPY llvm-snapshot.gpg.key . RUN apt-get update && \ apt-get -y --no-install-recommends install ca-certificates gnupg && \ apt-key add llvm-snapshot.gpg.key && \ rm llvm-snapshot.gpg.key && \ apt-get remove -y gnupg && \ apt-get autoremove -y && \ rm -rf /var/lib/apt/lists/* COPY llvm.list /etc/apt/sources.list.d RUN apt-get update && \ apt-get -y --no-install-recommends install \ make git gawk \ libbpf-dev \ bpftool \ clang-format \ clang-14 llvm-14 \ clang-17 llvm-17 \ clang-20 llvm-20 && \ rm -rf /var/lib/apt/lists/* # Examples use `#include <asm/types.h>` which Debian carries in asm-generic/ instead. RUN ln -s /usr/include/asm-generic /usr/include/asm RUN curl -fL "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/plain/scripts/extract-vmlinux" -o "/usr/local/bin/extract-vmlinux" && \ chmod +x /usr/local/bin/extract-vmlinux ================================================ FILE: testdata/docker/IMAGE ================================================ ghcr.io/cilium/ebpf-builder ================================================ FILE: testdata/docker/Makefile ================================================ # Makefile to build and push the `cilium/ebpf` llvm builder Docker image. CONTAINER_ENGINE ?= docker IMAGE := $(shell cat IMAGE) EPOCH := $(shell date +'%s') ifndef IMAGE $(error IMAGE file not present in Makefile directory) endif .PHONY: build push build: ${CONTAINER_ENGINE} build --no-cache . -t "$(IMAGE):$(EPOCH)" echo $(EPOCH) > VERSION push: ${CONTAINER_ENGINE} push "$(IMAGE):$(shell cat VERSION)" ================================================ FILE: testdata/docker/README.md ================================================ # `cilium/ebpf` LLVM Builder Image This is a simple Docker image to provide reproducible eBPF ELF builds across contributors' workstations. This standardizes on a single environment used to regenerate e.g. testdata ELFs and does not depend on the toolchain installed on the host machine. 
Additionally, it reduces drift in the bytecode committed to the repository over time as the same exact clang + llc version is used throughout the development lifecycle. Only when upgrading or rebuilding the Docker image would changes in .elf files be expected (assuming the .c files are untouched). ## Building Building the image requires Docker. Run the build with: `make build` This updates the `VERSION` file. Commit it and submit a PR upstream. ### Regeneration Testdata on non-x86 platforms Before running `make`, ensure [Docker buildx](https://docs.docker.com/buildx/working-with-buildx/) is enabled. Additionally `QEMU user` and `binfmt` should be installed. On a Debian based distribution the command to add them is `apt install qemu-user-static binfmt-support`. ## Pushing After building, push the image to the Docker registry specified in `IMAGE` with: `make push` ================================================ FILE: testdata/docker/VERSION ================================================ 1768997810 ================================================ FILE: testdata/docker/llvm-snapshot.gpg.key ================================================ -----BEGIN PGP PUBLIC KEY BLOCK----- Version: GnuPG v1.4.12 (GNU/Linux) mQINBFE9lCwBEADi0WUAApM/mgHJRU8lVkkw0CHsZNpqaQDNaHefD6Rw3S4LxNmM EZaOTkhP200XZM8lVdbfUW9xSjA3oPldc1HG26NjbqqCmWpdo2fb+r7VmU2dq3NM R18ZlKixiLDE6OUfaXWKamZsXb6ITTYmgTO6orQWYrnW6ckYHSeaAkW0wkDAryl2 B5v8aoFnQ1rFiVEMo4NGzw4UX+MelF7rxaaregmKVTPiqCOSPJ1McC1dHFN533FY Wh/RVLKWo6npu+owtwYFQW+zyQhKzSIMvNujFRzhIxzxR9Gn87MoLAyfgKEzrbbT DhqqNXTxS4UMUKCQaO93TzetX/EBrRpJj+vP640yio80h4Dr5pAd7+LnKwgpTDk1 G88bBXJAcPZnTSKu9I2c6KY4iRNbvRz4i+ZdwwZtdW4nSdl2792L7Sl7Nc44uLL/ ZqkKDXEBF6lsX5XpABwyK89S/SbHOytXv9o4puv+65Ac5/UShspQTMSKGZgvDauU cs8kE1U9dPOqVNCYq9Nfwinkf6RxV1k1+gwtclxQuY7UpKXP0hNAXjAiA5KS5Crq 7aaJg9q2F4bub0mNU6n7UI6vXguF2n4SEtzPRk6RP+4TiT3bZUsmr+1ktogyOJCc Ha8G5VdL+NBIYQthOcieYCBnTeIH7D3Sp6FYQTYtVbKFzmMK+36ERreL/wARAQAB 
tD1TeWx2ZXN0cmUgTGVkcnUgLSBEZWJpYW4gTExWTSBwYWNrYWdlcyA8c3lsdmVz dHJlQGRlYmlhbi5vcmc+iQI4BBMBAgAiBQJRPZQsAhsDBgsJCAcDAgYVCAIJCgsE FgIDAQIeAQIXgAAKCRAVz00Yr090Ibx+EADArS/hvkDF8juWMXxh17CgR0WZlHCC 9CTBWkg5a0bNN/3bb97cPQt/vIKWjQtkQpav6/5JTVCSx2riL4FHYhH0iuo4iAPR udC7Cvg8g7bSPrKO6tenQZNvQm+tUmBHgFiMBJi92AjZ/Qn1Shg7p9ITivFxpLyX wpmnF1OKyI2Kof2rm4BFwfSWuf8Fvh7kDMRLHv+MlnK/7j/BNpKdozXxLcwoFBmn l0WjpAH3OFF7Pvm1LJdf1DjWKH0Dc3sc6zxtmBR/KHHg6kK4BGQNnFKujcP7TVdv gMYv84kun14pnwjZcqOtN3UJtcx22880DOQzinoMs3Q4w4o05oIF+sSgHViFpc3W R0v+RllnH05vKZo+LDzc83DQVrdwliV12eHxrMQ8UYg88zCbF/cHHnlzZWAJgftg hB08v1BKPgYRUzwJ6VdVqXYcZWEaUJmQAPuAALyZESw94hSo28FAn0/gzEc5uOYx K+xG/lFwgAGYNb3uGM5m0P6LVTfdg6vDwwOeTNIExVk3KVFXeSQef2ZMkhwA7wya KJptkb62wBHFE+o9TUdtMCY6qONxMMdwioRE5BYNwAsS1PnRD2+jtlI0DzvKHt7B MWd8hnoUKhMeZ9TNmo+8CpsAtXZcBho0zPGz/R8NlJhAWpdAZ1CmcPo83EW86Yq7 BxQUKnNHcwj2ebkCDQRRPZQsARAA4jxYmbTHwmMjqSizlMJYNuGOpIidEdx9zQ5g zOr431/VfWq4S+VhMDhs15j9lyml0y4ok215VRFwrAREDg6UPMr7ajLmBQGau0Fc bvZJ90l4NjXp5p0NEE/qOb9UEHT7EGkEhaZ1ekkWFTWCgsy7rRXfZLxB6sk7pzLC DshyW3zjIakWAnpQ5j5obiDy708pReAuGB94NSyb1HoW/xGsGgvvCw4r0w3xPStw F1PhmScE6NTBIfLliea3pl8vhKPlCh54Hk7I8QGjo1ETlRP4Qll1ZxHJ8u25f/ta RES2Aw8Hi7j0EVcZ6MT9JWTI83yUcnUlZPZS2HyeWcUj+8nUC8W4N8An+aNps9l/ 21inIl2TbGo3Yn1JQLnA1YCoGwC34g8QZTJhElEQBN0X29ayWW6OdFx8MDvllbBV ymmKq2lK1U55mQTfDli7S3vfGz9Gp/oQwZ8bQpOeUkc5hbZszYwP4RX+68xDPfn+ M9udl+qW9wu+LyePbW6HX90LmkhNkkY2ZzUPRPDHZANU5btaPXc2H7edX4y4maQa xenqD0lGh9LGz/mps4HEZtCI5CY8o0uCMF3lT0XfXhuLksr7Pxv57yue8LLTItOJ d9Hmzp9G97SRYYeqU+8lyNXtU2PdrLLq7QHkzrsloG78lCpQcalHGACJzrlUWVP/ fN3Ht3kAEQEAAYkCHwQYAQIACQUCUT2ULAIbDAAKCRAVz00Yr090IbhWEADbr50X OEXMIMGRLe+YMjeMX9NG4jxs0jZaWHc/WrGR+CCSUb9r6aPXeLo+45949uEfdSsB pbaEdNWxF5Vr1CSjuO5siIlgDjmT655voXo67xVpEN4HhMrxugDJfCa6z97P0+ML PdDxim57uNqkam9XIq9hKQaurxMAECDPmlEXI4QT3eu5qw5/knMzDMZj4Vi6hovL wvvAeLHO/jsyfIdNmhBGU2RWCEZ9uo/MeerPHtRPfg74g+9PPfP6nyHD2Wes6yGd oVQwtPNAQD6Cj7EaA2xdZYLJ7/jW6yiPu98FFWP74FN2dlyEA2uVziLsfBrgpS4l 
tVOlrO2YzkkqUGrybzbLpj6eeHx+Cd7wcjI8CalsqtL6cG8cUEjtWQUHyTbQWAgG 5VPEgIAVhJ6RTZ26i/G+4J8neKyRs4vz+57UGwY6zI4AB1ZcWGEE3Bf+CDEDgmnP LSwbnHefK9IljT9XU98PelSryUO/5UPw7leE0akXKB4DtekToO226px1VnGp3Bov 1GBGvpHvL2WizEwdk+nfk8LtrLzej+9FtIcq3uIrYnsac47Pf7p0otcFeTJTjSq3 krCaoG4Hx0zGQG2ZFpHrSrZTVy6lxvIdfi0beMgY6h78p6M9eYZHQHc02DjFkQXN bXb5c6gCHESH5PXwPU4jQEE7Ib9J6sbk7ZT2Mw== =j+4q -----END PGP PUBLIC KEY BLOCK----- ================================================ FILE: testdata/docker/llvm.list ================================================ # Taken from https://apt.llvm.org. deb http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm main deb-src http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm main deb http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm-17 main deb-src http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm-17 main deb http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm-20 main deb-src http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm-20 main ================================================ FILE: testdata/errors.c ================================================ #include "common.h" #include "../btf/testdata/bpf_core_read.h" struct nonexist { int non_exist; }; enum nonexist_enum { NON_EXIST = 1 }; // Force loading program with BTF by including a relocation for a local type. #define FORCE_BTF \ do { \ if (bpf_core_type_id_local(int) == 0) \ return __LINE__; \ } while (0) __section("socket") int poisoned_single() { FORCE_BTF; struct nonexist ne; return core_access(ne.non_exist); } __section("socket") int poisoned_double() { FORCE_BTF; return bpf_core_enum_value(enum nonexist_enum, NON_EXIST); } extern int invalid_kfunc(void) __ksym __weak; __section("socket") int poisoned_kfunc() { // NB: This doesn't go via CO-RE but uses a similar mechanism to generate // an invalid instruction. We test it here for convenience. 
return invalid_kfunc(); } ================================================ FILE: testdata/fentry_fexit.c ================================================ #include "common.h" char __license[] __section("license") = "Dual MIT/GPL"; __section("fentry/target") int trace_on_entry() { return 0; } __section("fexit/target") int trace_on_exit() { return 0; } __section("tc") int target() { return 0; } ================================================ FILE: testdata/freplace.c ================================================ /* This file exercises freplace. */ #include "common.h" char __license[] __section("license") = "MIT"; struct bpf_args { uint64_t args[0]; }; __attribute__((noinline)) int subprog() { volatile int ret = 0; return ret; } __section("raw_tracepoint/sched_process_exec") int sched_process_exec(struct bpf_args *ctx) { return subprog(); } __section("freplace/subprog") int replacement() { return 0; } ================================================ FILE: testdata/fwd_decl.c ================================================ /* This file exercises the ELF loader. It is not a valid BPF program. */ #include "common.h" // Forward function declaration, never implemented. int fwd(); __section("socket") int call_fwd() { return fwd(); } ================================================ FILE: testdata/invalid-kfunc.c ================================================ #include "common.h" char __license[] __section("license") = "Dual MIT/GPL"; // This function declaration is incorrect on purpose. extern void bpf_kfunc_call_test_mem_len_pass1(void) __ksym; __section("tc") int call_kfunc() { bpf_kfunc_call_test_mem_len_pass1(); return 1; } ================================================ FILE: testdata/invalid_btf_map_init.c ================================================ /* This file exercises the ELF loader. It is not a valid BPF program.
*/ #include "common.h" struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, uint32_t); __type(value, uint64_t); __uint(max_entries, 1); } hash_map __section(".maps") = { /* This forces a non-zero byte into the .maps section. */ .key = (void *)1, }; ================================================ FILE: testdata/invalid_map.c ================================================ /* This file exercises the ELF loader. It is not a valid BPF program. */ #include "common.h" char __license[] __section("license") = "MIT"; struct { struct bpf_map_def def; uint32_t dummy; } invalid_map __section("maps") = { .def = { .type = BPF_MAP_TYPE_HASH, .key_size = 4, .value_size = 2, .max_entries = 1, }, .dummy = 1, }; ================================================ FILE: testdata/invalid_map_static.c ================================================ /* This file exercises the ELF loader. It is not a valid BPF program. */ #include "common.h" struct bpf_map_def dummy __section("maps") = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 1, .map_flags = 0, }; /* The static qualifier leads to clang not emitting a symbol. */ static struct bpf_map_def hash_map __section("maps") = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 1, .map_flags = 0, }; __section("xdp") int xdp_prog() { uint32_t key = 0; void *p = bpf_map_lookup_elem(&hash_map, &key); return !!p; } ================================================ FILE: testdata/iproute2_map_compat.c ================================================ /* This file exercises the ELF loader. It is not a valid BPF program. */ #include "common.h" #define PIN_GLOBAL_NS 2 // bpf_elf_map is a custom BPF map definition used by iproute2. // It contains the id, pinning, inner_id and inner_idx fields // in addition to the ones in struct bpf_map_def which is commonly // used in the kernel and libbpf.
struct bpf_elf_map {
	unsigned int type;
	unsigned int size_key;
	unsigned int size_value;
	unsigned int max_elem;
	unsigned int flags;
	unsigned int id;
	unsigned int pinning;
	unsigned int inner_id;
	unsigned int inner_idx;
};

// Legacy ("maps" section) definition using the iproute2 layout above. Only
// type, the key/value sizes, max_elem and pinning are set explicitly; the
// remaining fields are left zero-initialized.
struct bpf_elf_map hash_map __section("maps") = {
	.type = BPF_MAP_TYPE_HASH,
	.size_key = sizeof(uint32_t),
	.size_value = sizeof(uint64_t),
	.max_elem = 2,
	.pinning = PIN_GLOBAL_NS,
};

================================================
FILE: testdata/kconfig.c
================================================
#include "common.h"

char __license[] __section("license") = "GPL-2.0";

/* Special cases requiring feature testing or vDSO magic. */
extern int LINUX_KERNEL_VERSION __kconfig;
extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig;

/* Values pulled from the kernel configuration (e.g. /proc/config.gz). */
extern int CONFIG_HZ __kconfig;
extern enum libbpf_tristate CONFIG_BPF_SYSCALL __kconfig;
extern char CONFIG_DEFAULT_HOSTNAME[1] __kconfig;

/* Returns 0 when every kconfig extern above holds a non-zero / non-TRI_NO
 * value, otherwise the source line of the first check that failed.
 */
__section("socket") int kconfig() {
	if (LINUX_KERNEL_VERSION == 0)
		return __LINE__;

	if (LINUX_HAS_SYSCALL_WRAPPER == 0)
		return __LINE__;

	if (CONFIG_HZ == 0)
		return __LINE__;

	if (CONFIG_BPF_SYSCALL == TRI_NO)
		return __LINE__;

	if (CONFIG_DEFAULT_HOSTNAME[0] == 0)
		return __LINE__;

	return 0;
}

================================================
FILE: testdata/kfunc-kmod.c
================================================
#include "common.h"

char __license[] __section("license") = "Dual MIT/GPL";

// NOTE(review): judging by the file and symbol names this kfunc is expected
// to come from a kernel module rather than vmlinux - confirm against the test.
extern void bpf_testmod_test_mod_kfunc(int) __ksym;

__section("tc") int call_kfunc() {
	bpf_testmod_test_mod_kfunc(0);
	return 1;
}

================================================
FILE: testdata/kfunc.c
================================================
#include "common.h"

char __license[] __section("license") = "Dual MIT/GPL";

// CO-RE type compat checking doesn't allow matches between forward declarations
// and structs so we can't use forward declarations. Empty structs work just fine.
struct __sk_buff {};
struct nf_conn {};
struct bpf_sock_tuple {};
struct bpf_ct_opts {};
struct bpf_cpumask {};

extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, uint32_t, struct bpf_ct_opts *, uint32_t) __ksym;
extern void bpf_ct_release(struct nf_conn *) __ksym;

// Calls a kfunc that returns a pointer and releases the result again when
// it is non-NULL. Always returns 1.
__section("tc") int call_kfunc(void *ctx) {
	char buf[1];
	struct nf_conn *conn = bpf_skb_ct_lookup(ctx, (void *)buf, 0, (void *)buf, 0);
	if (conn) {
		bpf_ct_release(conn);
	}
	return 1;
}

extern int bpf_fentry_test1(int) __ksym;

// NOTE(review): the comment below talks about TC context while this program
// lives in an fentry section - confirm which of the two is intended.
__section("fentry/bpf_fentry_test2") int benchmark() {
	// bpf_fentry_test1 is a valid kfunc but not allowed to be called from
	// TC context. We use this to avoid loading a gajillion programs into
	// the kernel when benchmarking the loader.
	return bpf_fentry_test1(0);
}

extern void invalid_kfunc(void) __ksym __weak;

extern struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak;
extern void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak;

// Returns 1 when the weak invalid_kfunc symbol evaluates to zero, and 0
// (after calling it) otherwise.
__section("tp_btf/task_newtask") int weak_kfunc_missing(void *ctx) {
	if (bpf_ksym_exists(invalid_kfunc)) {
		invalid_kfunc();
		return 0;
	}

	return 1;
}

// Returns 1 when the weak bpf_cpumask_create symbol is non-zero (creating and
// releasing a mask along the way), and 0 otherwise.
__section("tp_btf/task_newtask") int call_weak_kfunc(void *ctx) {
	if (bpf_ksym_exists(bpf_cpumask_create)) {
		struct bpf_cpumask *mask = bpf_cpumask_create();
		if (mask)
			bpf_cpumask_release(mask);
		return 1;
	}

	return 0;
}

================================================
FILE: testdata/ksym.c
================================================
#include "common.h"

char __license[] __section("license") = "Dual MIT/GPL";

// Two-slot array that ksym_test fills with the addresses of the ksyms below.
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 2);
	__type(key, uint32_t);
	__type(value, uint64_t);
} array_map __section(".maps");

// Non-weak ksyms must be present in the kernel.
extern void bpf_init __ksym;

// Weak ksyms are potentially zero at runtime.
extern void bpf_trace_run1 __ksym __weak;

// Stores the address of bpf_init at index 0 and the address of
// bpf_trace_run1 at index 1 of array_map. Always returns 0.
__section("socket") int ksym_test() {
	uint32_t i;
	uint64_t val;

	i = 0;
	val = (uint64_t)&bpf_init;
	bpf_map_update_elem(&array_map, &i, &val, 0);

	i = 1;
	val = (uint64_t)&bpf_trace_run1;
	bpf_map_update_elem(&array_map, &i, &val, 0);

	return 0;
}

extern void non_existing_symbol __ksym __weak;

// Returns 1 when the address of the weak symbol is zero, 0 otherwise.
__section("socket") int ksym_missing_test() {
	if (&non_existing_symbol == 0) {
		return 1;
	}
	return 0;
}

================================================
FILE: testdata/linked.h
================================================
#pragma once

#include "common.h"

/* When linking BTF map definitions, all maps must be compatible with each
 * other, otherwise bpftool throws an error.
 */
struct h32_btf {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, uint32_t);
	__type(value, uint32_t);
	__uint(max_entries, 1);
};

/* Legacy map definitions are appended like program sections are, and can
 * win/lose based on linking order, even if they're completely different maps.
 * Test whether the expected candidate wins by configuring different
 * max_entries values.
 */
#define h32_legacy(MAX_ENTRIES) \
	{ \
		.type = BPF_MAP_TYPE_HASH, \
		.key_size = sizeof(int), \
		.value_size = sizeof(int), \
		.max_entries = MAX_ENTRIES, \
		.map_flags = BPF_F_NO_PREALLOC, \
	}

================================================
FILE: testdata/linked1.c
================================================
#include "common.h"
#include "linked.h"

// Weak in L1, strong in L2.
__weak __section(".maps") struct h32_btf map_l1_w;

// Strong in L1, weak in L2.
__section(".maps") struct h32_btf map_l1_s;

// Weak in both L1 and L2.
__weak __section(".maps") struct h32_btf map_ww;

// Strong in L1, only defined here.
__section(".maps") struct h32_btf map_l1;

// Strong in L1, weak in L2.
__section("maps") struct bpf_map_def map_legacy_l1_s = h32_legacy(1);

// Weak in L1, strong in L2.
__weak __section("maps") struct bpf_map_def map_legacy_l2_s = h32_legacy(__LINE__);

// Call external symbol only defined in L2.
extern int l2(void);

__section("socket") int entry_l2() {
	return l2();
}

// Weak and only defined in L1, called extern in L2.
__weak __noinline int l1() {
	return 0;
}

// Weak in L1, strong in L2.
// This weak copy returns __LINE__ (non-zero); the strong copy in linked2.c
// returns 0, which makes the two distinguishable by return value.
__weak __noinline int l1_w() {
	return __LINE__;
}

__weak __section("socket") int entry_l1_w() {
	return l1_w();
}

// Strong in L1, weak in L2.
__noinline int l1_s() {
	return 0;
}

__section("socket") int entry_l1_s() {
	return l1_s();
}

// Weak in both L1 and L2.
__weak __noinline int ww() {
	return 0;
}

__weak __section("socket") int entry_ww() {
	return ww();
}

================================================
FILE: testdata/linked2.c
================================================
#include "common.h"
#include "linked.h"

// Weak in L1, strong in L2.
__section(".maps") struct h32_btf map_l1_w;

// Strong in L1, weak in L2.
__weak __section(".maps") struct h32_btf map_l1_s;

// Weak in both L1 and L2.
__weak __section(".maps") struct h32_btf map_ww;

// Strong in L2, only defined here.
__section(".maps") struct h32_btf map_l2;

// Strong in L1, weak in L2.
__weak __section("maps") struct bpf_map_def map_legacy_l1_s = h32_legacy(__LINE__);

// Weak in L1, strong in L2.
__section("maps") struct bpf_map_def map_legacy_l2_s = h32_legacy(1);

// Call external symbol only defined in L1.
extern int l1(void);

__section("socket") int entry_l1() {
	return l1();
}

// Weak and only defined in L2, called extern in L1.
__weak __noinline int l2() {
	return 0;
}

// Weak in L1, strong in L2.
__noinline int l1_w() {
	return 0;
}

__section("socket") int entry_l1_w() {
	return l1_w();
}

// Strong in L1, weak in L2.
// This weak copy returns __LINE__ (non-zero); the strong copy in linked1.c
// returns 0.
__weak __noinline int l1_s() {
	return __LINE__;
}

__weak __section("socket") int entry_l1_s() {
	return l1_s();
}

// Weak in both L1 and L2.
__weak __noinline int ww() {
	return __LINE__;
}

__weak __section("socket") int entry_ww() {
	return ww();
}

================================================
FILE: testdata/loader.c
================================================
/* This file exercises the ELF loader. */

#include "common.h"

char __license[] __section("license") = "MIT";

/* The map definitions come from one of two headers, selected by __NOBTF__. */
#ifdef __NOBTF__
#include "loader_nobtf.h"
#else
#include "loader.h"
#endif

/* Four non-inlined helpers: a static one and a global one in explicitly
 * named sections ("static", "other"), and two globals without a section
 * attribute.
 */
static int __attribute__((noinline)) __section("static") static_fn(uint32_t arg) {
	return arg - 1;
}

int __attribute__((noinline)) global_fn2(uint32_t arg) {
	return arg + 2;
}

int __attribute__((noinline)) __section("other") global_fn3(uint32_t arg) {
	return arg + 1;
}

int __attribute__((noinline)) global_fn(uint32_t arg) {
	return static_fn(arg) + global_fn2(arg) + global_fn3(arg);
}

volatile unsigned int key1 = 0; // .bss
volatile unsigned int key2 = 1; // .data
volatile const unsigned int key3 = 2; // .rodata

// .rodata
volatile const uint32_t arg = 1;
// custom .rodata section
volatile const uint32_t arg2 __section(".rodata.test") = 2;
// custom .data section
volatile uint32_t arg3 __section(".data.test");

/* References both maps, all three keys, both kinds of function and all three
 * arg variables from a single program.
 */
__section("xdp") int xdp_prog() {
	bpf_map_lookup_elem(&hash_map, (void *)&key1);
	bpf_map_lookup_elem(&hash_map2, (void *)&key2);
	bpf_map_lookup_elem(&hash_map2, (void *)&key3);
	return static_fn(arg) + global_fn(arg) + arg2 + arg3;
}

// This function has no relocations, and is thus parsed differently.
__section("socket") int no_relocation() {
	return 0;
}

// Make sure we allow relocations generated by inline assembly.
__section("socket/2") int asm_relocation() {
	int my_const;
	// MY_CONST is not declared in C; it is only named inside the assembly
	// template.
	asm("%0 = MY_CONST ll" : "=r"(my_const));
	return my_const;
}

volatile const unsigned int uneg = -1;
volatile const int neg = -2;
static volatile const unsigned int static_uneg = -3;
static volatile const int static_neg = -4;

/* Returns 0 when all four constants above read back with their initial
 * values, otherwise the source line of the first check that failed.
 */
__section("socket/3") int data_sections() {
	if (uneg != (unsigned int)-1)
		return __LINE__;

	if (neg != -2)
		return __LINE__;

	if (static_uneg != (unsigned int)-3)
		return __LINE__;

	if (static_neg != -4)
		return __LINE__;

	return 0;
}

/*
 * Up until LLVM 14, this program results in an .rodata.cst32 section
 * that is accessed by 'return values[i]'. For this section, no BTF is
 * emitted. 'values' cannot be rewritten, since there is no BTF info
 * describing the data section.
 */
__section("socket/4") int anon_const() {
	volatile int ctx = 0;

// 32 bytes wide results in a .rodata.cst32 section.
#define values (uint64_t[]){0x0, 0x1, 0x2, 0x3}

	int i;
	for (i = 0; i < 3; i++) {
		if (ctx == values[i]) {
			return values[i];
		}
	}

	return 0;
}

================================================
FILE: testdata/loader.h
================================================
/* BTF-style map definitions for loader.c */

#pragma once

#include "common.h"

// Hash map declared with key/value types and BPF_F_NO_PREALLOC.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, uint32_t);
	__type(value, uint64_t);
	__uint(max_entries, 1);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} hash_map __section(".maps");

// Hash map declared with explicit key_size/value_size instead of types.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(uint32_t));
	__uint(value_size, sizeof(uint64_t));
	__uint(max_entries, 2);
} hash_map2 __section(".maps");

// Hash map carrying a pinning attribute.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, uint32_t);
	__type(value, uint64_t);
	__uint(max_entries, 1);
	__uint(pinning, 1 /* LIBBPF_PIN_BY_NAME */);
} btf_pin __section(".maps");

// Named map type definition, without structure variable declaration.
struct inner_map_t { __uint(type, BPF_MAP_TYPE_HASH); __type(key, uint32_t); __type(value, int); __uint(max_entries, 1); }; // Anonymous map type definition with structure variable declaration. struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(key_size, sizeof(uint32_t)); __uint(max_entries, 1); __array(values, struct inner_map_t); } btf_outer_map __section(".maps"); // Array of maps with anonymous inner struct. struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(key_size, sizeof(uint32_t)); __uint(max_entries, 1); __array( values, struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1); __type(key, uint32_t); __type(value, uint32_t); }); } btf_outer_map_anon __section(".maps"); struct perf_event { uint64_t foo; uint64_t bar; }; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(max_entries, 4096); __type(value, struct perf_event); } perf_event_array __section(".maps"); typedef struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(key_size, sizeof(uint32_t)); __uint(value_size, sizeof(uint64_t)); __uint(max_entries, 1); } array_map_t; // Map definition behind a typedef. array_map_t btf_typedef_map __section(".maps"); #define __decl_tags __attribute__((btf_decl_tag("a"), btf_decl_tag("b"))) // Legacy map definition decorated with decl tags. struct bpf_map_def bpf_decl_map __decl_tags __section("maps") = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 1, }; // BTF map definition decorated with decl tags. 
struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(key_size, sizeof(uint32_t)); __uint(value_size, sizeof(uint64_t)); __uint(max_entries, 1); } btf_decl_map __decl_tags __section(".maps"); ================================================ FILE: testdata/loader_nobtf.h ================================================ /* Legacy map definitions for loader.c (no BTF) */ #pragma once #include "common.h" struct bpf_map_def hash_map __section("maps") = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 1, .map_flags = BPF_F_NO_PREALLOC, }; struct bpf_map_def hash_map2 __section("maps") = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 2, }; // key_size and value_size always need to be 4 bytes and are automatically set // when the map is created if left at 0 in the ELF. Leave them at 0 for // consistency with the BTF map definitions, which specify key and value types, // causing sizes to be 0 in the MapSpec. This avoids special casing in tests. struct bpf_map_def perf_event_array __section("maps") = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .max_entries = 4096, }; ================================================ FILE: testdata/manyprogs.c ================================================ /* This file is used for benchmarking NewCollection(). 
*/ #include "../btf/testdata/bpf_core_read.h" #include "common.h" char __license[] __section("license") = "Dual MIT/GPL"; struct bpf_map_def __section("maps") kprobe_map = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 128, }; #pragma clang attribute push(__attribute__((preserve_access_index)), apply_to = record) struct ns_common { unsigned int inum; }; struct mnt_namespace { struct ns_common ns; }; struct nsproxy { struct mnt_namespace *mnt_ns; }; struct task_struct { struct nsproxy *nsproxy; }; #pragma clang attribute pop static inline int impl() { uint64_t initval = 1, *valp; struct task_struct *task = (struct task_struct *)bpf_get_current_task(); uint32_t mntns = BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum); valp = bpf_map_lookup_elem(&kprobe_map, &mntns); if (!valp) { bpf_map_update_elem(&kprobe_map, &mntns, &initval, 0); return 0; } __sync_fetch_and_add(valp, 1); return 0; } #define DEFINE_PROBE(i) \ __section("kprobe/sys_execvea" #i) int kprobe_execve##i() { \ return impl(); \ } DEFINE_PROBE(0); DEFINE_PROBE(1); DEFINE_PROBE(2); DEFINE_PROBE(3); DEFINE_PROBE(4); DEFINE_PROBE(5); DEFINE_PROBE(6); DEFINE_PROBE(7); DEFINE_PROBE(8); DEFINE_PROBE(9); DEFINE_PROBE(10); DEFINE_PROBE(11); DEFINE_PROBE(12); DEFINE_PROBE(13); DEFINE_PROBE(14); DEFINE_PROBE(15); DEFINE_PROBE(16); DEFINE_PROBE(17); DEFINE_PROBE(18); DEFINE_PROBE(19); DEFINE_PROBE(20); DEFINE_PROBE(21); DEFINE_PROBE(22); DEFINE_PROBE(23); DEFINE_PROBE(24); DEFINE_PROBE(25); DEFINE_PROBE(26); DEFINE_PROBE(27); DEFINE_PROBE(28); DEFINE_PROBE(29); ================================================ FILE: testdata/map_spin_lock.c ================================================ /* This file exercises bpf_spin_lock. 
*/ #include "common.h" struct bpf_spin_lock { uint32_t val; }; struct hash_elem { int cnt; struct bpf_spin_lock lock; }; struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, uint32_t); __type(value, struct hash_elem); __uint(max_entries, 2); } spin_lock_map __section(".maps"); ================================================ FILE: testdata/raw_tracepoint.c ================================================ /* This file exercises the ELF loader. */ #include "common.h" char __license[] __section("license") = "MIT"; struct bpf_args { uint64_t args[0]; }; __section("raw_tracepoint/sched_process_exec") int sched_process_exec(struct bpf_args *ctx) { return 0; } ================================================ FILE: testdata/strings.c ================================================ #include "common.h" char __license[] __section("license") = "MIT"; typedef char custkey[48]; struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 2); __type(key, custkey); __type(value, uint32_t); } my_map __section(".maps"); #define KEY "This string is allocated in the string section\n" __section("xdp") int filter() { uint32_t *value = bpf_map_lookup_elem(&my_map, KEY); if (value) (*value)++; else { uint32_t newValue = 1; bpf_map_update_elem(&my_map, KEY, &newValue, 0); } return 2; } ================================================ FILE: testdata/struct_ops.c ================================================ #include "common.h" char _license[] __section("license") = "GPL"; struct bpf_testmod_ops { int (*test_1)(void); void (*test_2)(int, int); int data; }; __section("struct_ops/test_1") int test_1(void) { return 0; } __section(".struct_ops.link") struct bpf_testmod_ops testmod_ops = { .test_1 = (void *)test_1, .data = 0xdeadbeef, }; ================================================ FILE: testdata/subprog_reloc.c ================================================ /* This file exercises the ELF loader. 
*/ #include "common.h" char __license[] __section("license") = "MIT"; struct bpf_map_def hash_map __section("maps") = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(uint32_t), .value_size = sizeof(uint64_t), .max_entries = 1, }; static int sub_prog() { uint32_t key = 0; uint64_t val = 42; bpf_map_update_elem(&hash_map, &key, &val, /* BPF_ANY */ 0); return 0; } __section("xdp") int fp_relocation() { uint32_t key = 0; uint64_t val = 1; bpf_map_update_elem(&hash_map, &key, &val, /* BPF_ANY */ 0); bpf_for_each_map_elem(&hash_map, sub_prog, (void *)0, 0); uint64_t *new_val = bpf_map_lookup_elem(&hash_map, &key); if (!new_val) { return -1; } return *new_val; } ================================================ FILE: testdata/variables.c ================================================ #include "common.h" // Should not appear in CollectionSpec.Variables. __hidden volatile uint32_t hidden; // Weak variables can be overridden by non-weak symbols when linking BPF // programs using bpftool. Make sure they appear in CollectionSpec.Variables. __weak volatile uint32_t weak __section(".data.weak"); // Ensure vars are referenced so they are not culled by the loader. __section("socket") int set_vars() { hidden = 0xbeef1; weak = 0xbeef2; return 0; } volatile uint32_t var_bss __section(".bss"); __section("socket") int get_bss() { return var_bss; } volatile uint32_t var_data __section(".data"); __section("socket") int get_data() { return var_data; } volatile const uint32_t var_rodata __section(".rodata"); __section("socket") int get_rodata() { return var_rodata; } struct var_struct_t { uint64_t a; uint64_t b; }; volatile struct var_struct_t var_struct __section(".data.struct"); __section("socket") int check_struct() { return var_struct.a == 0xa && var_struct.b == 0xb; } /* Padding before b and after 1-byte-aligned d. 
*/ struct var_struct_pad_t { uint32_t a; uint64_t b; uint16_t c; uint8_t d[5]; uint64_t e; }; volatile struct var_struct_pad_t var_struct_pad __section(".data.struct"); __section("socket") int check_struct_pad() { return var_struct_pad.a == 0xa && var_struct_pad.b == 0xb && var_struct_pad.c == 0xc && var_struct_pad.d[0] == 0xd && var_struct_pad.e == 0xe; } // Variable aligned on page boundary to ensure all bytes in the mapping can be // accessed through the Variable API. volatile uint8_t var_array[8192] __section(".data.array"); __section("socket") int check_array() { return var_array[sizeof(var_array) - 1] == 0xff; } volatile uint32_t var_atomic __section(".data.atomic"); __section("socket") int add_atomic() { __sync_fetch_and_add(&var_atomic, 1); return 0; } ================================================ FILE: testdata/windows/LICENSE ================================================ MIT License Copyright (c) eBPF for Windows contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================
FILE: types.go
================================================
package ebpf

import (
	"github.com/cilium/ebpf/internal/platform"
	"github.com/cilium/ebpf/internal/sys"
)

//go:generate go tool stringer -output types_string.go -type=MapType,ProgramType,PinType

// MapType indicates the type of map structure
// that will be initialized in the kernel.
//
// The numeric value combines a platform tag (platform.LinuxTag or
// platform.WindowsTag) with the platform's own map type number, see the two
// constant blocks below.
type MapType uint32

// Map types (Linux).
//
// The order of the constants is significant: each value is
// platform.LinuxTag | iota, and the generated stringer code in
// types_string.go asserts the resulting numbers.
const (
	UnspecifiedMap MapType = MapType(platform.LinuxTag | iota)
	// Hash is a hash map
	Hash
	// Array is an array map
	Array
	// ProgramArray - A program array map is a special kind of array map whose map
	// values contain only file descriptors referring to other eBPF
	// programs. Thus, both the key_size and value_size must be
	// exactly four bytes. This map is used in conjunction with the
	// TailCall helper.
	ProgramArray
	// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
	// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
	PerfEventArray
	// PerCPUHash - This data structure is useful for people who have high performance
	// network needs and can reconcile adds at the end of some cycle, so that
	// hashes can be lock free without the use of XAdd, which can be costly.
	PerCPUHash
	// PerCPUArray - The array counterpart of PerCPUHash: useful for people who have
	// high performance network needs and can reconcile adds at the end of some cycle,
	// so that arrays can be updated lock free without the use of XAdd, which can be
	// costly. Each CPU gets a copy of this array, the contents of all of which can be
	// reconciled later.
	PerCPUArray
	// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
	// GetStackID
	StackTrace
	// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
	// if an skb is from a socket belonging to a specific cgroup
	CGroupArray
	// LRUHash - This allows you to create a small hash structure that will purge the
	// least recently used items rather than throw an error when you run out of memory
	LRUHash
	// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
	// it has more to do with including the CPU id with the LRU calculation so that if a
	// particular CPU is using a value over-and-over again, then it will be saved, but if
	// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
	// giving weight to CPU locality over overall usage.
	//
	// NOTE(review): hasPerCPUValue reports true for LRUCPUHash, which looks at
	// odds with "shared among the CPUs" above. Confirm which description is right.
	LRUCPUHash
	// LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful,
	// for storing things like IP addresses which can be bit masked allowing for keys of differing
	// values to refer to the same reference based on their masks. See wikipedia for more details.
	LPMTrie
	// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
	// itself.
	ArrayOfMaps
	// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
	// itself.
	HashOfMaps
	// DevMap - Specialized map to store references to network devices.
	DevMap
	// SockMap - Specialized map to store references to sockets.
	SockMap
	// CPUMap - Specialized map to store references to CPUs.
	CPUMap
	// XSKMap - Specialized map for XDP programs to store references to open sockets.
	XSKMap
	// SockHash - Specialized hash to store references to sockets.
	SockHash
	// CGroupStorage - Special map for CGroups.
	//
	// Not to be confused with CgroupStorage (lower-case "g") further down, which
	// is a separate constant with a different value.
	CGroupStorage
	// ReusePortSockArray - Specialized map to store references to sockets that can be reused.
	ReusePortSockArray
	// PerCPUCGroupStorage - Special per CPU map for CGroups.
	PerCPUCGroupStorage
	// Queue - FIFO storage for BPF programs.
	Queue
	// Stack - LIFO storage for BPF programs.
	Stack
	// SkStorage - Specialized map for local storage at SK for BPF programs.
	SkStorage
	// DevMapHash - Hash-based indexing scheme for references to network devices.
	DevMapHash
	// StructOpsMap - This map holds a kernel struct with its function pointer implemented in a BPF
	// program.
	StructOpsMap
	// RingBuf - Similar to PerfEventArray, but shared across all CPUs.
	RingBuf
	// InodeStorage - Specialized local storage map for inodes.
	InodeStorage
	// TaskStorage - Specialized local storage map for task_struct.
	TaskStorage
	// BloomFilter - Space-efficient data structure to quickly test whether an element exists in a set.
	BloomFilter
	// UserRingbuf - The reverse of RingBuf, used to send messages from user space to BPF programs.
	UserRingbuf
	// CgroupStorage - Store data keyed on a cgroup. If the cgroup disappears, the key is automatically removed.
	//
	// Not to be confused with CGroupStorage (upper-case "G") above.
	CgroupStorage
	// Arena - Sparse shared memory region between a BPF program and user space.
	Arena
)

// Map types (Windows).
//
// Values are platform.WindowsTag | (iota + 1), so numbering starts at one and
// the order of the constants is significant.
const (
	WindowsHash MapType = MapType(platform.WindowsTag | iota + 1)
	WindowsArray
	WindowsProgramArray
	WindowsPerCPUHash
	WindowsPerCPUArray
	WindowsHashOfMaps
	WindowsArrayOfMaps
	WindowsLRUHash
	WindowsLPMTrie
	WindowsQueue
	WindowsLRUCPUHash
	WindowsStack
	WindowsRingBuf
)

// MapTypeForPlatform returns a platform specific map type.
//
// Use this if the library doesn't provide a constant yet.
//
// plat and typ are handed to platform.EncodeConstant unchanged, and its result
// and error are returned as-is.
func MapTypeForPlatform(plat string, typ uint32) (MapType, error) {
	return platform.EncodeConstant[MapType](plat, typ)
}

// hasPerCPUValue returns true if the Map stores a value per CPU.
//
// Both the Linux and the Windows per-CPU map types are covered; every other
// map type, including unknown values, yields false.
func (mt MapType) hasPerCPUValue() bool {
	switch mt {
	case PerCPUHash, PerCPUArray, LRUCPUHash, PerCPUCGroupStorage:
		return true
	case WindowsPerCPUHash, WindowsPerCPUArray, WindowsLRUCPUHash:
		return true
	default:
		return false
	}
}

// canStoreMapOrProgram returns true if the Map stores references to another Map
// or Program.
func (mt MapType) canStoreMapOrProgram() bool {
	// StructOpsMap is neither a map-in-map nor a program array, but is counted
	// here alongside them.
	if mt == StructOpsMap {
		return true
	}
	return mt.canStoreMap() || mt.canStoreProgram()
}

// canStoreMap reports whether the map type takes a map fd on update and hands
// back a map id on lookup.
func (mt MapType) canStoreMap() bool {
	switch mt {
	case ArrayOfMaps, HashOfMaps, WindowsArrayOfMaps, WindowsHashOfMaps:
		return true
	default:
		return false
	}
}

// canStoreProgram reports whether the map type takes a program fd on update
// and hands back a program id on lookup.
func (mt MapType) canStoreProgram() bool {
	switch mt {
	case ProgramArray, WindowsProgramArray:
		return true
	default:
		return false
	}
}

// canHaveValueSize reports whether a value size may be set for the map type.
func (mt MapType) canHaveValueSize() bool {
	// Perf event arrays are special-cased: they require a value size of either
	// 0 or 4 for historical reasons. Let the library fix this up later.
	sizeless := mt == RingBuf || mt == Arena || mt == PerfEventArray
	return !sizeless
}

// mustHaveNoPrealloc reports whether the map type lacks support for
// preallocation and therefore needs the BPF_F_NO_PREALLOC flag to be created
// successfully.
func (mt MapType) mustHaveNoPrealloc() bool {
	switch mt {
	case CgroupStorage, InodeStorage, TaskStorage, SkStorage, LPMTrie:
		return true
	default:
		return false
	}
}

// ProgramType of the eBPF program
type ProgramType uint32

// eBPF program types (Linux).
const ( UnspecifiedProgram = ProgramType(sys.BPF_PROG_TYPE_UNSPEC) SocketFilter = ProgramType(sys.BPF_PROG_TYPE_SOCKET_FILTER) Kprobe = ProgramType(sys.BPF_PROG_TYPE_KPROBE) SchedCLS = ProgramType(sys.BPF_PROG_TYPE_SCHED_CLS) SchedACT = ProgramType(sys.BPF_PROG_TYPE_SCHED_ACT) TracePoint = ProgramType(sys.BPF_PROG_TYPE_TRACEPOINT) XDP = ProgramType(sys.BPF_PROG_TYPE_XDP) PerfEvent = ProgramType(sys.BPF_PROG_TYPE_PERF_EVENT) CGroupSKB = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SKB) CGroupSock = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SOCK) LWTIn = ProgramType(sys.BPF_PROG_TYPE_LWT_IN) LWTOut = ProgramType(sys.BPF_PROG_TYPE_LWT_OUT) LWTXmit = ProgramType(sys.BPF_PROG_TYPE_LWT_XMIT) SockOps = ProgramType(sys.BPF_PROG_TYPE_SOCK_OPS) SkSKB = ProgramType(sys.BPF_PROG_TYPE_SK_SKB) CGroupDevice = ProgramType(sys.BPF_PROG_TYPE_CGROUP_DEVICE) SkMsg = ProgramType(sys.BPF_PROG_TYPE_SK_MSG) RawTracepoint = ProgramType(sys.BPF_PROG_TYPE_RAW_TRACEPOINT) CGroupSockAddr = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR) LWTSeg6Local = ProgramType(sys.BPF_PROG_TYPE_LWT_SEG6LOCAL) LircMode2 = ProgramType(sys.BPF_PROG_TYPE_LIRC_MODE2) SkReuseport = ProgramType(sys.BPF_PROG_TYPE_SK_REUSEPORT) FlowDissector = ProgramType(sys.BPF_PROG_TYPE_FLOW_DISSECTOR) CGroupSysctl = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SYSCTL) RawTracepointWritable = ProgramType(sys.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) CGroupSockopt = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SOCKOPT) Tracing = ProgramType(sys.BPF_PROG_TYPE_TRACING) StructOps = ProgramType(sys.BPF_PROG_TYPE_STRUCT_OPS) Extension = ProgramType(sys.BPF_PROG_TYPE_EXT) LSM = ProgramType(sys.BPF_PROG_TYPE_LSM) SkLookup = ProgramType(sys.BPF_PROG_TYPE_SK_LOOKUP) Syscall = ProgramType(sys.BPF_PROG_TYPE_SYSCALL) Netfilter = ProgramType(sys.BPF_PROG_TYPE_NETFILTER) ) // eBPF program types (Windows). 
// // See https://github.com/microsoft/ebpf-for-windows/blob/main/include/ebpf_structs.h#L170 const ( WindowsXDP ProgramType = ProgramType(platform.WindowsTag) | (iota + 1) WindowsBind WindowsCGroupSockAddr WindowsSockOps WindowsXDPTest ProgramType = ProgramType(platform.WindowsTag) | 998 WindowsSample ProgramType = ProgramType(platform.WindowsTag) | 999 ) // ProgramTypeForPlatform returns a platform specific program type. // // Use this if the library doesn't provide a constant yet. func ProgramTypeForPlatform(plat string, value uint32) (ProgramType, error) { return platform.EncodeConstant[ProgramType](plat, value) } // AttachType of the eBPF program, needed to differentiate allowed context accesses in // some newer program types like CGroupSockAddr. Should be set to AttachNone if not required. // Will cause invalid argument (EINVAL) at program load time if set incorrectly. type AttachType uint32 //go:generate go tool stringer -type AttachType -trimprefix Attach // AttachNone is an alias for AttachCGroupInetIngress for readability reasons. const AttachNone AttachType = 0 // Attach types (Linux). 
const ( AttachCGroupInetIngress = AttachType(sys.BPF_CGROUP_INET_INGRESS) AttachCGroupInetEgress = AttachType(sys.BPF_CGROUP_INET_EGRESS) AttachCGroupInetSockCreate = AttachType(sys.BPF_CGROUP_INET_SOCK_CREATE) AttachCGroupSockOps = AttachType(sys.BPF_CGROUP_SOCK_OPS) AttachSkSKBStreamParser = AttachType(sys.BPF_SK_SKB_STREAM_PARSER) AttachSkSKBStreamVerdict = AttachType(sys.BPF_SK_SKB_STREAM_VERDICT) AttachCGroupDevice = AttachType(sys.BPF_CGROUP_DEVICE) AttachSkMsgVerdict = AttachType(sys.BPF_SK_MSG_VERDICT) AttachCGroupInet4Bind = AttachType(sys.BPF_CGROUP_INET4_BIND) AttachCGroupInet6Bind = AttachType(sys.BPF_CGROUP_INET6_BIND) AttachCGroupInet4Connect = AttachType(sys.BPF_CGROUP_INET4_CONNECT) AttachCGroupInet6Connect = AttachType(sys.BPF_CGROUP_INET6_CONNECT) AttachCGroupInet4PostBind = AttachType(sys.BPF_CGROUP_INET4_POST_BIND) AttachCGroupInet6PostBind = AttachType(sys.BPF_CGROUP_INET6_POST_BIND) AttachCGroupUDP4Sendmsg = AttachType(sys.BPF_CGROUP_UDP4_SENDMSG) AttachCGroupUDP6Sendmsg = AttachType(sys.BPF_CGROUP_UDP6_SENDMSG) AttachLircMode2 = AttachType(sys.BPF_LIRC_MODE2) AttachFlowDissector = AttachType(sys.BPF_FLOW_DISSECTOR) AttachCGroupSysctl = AttachType(sys.BPF_CGROUP_SYSCTL) AttachCGroupUDP4Recvmsg = AttachType(sys.BPF_CGROUP_UDP4_RECVMSG) AttachCGroupUDP6Recvmsg = AttachType(sys.BPF_CGROUP_UDP6_RECVMSG) AttachCGroupGetsockopt = AttachType(sys.BPF_CGROUP_GETSOCKOPT) AttachCGroupSetsockopt = AttachType(sys.BPF_CGROUP_SETSOCKOPT) AttachTraceRawTp = AttachType(sys.BPF_TRACE_RAW_TP) AttachTraceFEntry = AttachType(sys.BPF_TRACE_FENTRY) AttachTraceFExit = AttachType(sys.BPF_TRACE_FEXIT) AttachModifyReturn = AttachType(sys.BPF_MODIFY_RETURN) AttachLSMMac = AttachType(sys.BPF_LSM_MAC) AttachTraceIter = AttachType(sys.BPF_TRACE_ITER) AttachCgroupInet4GetPeername = AttachType(sys.BPF_CGROUP_INET4_GETPEERNAME) AttachCgroupInet6GetPeername = AttachType(sys.BPF_CGROUP_INET6_GETPEERNAME) AttachCgroupInet4GetSockname = AttachType(sys.BPF_CGROUP_INET4_GETSOCKNAME) 
AttachCgroupInet6GetSockname = AttachType(sys.BPF_CGROUP_INET6_GETSOCKNAME) AttachXDPDevMap = AttachType(sys.BPF_XDP_DEVMAP) AttachCgroupInetSockRelease = AttachType(sys.BPF_CGROUP_INET_SOCK_RELEASE) AttachXDPCPUMap = AttachType(sys.BPF_XDP_CPUMAP) AttachSkLookup = AttachType(sys.BPF_SK_LOOKUP) AttachXDP = AttachType(sys.BPF_XDP) AttachSkSKBVerdict = AttachType(sys.BPF_SK_SKB_VERDICT) AttachSkReuseportSelect = AttachType(sys.BPF_SK_REUSEPORT_SELECT) AttachSkReuseportSelectOrMigrate = AttachType(sys.BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) AttachPerfEvent = AttachType(sys.BPF_PERF_EVENT) AttachTraceKprobeMulti = AttachType(sys.BPF_TRACE_KPROBE_MULTI) AttachTraceKprobeSession = AttachType(sys.BPF_TRACE_KPROBE_SESSION) AttachLSMCgroup = AttachType(sys.BPF_LSM_CGROUP) AttachStructOps = AttachType(sys.BPF_STRUCT_OPS) AttachNetfilter = AttachType(sys.BPF_NETFILTER) AttachTCXIngress = AttachType(sys.BPF_TCX_INGRESS) AttachTCXEgress = AttachType(sys.BPF_TCX_EGRESS) AttachTraceUprobeMulti = AttachType(sys.BPF_TRACE_UPROBE_MULTI) AttachCgroupUnixConnect = AttachType(sys.BPF_CGROUP_UNIX_CONNECT) AttachCgroupUnixSendmsg = AttachType(sys.BPF_CGROUP_UNIX_SENDMSG) AttachCgroupUnixRecvmsg = AttachType(sys.BPF_CGROUP_UNIX_RECVMSG) AttachCgroupUnixGetpeername = AttachType(sys.BPF_CGROUP_UNIX_GETPEERNAME) AttachCgroupUnixGetsockname = AttachType(sys.BPF_CGROUP_UNIX_GETSOCKNAME) AttachNetkitPrimary = AttachType(sys.BPF_NETKIT_PRIMARY) AttachNetkitPeer = AttachType(sys.BPF_NETKIT_PEER) ) // Attach types (Windows). // // See https://github.com/microsoft/ebpf-for-windows/blob/main/include/ebpf_structs.h#L260 const ( AttachWindowsXDP = AttachType(platform.WindowsTag | iota + 1) AttachWindowsBind AttachWindowsCGroupInet4Connect AttachWindowsCGroupInet6Connect AttachWindowsCgroupInet4RecvAccept AttachWindowsCgroupInet6RecvAccept AttachWindowsCGroupSockOps AttachWindowsSample AttachWindowsXDPTest ) // AttachTypeForPlatform returns a platform specific attach type. 
// // Use this if the library doesn't provide a constant yet. func AttachTypeForPlatform(plat string, value uint32) (AttachType, error) { return platform.EncodeConstant[AttachType](plat, value) } // AttachFlags of the eBPF program used in BPF_PROG_ATTACH command type AttachFlags uint32 // PinType determines whether a map is pinned into a BPFFS. type PinType uint32 // Valid pin types. // // Mirrors enum libbpf_pin_type. const ( PinNone PinType = iota // Pin an object by using its name as the filename. PinByName ) // LoadPinOptions control how a pinned object is loaded. type LoadPinOptions struct { // Request a read-only or write-only object. The default is a read-write // object. Only one of the flags may be set. ReadOnly bool WriteOnly bool // Raw flags for the syscall. Other fields of this struct take precedence. Flags uint32 } // Marshal returns a value suitable for BPF_OBJ_GET syscall file_flags parameter. func (lpo *LoadPinOptions) Marshal() uint32 { if lpo == nil { return 0 } flags := lpo.Flags if lpo.ReadOnly { flags |= sys.BPF_F_RDONLY } if lpo.WriteOnly { flags |= sys.BPF_F_WRONLY } return flags } // BatchOptions batch map operations options // // Mirrors libbpf struct bpf_map_batch_opts // Currently BPF_F_FLAG is the only supported // flag (for ElemFlags). type BatchOptions struct { ElemFlags uint64 Flags uint64 } // LogLevel controls the verbosity of the kernel's eBPF program verifier. // These constants can be used for the ProgramOptions.LogLevel field. type LogLevel = sys.LogLevel const ( // Print verifier state at branch points. LogLevelBranch = sys.BPF_LOG_LEVEL1 // Print verifier state for every instruction. // Available since Linux v5.2. LogLevelInstruction = sys.BPF_LOG_LEVEL2 // Print verifier errors and stats at the end of the verification process. // Available since Linux v5.2. 
LogLevelStats = sys.BPF_LOG_STATS ) ================================================ FILE: types_string.go ================================================ // Code generated by "stringer -output types_string.go -type=MapType,ProgramType,PinType"; DO NOT EDIT. package ebpf import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[UnspecifiedMap-0] _ = x[Hash-1] _ = x[Array-2] _ = x[ProgramArray-3] _ = x[PerfEventArray-4] _ = x[PerCPUHash-5] _ = x[PerCPUArray-6] _ = x[StackTrace-7] _ = x[CGroupArray-8] _ = x[LRUHash-9] _ = x[LRUCPUHash-10] _ = x[LPMTrie-11] _ = x[ArrayOfMaps-12] _ = x[HashOfMaps-13] _ = x[DevMap-14] _ = x[SockMap-15] _ = x[CPUMap-16] _ = x[XSKMap-17] _ = x[SockHash-18] _ = x[CGroupStorage-19] _ = x[ReusePortSockArray-20] _ = x[PerCPUCGroupStorage-21] _ = x[Queue-22] _ = x[Stack-23] _ = x[SkStorage-24] _ = x[DevMapHash-25] _ = x[StructOpsMap-26] _ = x[RingBuf-27] _ = x[InodeStorage-28] _ = x[TaskStorage-29] _ = x[BloomFilter-30] _ = x[UserRingbuf-31] _ = x[CgroupStorage-32] _ = x[Arena-33] _ = x[WindowsHash-268435457] _ = x[WindowsArray-268435458] _ = x[WindowsProgramArray-268435459] _ = x[WindowsPerCPUHash-268435460] _ = x[WindowsPerCPUArray-268435461] _ = x[WindowsHashOfMaps-268435462] _ = x[WindowsArrayOfMaps-268435463] _ = x[WindowsLRUHash-268435464] _ = x[WindowsLPMTrie-268435465] _ = x[WindowsQueue-268435466] _ = x[WindowsLRUCPUHash-268435467] _ = x[WindowsStack-268435468] _ = x[WindowsRingBuf-268435469] } const ( _MapType_name_0 = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStorageBloomFilterUserRingbufCgroupStorageArena" _MapType_name_1 = 
"WindowsHashWindowsArrayWindowsProgramArrayWindowsPerCPUHashWindowsPerCPUArrayWindowsHashOfMapsWindowsArrayOfMapsWindowsLRUHashWindowsLPMTrieWindowsQueueWindowsLRUCPUHashWindowsStackWindowsRingBuf" ) var ( _MapType_index_0 = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290, 301, 312, 325, 330} _MapType_index_1 = [...]uint8{0, 11, 23, 42, 59, 77, 94, 112, 126, 140, 152, 169, 181, 195} ) func (i MapType) String() string { switch { case i <= 33: return _MapType_name_0[_MapType_index_0[i]:_MapType_index_0[i+1]] case 268435457 <= i && i <= 268435469: i -= 268435457 return _MapType_name_1[_MapType_index_1[i]:_MapType_index_1[i+1]] default: return "MapType(" + strconv.FormatInt(int64(i), 10) + ")" } } func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[UnspecifiedProgram-0] _ = x[SocketFilter-1] _ = x[Kprobe-2] _ = x[SchedCLS-3] _ = x[SchedACT-4] _ = x[TracePoint-5] _ = x[XDP-6] _ = x[PerfEvent-7] _ = x[CGroupSKB-8] _ = x[CGroupSock-9] _ = x[LWTIn-10] _ = x[LWTOut-11] _ = x[LWTXmit-12] _ = x[SockOps-13] _ = x[SkSKB-14] _ = x[CGroupDevice-15] _ = x[SkMsg-16] _ = x[RawTracepoint-17] _ = x[CGroupSockAddr-18] _ = x[LWTSeg6Local-19] _ = x[LircMode2-20] _ = x[SkReuseport-21] _ = x[FlowDissector-22] _ = x[CGroupSysctl-23] _ = x[RawTracepointWritable-24] _ = x[CGroupSockopt-25] _ = x[Tracing-26] _ = x[StructOps-27] _ = x[Extension-28] _ = x[LSM-29] _ = x[SkLookup-30] _ = x[Syscall-31] _ = x[Netfilter-32] _ = x[WindowsXDP-268435457] _ = x[WindowsBind-268435458] _ = x[WindowsCGroupSockAddr-268435459] _ = x[WindowsSockOps-268435460] _ = x[WindowsXDPTest-268436454] _ = x[WindowsSample-268436455] } const ( _ProgramType_name_0 = 
"UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallNetfilter" _ProgramType_name_1 = "WindowsXDPWindowsBindWindowsCGroupSockAddrWindowsSockOps" _ProgramType_name_2 = "WindowsXDPTestWindowsSample" ) var ( _ProgramType_index_0 = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 310} _ProgramType_index_1 = [...]uint8{0, 10, 21, 42, 56} _ProgramType_index_2 = [...]uint8{0, 14, 27} ) func (i ProgramType) String() string { switch { case i <= 32: return _ProgramType_name_0[_ProgramType_index_0[i]:_ProgramType_index_0[i+1]] case 268435457 <= i && i <= 268435460: i -= 268435457 return _ProgramType_name_1[_ProgramType_index_1[i]:_ProgramType_index_1[i+1]] case 268436454 <= i && i <= 268436455: i -= 268436454 return _ProgramType_name_2[_ProgramType_index_2[i]:_ProgramType_index_2[i+1]] default: return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")" } } func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} _ = x[PinNone-0] _ = x[PinByName-1] } const _PinType_name = "PinNonePinByName" var _PinType_index = [...]uint8{0, 7, 16} func (i PinType) String() string { idx := int(i) - 0 if i < 0 || idx >= len(_PinType_index)-1 { return "PinType(" + strconv.FormatInt(int64(i), 10) + ")" } return _PinType_name[_PinType_index[idx]:_PinType_index[idx+1]] } ================================================ FILE: types_windows.go ================================================ package ebpf import ( "fmt" "os" "golang.org/x/sys/windows" "github.com/cilium/ebpf/internal/efw" "github.com/cilium/ebpf/internal/platform" ) // WindowsProgramTypeForGUID resolves a GUID to a ProgramType. // // The GUID must be in the form of "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}". // // Returns an error wrapping [os.ErrNotExist] if the GUID is not recognized. func WindowsProgramTypeForGUID(guid string) (ProgramType, error) { progTypeGUID, err := windows.GUIDFromString(guid) if err != nil { return 0, fmt.Errorf("parse GUID: %w", err) } rawProgramType, err := efw.EbpfGetBpfProgramType(progTypeGUID) if err != nil { return 0, fmt.Errorf("get program type: %w", err) } if rawProgramType == 0 { return 0, fmt.Errorf("program type not found for GUID %v: %w", guid, os.ErrNotExist) } return ProgramTypeForPlatform(platform.Windows, rawProgramType) } // WindowsAttachTypeForGUID resolves a GUID to an AttachType. // // The GUID must be in the form of "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}". // // Returns an error wrapping [os.ErrNotExist] if the GUID is not recognized. 
func WindowsAttachTypeForGUID(guid string) (AttachType, error) { attachTypeGUID, err := windows.GUIDFromString(guid) if err != nil { return 0, fmt.Errorf("parse GUID: %w", err) } rawAttachType, err := efw.EbpfGetBpfAttachType(attachTypeGUID) if err != nil { return 0, fmt.Errorf("get attach type: %w", err) } if rawAttachType == 0 { return 0, fmt.Errorf("attach type not found for GUID %v: %w", attachTypeGUID, os.ErrNotExist) } return AttachTypeForPlatform(platform.Windows, rawAttachType) } ================================================ FILE: types_windows_test.go ================================================ package ebpf import ( "os" "testing" "github.com/go-quicktest/qt" "golang.org/x/sys/windows" ) func TestWindowsProgramTypeForGUID(t *testing.T) { sampleGUID := windows.GUID{ Data1: 0xf788ef4a, Data2: 0x207d, Data3: 0x4dc3, Data4: [...]byte{0x85, 0xcf, 0x0f, 0x2e, 0xa1, 0x07, 0x21, 0x3c}, } _, err := WindowsProgramTypeForGUID("{00000000-0000-0000-0000-000000000001}") qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist)) programType, err := WindowsProgramTypeForGUID(sampleGUID.String()) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(WindowsSample, programType)) } func TestWindowsAttachTypeForGUID(t *testing.T) { sampleGUID := windows.GUID{ Data1: 0xf788ef4b, Data2: 0x207d, Data3: 0x4dc3, Data4: [...]byte{0x85, 0xcf, 0x0f, 0x2e, 0xa1, 0x07, 0x21, 0x3c}, } _, err := WindowsAttachTypeForGUID("{00000000-0000-0000-0000-000000000001}") qt.Assert(t, qt.ErrorIs(err, os.ErrNotExist)) attachType, err := WindowsAttachTypeForGUID(sampleGUID.String()) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(AttachWindowsSample, attachType)) } ================================================ FILE: variable.go ================================================ package ebpf import ( "encoding/binary" "fmt" "io" "reflect" "slices" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/sysenc" ) // VariableSpec is a convenience wrapper for modifying global variables of a // 
CollectionSpec before loading it into the kernel.
//
// All operations on a VariableSpec's underlying MapSpec are performed in the
// host's native endianness.
type VariableSpec struct {
	// Name of the variable.
	Name string

	// Name of the section this variable was allocated in.
	SectionName string

	// Offset of the variable within the datasec.
	Offset uint32

	// Byte representation of the variable's value.
	Value []byte

	// Type information of the variable. Optional.
	Type *btf.Var
}

// Set sets the value of the VariableSpec to the provided input using the host's
// native endianness.
//
// The input must marshal to exactly Size() bytes. If the size of the variable
// is unknown (no Value and no usable Type), the binary size of the input is
// used instead. On error, the VariableSpec is left unmodified.
func (s *VariableSpec) Set(in any) error {
	size := int(s.Size())
	if size == 0 {
		// Neither Value nor Type determines a size, fall back to the size of
		// the input itself.
		bs := binary.Size(in)
		if bs < 0 {
			return fmt.Errorf("cannot determine binary size of value %v", in)
		}
		size = bs
	}

	// Marshal before touching s.Value: allocating first would turn an
	// uninitialized variable into a zero-filled one even if marshaling fails.
	buf, err := sysenc.Marshal(in, size)
	if err != nil {
		return fmt.Errorf("marshaling value %s: %w", s.Name, err)
	}

	if s.Value == nil {
		s.Value = make([]byte, size)
	}

	buf.CopyTo(s.Value)

	return nil
}

// Get writes the value of the VariableSpec to the provided output using the
// host's native endianness.
//
// Returns an error if the variable is not initialized or if the unmarshaling fails.
func (s *VariableSpec) Get(out any) error {
	if s.Value == nil {
		return fmt.Errorf("variable is not initialized")
	}

	if err := sysenc.Unmarshal(out, s.Value); err != nil {
		return fmt.Errorf("unmarshaling value: %w", err)
	}

	return nil
}

// Size returns the size of the variable in bytes.
//
// The length of Value takes precedence if it is set. Otherwise the size is
// derived from Type. Returns 0 if neither is available or if the size of Type
// cannot be determined.
func (s *VariableSpec) Size() uint32 {
	if s.Value != nil {
		return uint32(len(s.Value))
	}

	if s.Type != nil {
		size, err := btf.Sizeof(s.Type.Type)
		if err != nil {
			return 0
		}
		return uint32(size)
	}

	return 0
}

// Constant returns true if the variable is located in a data section intended
// for constant values.
func (s *VariableSpec) Constant() bool { return isConstantDataSection(s.SectionName) } func (s *VariableSpec) String() string { return fmt.Sprintf("%s (type=%v, section=%s, offset=%d, size=%d)", s.Name, s.Type, s.SectionName, s.Offset, s.Size()) } // Copy the VariableSpec. func (s *VariableSpec) Copy() *VariableSpec { cpy := *s cpy.Value = slices.Clone(s.Value) if s.Type != nil { cpy.Type = btf.Copy(s.Type).(*btf.Var) } return &cpy } // Variable is a convenience wrapper for modifying global variables of a // Collection after loading it into the kernel. Operations on a Variable are // performed using direct memory access, bypassing the BPF map syscall API. // // On kernels older than 5.5, most interactions with Variable return // [ErrNotSupported]. type Variable struct { name string offset uint32 size uint32 t *btf.Var mm *Memory } func newVariable(name string, offset, size uint32, t *btf.Var, mm *Memory) (*Variable, error) { if mm != nil { if offset+size > mm.Size() { return nil, fmt.Errorf("offset %d(+%d) is out of bounds", offset, size) } } return &Variable{ name: name, offset: offset, size: size, t: t, mm: mm, }, nil } // Size returns the size of the variable. func (v *Variable) Size() uint32 { return v.size } // ReadOnly returns true if the Variable represents a variable that is read-only // after loading the Collection into the kernel. // // On systems without BPF_F_MMAPABLE support, ReadOnly always returns true. func (v *Variable) ReadOnly() bool { if v.mm == nil { return true } return v.mm.ReadOnly() } // Type returns the [btf.Var] representing the variable in its data section. // This is useful for inspecting the variable's decl tags and the type // information of the inner type. // // Returns nil if the original ELF object did not contain BTF information. func (v *Variable) Type() *btf.Var { return v.t } func (v *Variable) String() string { return fmt.Sprintf("%s (type=%v)", v.name, v.t) } // Set the value of the Variable to the provided input. 
The input must marshal
// to the same length as the size of the Variable.
func (v *Variable) Set(in any) error {
	// Preconditions, checked in order: memory must be mapped, writable and
	// large enough to hold the variable.
	switch {
	case v.mm == nil:
		return fmt.Errorf("variable %s: direct access requires Linux 5.5 or later: %w", v.name, ErrNotSupported)
	case v.ReadOnly():
		return fmt.Errorf("variable %s: %w", v.name, ErrReadOnly)
	case !v.mm.bounds(v.offset, v.size):
		return fmt.Errorf("variable %s: access out of bounds: %w", v.name, io.EOF)
	}

	encoded, err := sysenc.Marshal(in, int(v.size))
	if err != nil {
		return fmt.Errorf("marshaling value %s: %w", v.name, err)
	}

	_, err = v.mm.WriteAt(encoded.Bytes(), int64(v.offset))
	if err != nil {
		return fmt.Errorf("writing value to %s: %w", v.name, err)
	}

	return nil
}

// Get copies the current value of the Variable into out, which must be a
// pointer to a value whose size matches the Variable.
//
// Returns an error wrapping [ErrNotSupported] if the Variable has no memory
// mapping, and one wrapping [io.EOF] if the Variable lies outside of it.
func (v *Variable) Get(out any) error {
	if v.mm == nil {
		return fmt.Errorf("variable %s: direct access requires Linux 5.5 or later: %w", v.name, ErrNotSupported)
	}

	if !v.mm.bounds(v.offset, v.size) {
		return fmt.Errorf("variable %s: access out of bounds: %w", v.name, io.EOF)
	}

	end := v.offset + v.size
	raw := v.mm.b[v.offset:end]
	if err := sysenc.Unmarshal(out, raw); err != nil {
		return fmt.Errorf("unmarshaling value %s: %w", v.name, err)
	}

	return nil
}

// checkVariable reports whether v can be accessed through a pointer to T: v
// must be writable and T must have the same size as v.
func checkVariable[T any](v *Variable) error {
	if v.ReadOnly() {
		return ErrReadOnly
	}

	typ := reflect.TypeFor[T]()
	switch {
	case typ.Kind() == reflect.Uintptr && v.size == 8:
		// Pointers in BPF/BTF are always 8 bytes wide, while uintptr is 4 bytes
		// on 32-bit hosts. Accept uintptr for 8-byte variables anyway to keep
		// code portable; the upper 32 bits of the pointer should be zero.
		return nil
	case uintptr(v.size) != typ.Size():
		return fmt.Errorf("can't create %d-byte accessor to %d-byte variable: %w", typ.Size(), v.size, ErrInvalidType)
	}

	return nil
}

// VariablePointer returns a pointer to a variable of type T backed by memory
// shared with the BPF program.
Requires building the Go application with -tags
// ebpf_unsafe_memory_experiment.
//
// T must contain only fixed-size, non-Go-pointer types: bools, floats,
// (u)int[8-64], arrays, and structs containing them. Structs must embed
// [structs.HostLayout]. [ErrInvalidType] is returned if T is not a valid type.
//
// Any error is wrapped with the name of the Variable.
func VariablePointer[T comparable](v *Variable) (*T, error) {
	if err := checkVariable[T](v); err != nil {
		return nil, fmt.Errorf("variable pointer %s: %w", v.name, err)
	}
	return memoryPointer[T](v.mm, v.offset)
}

================================================
FILE: variable_test.go
================================================
package ebpf

import (
	"encoding/binary"
	"runtime"
	"structs"
	"sync/atomic"
	"testing"
	"time"
	"unsafe"

	"github.com/go-quicktest/qt"

	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/testutils"
)

// TestVariableSpec checks that values written with VariableSpec.Set can be
// read back with VariableSpec.Get, for scalars and for a struct.
func TestVariableSpec(t *testing.T) {
	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	qt.Assert(t, qt.IsNil(spec.Variables["hidden"]))
	qt.Assert(t, qt.IsNotNil(spec.Variables["weak"]))

	const want uint32 = 12345

	// Update a variable in each type of data section (.bss,.data,.rodata)
	qt.Assert(t, qt.IsNil(spec.Variables["var_bss"].Set(want)))
	qt.Assert(t, qt.IsNil(spec.Variables["var_data"].Set(want)))
	qt.Assert(t, qt.IsNil(spec.Variables["var_rodata"].Set(want)))

	var v uint32
	qt.Assert(t, qt.IsNil(spec.Variables["var_bss"].Get(&v)))
	qt.Assert(t, qt.Equals(v, want))
	qt.Assert(t, qt.IsNil(spec.Variables["var_data"].Get(&v)))
	qt.Assert(t, qt.Equals(v, want))
	qt.Assert(t, qt.IsNil(spec.Variables["var_rodata"].Get(&v)))
	qt.Assert(t, qt.Equals(v, want))

	// Composite values.
	type structT struct {
		A, B uint64
	}
	qt.Assert(t, qt.IsNil(spec.Variables["var_struct"].Set(&structT{1, 2})))

	var s structT
	qt.Assert(t, qt.IsNil(spec.Variables["var_struct"].Get(&s)))
	qt.Assert(t, qt.Equals(s, structT{1, 2}))
}

// TestVariableSpecCopy checks that modifying the variables of a copied
// CollectionSpec leaves the original spec and the MapSpec type information
// untouched.
func TestVariableSpecCopy(t *testing.T) {
	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	if err != nil {
		t.Fatal(err)
	}

	cpy := spec.Copy()

	// Update a variable in a section with only a single variable (.rodata).
	const want uint32 = 0xfefefefe
	wantb := []byte{0xfe, 0xfe, 0xfe, 0xfe} // Same byte sequence regardless of endianness
	qt.Assert(t, qt.IsNil(cpy.Variables["var_rodata"].Set(want)))
	qt.Assert(t, qt.DeepEquals(cpy.Variables["var_rodata"].Value, wantb))

	// Verify that the original underlying MapSpec was not modified.
	zero := make([]byte, 4)
	qt.Assert(t, qt.DeepEquals(spec.Maps[".rodata"].Contents[0].Value.([]byte), zero))

	// Check that modifications to the VariableSpec's Type don't affect the
	// underlying MapSpec's type information on either the original or the copy.
	cpy.Variables["var_rodata"].Type.Name = "modified"
	spec.Variables["var_rodata"].Type.Name = "modified"

	qt.Assert(t, qt.Equals(cpy.Maps[".rodata"].Value.(*btf.Datasec).Vars[0].Type.(*btf.Var).Name, "var_rodata"))
	qt.Assert(t, qt.Equals(spec.Maps[".rodata"].Value.(*btf.Datasec).Vars[0].Type.(*btf.Var).Name, "var_rodata"))
}

// TestVariableSpecEmptyValue exercises Get and Set on a VariableSpec without
// a Value, first with a 4-byte Type and then without any Type.
func TestVariableSpecEmptyValue(t *testing.T) {
	spec := &VariableSpec{
		Type: &btf.Var{
			Type: &btf.Int{
				Size: 4,
			},
		},
	}

	value := uint32(0x12345678)
	raw, err := binary.Append(nil, internal.NativeEndian, value)
	qt.Assert(t, qt.IsNil(err))

	// Get must fail while no Value has been set.
	qt.Assert(t, qt.IsNotNil(spec.Get(new(uint32))))
	qt.Assert(t, qt.IsNotNil(spec.Set(uint64(0))), qt.Commentf("Setting a value of incorrect size should fail"))
	qt.Assert(t, qt.IsNil(spec.Set(value)))
	qt.Assert(t, qt.DeepEquals(spec.Value, raw))

	spec.Value = nil
	spec.Type = nil
	qt.Assert(t, qt.IsNil(spec.Set(uint64(0))), qt.Commentf("Setting an empty value without a type should accept any type"))
	qt.Assert(t, qt.HasLen(spec.Value, 8))
}

// mustReturn runs prog with an empty context and asserts that it succeeds and
// returns value.
func mustReturn(tb testing.TB, prog *Program, value uint32) {
	tb.Helper()

	ret, _, err := prog.Test(internal.EmptyBPFContext)
	qt.Assert(tb, qt.IsNil(err))
	qt.Assert(tb, qt.Equals(ret, value))
}

// TestVariable checks that values written through Variable.Set are observed
// by the loaded programs, and that Variable.Type exposes type information.
func TestVariable(t *testing.T) {
	testutils.SkipIfNotSupported(t, haveMmapableMaps())

	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	obj := struct {
		GetBSS      *Program `ebpf:"get_bss"`
		GetData     *Program `ebpf:"get_data"`
		CheckStruct *Program `ebpf:"check_struct"`

		BSS    *Variable `ebpf:"var_bss"`
		Data   *Variable `ebpf:"var_data"`
		Struct *Variable `ebpf:"var_struct"`
		Array  *Variable `ebpf:"var_array"`
	}{}

	qt.Assert(t, qt.IsNil(loadAndAssign(t, spec, &obj, nil)))
	t.Cleanup(func() {
		obj.GetBSS.Close()
		obj.GetData.Close()
		obj.CheckStruct.Close()
	})

	// All programs are expected to return 0 before any variable is modified.
	mustReturn(t, obj.GetBSS, 0)
	mustReturn(t, obj.GetData, 0)
	mustReturn(t, obj.CheckStruct, 0)

	want := uint32(4242424242)
	qt.Assert(t, qt.IsNil(obj.BSS.Set(want)))
	mustReturn(t, obj.GetBSS, want)
	qt.Assert(t, qt.IsNil(obj.Data.Set(want)))
	mustReturn(t, obj.GetData, want)
	qt.Assert(t, qt.IsNil(obj.Struct.Set(&struct{ A, B uint64 }{0xa, 0xb})))
	mustReturn(t, obj.CheckStruct, 1)

	// Ensure page-aligned array variable can be accessed in its entirety.
	arr := make([]byte, obj.Array.Size())
	qt.Assert(t, qt.IsNil(obj.Array.Get(arr)))
	qt.Assert(t, qt.IsNil(obj.Array.Set(arr)))

	typ := obj.BSS.Type()
	qt.Assert(t, qt.IsNotNil(typ))
	i, ok := btf.As[*btf.Int](typ.Type)
	qt.Assert(t, qt.IsTrue(ok))
	qt.Assert(t, qt.Equals(i.Size, 4))

	qt.Assert(t, qt.IsNotNil(obj.Data.Type()))
	qt.Assert(t, qt.IsNotNil(obj.Struct.Type()))
}

// TestVariableConst checks that a value set on the spec of a .rodata variable
// is visible after loading, and that the loaded Variable rejects writes.
func TestVariableConst(t *testing.T) {
	testutils.SkipIfNotSupported(t, haveMmapableMaps())

	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	want := uint32(12345)
	qt.Assert(t, qt.IsNil(spec.Variables["var_rodata"].Set(want)))

	obj := struct {
		GetRodata *Program  `ebpf:"get_rodata"`
		Rodata    *Variable `ebpf:"var_rodata"`
	}{}

	qt.Assert(t, qt.IsNil(loadAndAssign(t, spec, &obj, nil)))
	t.Cleanup(func() {
		obj.GetRodata.Close()
	})

	var got uint32
	qt.Assert(t, qt.IsNil(obj.Rodata.Get(&got)))
	qt.Assert(t, qt.Equals(got, want))
	mustReturn(t, obj.GetRodata, want)

	qt.Assert(t, qt.IsTrue(obj.Rodata.ReadOnly()))
	qt.Assert(t, qt.ErrorIs(obj.Rodata.Set(want), ErrReadOnly))
}

// TestVariableFallback checks that a Variable can be assigned on any system
// and that Get and Set either succeed or fail with ErrNotSupported.
func TestVariableFallback(t *testing.T) {
	// LoadAndAssign should work on Variable regardless of BPF_F_MMAPABLE support.
	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	obj := struct {
		Data *Variable `ebpf:"var_data"`
	}{}

	mustLoadAndAssign(t, spec, &obj, nil)

	// Expect either success or ErrNotSupported on all systems.
	u32 := uint32(0)
	if err := obj.Data.Get(&u32); err != nil {
		qt.Assert(t, qt.ErrorIs(err, ErrNotSupported))
	}

	if err := obj.Data.Set(&u32); err != nil {
		qt.Assert(t, qt.ErrorIs(err, ErrNotSupported))
	}
}

// TestVariablePointer checks that pointers obtained from VariablePointer share
// memory with the loaded programs, and that types containing Go pointers are
// rejected with ErrInvalidType.
func TestVariablePointer(t *testing.T) {
	testutils.SkipIfNotSupported(t, haveMmapableMaps())

	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	obj := struct {
		AddAtomic      *Program `ebpf:"add_atomic"`
		CheckStructPad *Program `ebpf:"check_struct_pad"`
		CheckArray     *Program `ebpf:"check_array"`

		Atomic    *Variable `ebpf:"var_atomic"`
		StructPad *Variable `ebpf:"var_struct_pad"`
		Array     *Variable `ebpf:"var_array"`
	}{}

	// Enable the unsafe memory implementation for the duration of the test.
	unsafeMemory = true
	t.Cleanup(func() {
		unsafeMemory = false
	})

	qt.Assert(t, qt.IsNil(loadAndAssign(t, spec, &obj, nil)))
	t.Cleanup(func() {
		obj.AddAtomic.Close()
		obj.CheckStructPad.Close()
		obj.CheckArray.Close()
	})

	// Bump the value by 1 using a bpf program.
	want := uint32(1338)
	a32, err := VariablePointer[atomic.Uint32](obj.Atomic)
	qt.Assert(t, qt.IsNil(err))
	a32.Store(want - 1)

	mustReturn(t, obj.AddAtomic, 0)
	qt.Assert(t, qt.Equals(a32.Load(), want))

	// Types containing Go pointers must be rejected.
	_, err = VariablePointer[*uint32](obj.Atomic)
	qt.Assert(t, qt.ErrorIs(err, ErrInvalidType))

	_, err = VariablePointer[struct{ _ *uint64 }](obj.StructPad)
	qt.Assert(t, qt.ErrorIs(err, ErrInvalidType))

	type S struct {
		_ structs.HostLayout
		A uint32
		B uint64
		C uint16
		D [5]byte
		E uint64
	}

	s, err := VariablePointer[S](obj.StructPad)
	qt.Assert(t, qt.IsNil(err))
	*s = S{A: 0xa, B: 0xb, C: 0xc, D: [5]byte{0xd, 0, 0, 0, 0}, E: 0xe}
	mustReturn(t, obj.CheckStructPad, 1)

	a, err := VariablePointer[[8192]byte](obj.Array)
	qt.Assert(t, qt.IsNil(err))
	a[len(a)-1] = 0xff
	mustReturn(t, obj.CheckArray, 1)
}

// TestVariablePointerError checks that VariablePointer returns ErrNotSupported
// when unsafeMemory has not been enabled by the test.
func TestVariablePointerError(t *testing.T) {
	testutils.SkipIfNotSupported(t, haveMmapableMaps())

	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	obj := struct {
		Atomic *Variable `ebpf:"var_atomic"`
	}{}

	qt.Assert(t, qt.IsNil(loadAndAssign(t, spec, &obj, nil)))

	_, err = VariablePointer[atomic.Uint32](obj.Atomic)
	qt.Assert(t, qt.ErrorIs(err, ErrNotSupported))
}

// TestVariablePointerGC checks that memory behind a pointer obtained from
// VariablePointer stays valid after the object it was assigned to has been
// collected, and is only cleaned up once the pointer itself is unreachable.
//
// The order of statements matters here, since it determines which references
// are live at each call to runtime.GC.
func TestVariablePointerGC(t *testing.T) {
	testutils.SkipIfNotSupported(t, haveMmapableMaps())

	file := testutils.NativeFile(t, "testdata/variables-%s.elf")
	spec, err := LoadCollectionSpec(file)
	qt.Assert(t, qt.IsNil(err))

	cancel := make(chan struct{})

	type obj_s struct {
		AddAtomic *Program  `ebpf:"add_atomic"`
		Atomic    *Variable `ebpf:"var_atomic"`
		AtomicMap *Map      `ebpf:".data.atomic"`
	}

	unsafeMemory = true
	t.Cleanup(func() {
		unsafeMemory = false
	})

	var obj obj_s
	qt.Assert(t, qt.IsNil(loadAndAssign(t, spec, &obj, nil)))

	// Set cleanup on obj to get notified when it is collected.
	ogc := make(chan struct{})
	runtime.AddCleanup(&obj, func(*byte) {
		close(ogc)
	}, nil)

	mem, err := obj.AtomicMap.unsafeMemory()
	qt.Assert(t, qt.IsNil(err))
	obj.AtomicMap.Close()

	// Start a goroutine that panics if the finalizer runs before we expect it to.
	mgc := make(chan struct{})
	go func() {
		select {
		case <-mgc:
			panic("memory cleanup ran unexpectedly")
		case <-cancel:
			return
		}
	}()

	// Set cleanup on the Memory's backing array to get notified when it is
	// collected.
	runtime.AddCleanup(unsafe.SliceData(mem.b), func(*byte) {
		close(mgc)
	}, nil)

	// Pull out Program handle and Variable pointer so reference to obj is
	// dropped.
	prog := obj.AddAtomic
	t.Cleanup(func() {
		prog.Close()
	})

	a32, err := VariablePointer[atomic.Uint32](obj.Atomic)
	qt.Assert(t, qt.IsNil(err))

	// No references to obj past this point. Trigger GC and wait for the obj
	// finalizer to complete.
	runtime.GC()
	testutils.WaitChan(t, ogc, time.Second)

	// Trigger prog and read memory to ensure variable reference is still valid.
	mustReturn(t, prog, 0)
	qt.Assert(t, qt.Equals(a32.Load(), 1))

	// Close the cancel channel while holding a backing array reference to avoid
	// false-positive panics in case we get a GC cycle before the manual call to
	// runtime.GC below.
	close(cancel)
	runtime.KeepAlive(a32)

	// More GC cycles to collect the backing array. As long as the unsafe memory
	// implementation is still on SetFinalizer, this needs multiple cycles to
	// work, since finalizers can resurrect objects. 3 GCs seems to work reliably.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Wait for backing array to be finalized.
	testutils.WaitChan(t, mgc, time.Second*5)
}