Compare commits

...

7 Commits

Author SHA1 Message Date
Federico Di Pierro 1dcb369c20 update(cmake): bump container plugin to 0.2.3.
Signed-off-by: Federico Di Pierro <nierro92@gmail.com>
2025-05-19 11:24:01 +02:00
Luca Guerra bd2b9aa2d7 cleanup(modern_bpf): address review comments
Signed-off-by: Luca Guerra <luca@guerra.sh>
Co-authored-by: Andrea Terzolo <andreaterzolo3@gmail.com>
2025-05-13 11:19:28 +02:00
Luca Guerra c1806ae290 cleanup(modern_bpf): use a regular map for shared ebpf settings
Signed-off-by: Luca Guerra <luca@guerra.sh>
2025-05-13 11:19:28 +02:00
Luca Guerra c14db1050c cleanup(modern_bpf): use a separate map for 64bit interesting syscalls table
Signed-off-by: Luca Guerra <luca@guerra.sh>
Co-authored-by: Kondah Mouad <kondah.mouad@gmail.com>
2025-05-13 11:19:28 +02:00
Federico Di Pierro fe797fbc39 fix(driver/modern_bpf,userspace/libpman): move `g_64bit_sampling_syscall_table` and `g_ia32_to_64_table` to rodata.
Signed-off-by: Federico Di Pierro <nierro92@gmail.com>

Co-authored-by: Luca Guerra <luca@guerra.sh>
2025-05-13 11:19:28 +02:00
Federico Di Pierro e0da46e5fa fix(userspace/libscap): avoid a possible read past end of buffer.
Signed-off-by: Federico Di Pierro <nierro92@gmail.com>
2025-05-13 11:19:28 +02:00
Federico Di Pierro 78a96227a1 chore(driver/modern_bpf): limit `bpf_loop` helper to 16 iterations.
Signed-off-by: Federico Di Pierro <nierro92@gmail.com>
2025-05-13 11:19:28 +02:00
12 changed files with 261 additions and 63 deletions

View File

@@ -22,13 +22,13 @@ set(CONTAINER_LIBRARY
)
if(NOT CONTAINER_VERSION)
set(CONTAINER_VERSION "0.2.2")
set(CONTAINER_VERSION "0.2.3")
endif()
if(NOT CONTAINER_HASH)
if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
set(CONTAINER_HASH "e770975d06bad6c593ea43dd56364afdb9314a47924f29198ba3ab184e6aca5d")
set(CONTAINER_HASH "1cff2c3e0c07efe85a97e187c0565e1a6fc67b4e0be8f43c5bf2f560b0c4957b")
else() # arm64
set(CONTAINER_HASH "52fa2687152eb89c7e54a7b96bd1b08e78fdef15b98c989322f8eab7debcbf35")
set(CONTAINER_HASH "2f3732c4b9ea42436a2b514a4eb792e89d41d07930fd7c3c7f77def6aadf6666")
endif()
endif()
if(NOT TARGET container_plugin)

View File

@@ -17,44 +17,99 @@
/*=============================== SETTINGS ===========================*/
static __always_inline struct capture_settings *maps__get_capture_settings() {
uint32_t key = 0;
return bpf_map_lookup_elem(&capture_settings, &key);
}
static __always_inline uint64_t maps__get_boot_time() {
return g_settings.boot_time;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->boot_time;
}
static __always_inline uint32_t maps__get_snaplen() {
return g_settings.snaplen;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->snaplen;
}
static __always_inline bool maps__get_dropping_mode() {
return g_settings.dropping_mode;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->dropping_mode;
}
static __always_inline uint32_t maps__get_sampling_ratio() {
return g_settings.sampling_ratio;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->sampling_ratio;
}
static __always_inline bool maps__get_drop_failed() {
return g_settings.drop_failed;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->drop_failed;
}
static __always_inline bool maps__get_do_dynamic_snaplen() {
return g_settings.do_dynamic_snaplen;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->do_dynamic_snaplen;
}
static __always_inline uint16_t maps__get_fullcapture_port_range_start() {
return g_settings.fullcapture_port_range_start;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->fullcapture_port_range_start;
}
static __always_inline uint16_t maps__get_fullcapture_port_range_end() {
return g_settings.fullcapture_port_range_end;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->fullcapture_port_range_end;
}
static __always_inline uint16_t maps__get_statsd_port() {
return g_settings.statsd_port;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->statsd_port;
}
static __always_inline int32_t maps__get_scap_tid() {
return g_settings.scap_tid;
struct capture_settings *settings = maps__get_capture_settings();
if(settings == NULL) {
return 0;
}
return settings->scap_tid;
}
/*=============================== SETTINGS ===========================*/
@@ -89,8 +144,12 @@ static __always_inline uint8_t maps__64bit_sampling_syscall_table(uint32_t sysca
/*=============================== SYSCALL-64 INTERESTING TABLE ===========================*/
static __always_inline bool maps__64bit_interesting_syscall(uint32_t syscall_id) {
return g_64bit_interesting_syscalls_table[syscall_id & (SYSCALL_TABLE_SIZE - 1)];
static __always_inline bool maps__interesting_syscall_64bit(uint32_t syscall_id) {
bool *ret = bpf_map_lookup_elem(&interesting_syscalls_table_64bit, &syscall_id);
if(ret == NULL) {
return false;
}
return *ret;
}
/*=============================== SYSCALL-64 INTERESTING TABLE ===========================*/
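
Every getter in this hunk now follows the same shape: look up slot 0 of a one-entry BPF_MAP_TYPE_ARRAY and fall back to a zero value when the lookup fails (the NULL check is mandatory, otherwise the verifier rejects the program). A minimal, self-contained sketch of that pattern, using illustrative names (demo_settings, settings_map) rather than the real libs types:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Illustrative stand-in for the shared settings struct. */
struct demo_settings {
    __u64 boot_time;
    __u32 snaplen;
};

/* One-entry array map shared with userspace, mirroring the capture_settings idea. */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, struct demo_settings);
} settings_map SEC(".maps");

static __always_inline __u32 get_snaplen(void) {
    __u32 key = 0;
    struct demo_settings *s = bpf_map_lookup_elem(&settings_map, &key);
    if (s == NULL) {
        /* Verifier-mandated NULL check before dereferencing the map value. */
        return 0;
    }
    return s->snaplen;
}

SEC("tracepoint/raw_syscalls/sys_enter")
int demo_prog(void *ctx) {
    bpf_printk("snaplen=%u", get_snaplen());
    return 0;
}

char LICENSE[] SEC("license") = "GPL";

The trade-off versus a writable global is one extra helper call and NULL check per read; in exchange, userspace can reach the value through an ordinary map file descriptor.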

View File

@@ -26,6 +26,9 @@
/* Maximum number of `iovec` structures that we can analyze. */
#define MAX_IOVCNT 32
/* Maximum number of supported sendmmsg/recvmmsg loops with bpf_loop helper */
#define MAX_SENDMMSG_RECVMMSG_SIZE 16
/* Maximum number of `pollfd` structures that we can analyze. */
#define MAX_POLLFD 16

View File

@@ -14,7 +14,7 @@
#include <helpers/extract/extract_from_kernel.h>
static __always_inline bool syscalls_dispatcher__64bit_interesting_syscall(uint32_t syscall_id) {
return maps__64bit_interesting_syscall(syscall_id);
return maps__interesting_syscall_64bit(syscall_id);
}
static __always_inline long convert_network_syscalls(struct pt_regs *regs) {

View File

@@ -47,35 +47,23 @@ __weak const volatile uint64_t probe_api_ver = PPM_API_CURRENT_VERSION;
*/
__weak const volatile uint64_t probe_schema_var = PPM_SCHEMA_CURRENT_VERSION;
/*=============================== BPF READ-ONLY GLOBAL VARIABLES ===============================*/
/*=============================== BPF GLOBAL VARIABLES ===============================*/
/**
* @brief Given the syscall id on 64-bit-architectures returns if
* the syscall must be filtered out according to the simple consumer logic.
*/
__weak bool g_64bit_interesting_syscalls_table[SYSCALL_TABLE_SIZE];
/**
* @brief Given the syscall id on 64-bit-architectures returns:
* - `UF_NEVER_DROP` if the syscall must not be dropped in the sampling logic.
* - `UF_ALWAYS_DROP` if the syscall must always be dropped in the sampling logic.
* - `UF_NONE` if we drop the syscall depends on the sampling ratio.
*/
__weak uint8_t g_64bit_sampling_syscall_table[SYSCALL_TABLE_SIZE];
__weak const volatile uint8_t g_64bit_sampling_syscall_table[SYSCALL_TABLE_SIZE];
/**
* @brief Given the syscall id on 32-bit x86 arch returns
* its x64 value. Used to support ia32 syscall emulation.
*/
__weak uint32_t g_ia32_to_64_table[SYSCALL_TABLE_SIZE];
__weak const volatile uint32_t g_ia32_to_64_table[SYSCALL_TABLE_SIZE];
/**
* @brief Global capture settings shared between userspace and
* bpf programs.
*/
__weak struct capture_settings g_settings;
/*=============================== BPF READ-ONLY GLOBAL VARIABLES ===============================*/
/*=============================== BPF GLOBAL VARIABLES ===============================*/
/**
* @brief Variable used only kernel side to understand when we need to send
@@ -133,6 +121,28 @@ struct {
/*=============================== BPF_MAP_TYPE_ARRAY ===============================*/
/**
* @brief This table is used to keep track of which syscalls must be filtered out
* according to the simple consumer logic.
*/
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, SYSCALL_TABLE_SIZE);
__type(key, uint32_t);
__type(value, bool);
} interesting_syscalls_table_64bit __weak SEC(".maps");
/**
* @brief Global capture settings shared between userspace and
* bpf programs.
*/
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, uint32_t);
__type(value, struct capture_settings);
} capture_settings __weak SEC(".maps");
/* These maps have one entry for each CPU.
*
* PLEASE NOTE:
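
Two things change in this file: g_64bit_sampling_syscall_table and g_ia32_to_64_table become const volatile, which places them in the .rodata section (userspace can fill them through the skeleton only between open and load, after which the verifier treats them as constants), while the interesting-syscalls table and the capture settings move out of globals into regular BPF_MAP_TYPE_ARRAY maps that stay updatable at run time. A minimal sketch of the read-only-global side, with illustrative names and sizes:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define DEMO_TABLE_SIZE 512 /* illustrative, not the real SYSCALL_TABLE_SIZE */

/* const volatile => .rodata: written once from userspace via skel->rodata
 * before load, then read-only (and constant-foldable) at run time. */
const volatile __u8 demo_sampling_table[DEMO_TABLE_SIZE] = {0};

/* A plain global stays writable, but lives in .bss/.data instead. */
__u32 demo_counter = 0;

SEC("tracepoint/raw_syscalls/sys_exit")
int demo_prog(void *ctx) {
    __u32 id = 0; /* a real probe would extract the syscall id from ctx */
    if (demo_sampling_table[id & (DEMO_TABLE_SIZE - 1)]) {
        __sync_fetch_and_add(&demo_counter, 1);
    }
    return 0;
}

char LICENSE[] SEC("license") = "GPL";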

View File

@@ -161,7 +161,7 @@ int BPF_PROG(recvmmsg_x, struct pt_regs *regs, long ret) {
.args = args,
};
uint32_t nr_loops = ret < 1024 ? ret : 1024;
uint32_t nr_loops = ret < MAX_SENDMMSG_RECVMMSG_SIZE ? ret : MAX_SENDMMSG_RECVMMSG_SIZE;
bpf_loop(nr_loops, handle_exit, &data, 0);
return 0;
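
bpf_loop (available since kernel 5.17) invokes a callback up to nr_loops times without the verifier having to unroll the body, but each iteration still executes at run time, which is why the cap drops from 1024 to MAX_SENDMMSG_RECVMMSG_SIZE (16) here and in sendmmsg below. A self-contained sketch of the calling convention, with illustrative names in place of the real handle_exit callback and its context:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define DEMO_MAX_LOOPS 16

struct demo_loop_ctx {
    long ret;     /* e.g. number of messages returned by recvmmsg */
    long handled;
};

/* bpf_loop callback: return 0 to continue, 1 to stop early. */
static long demo_handle_one(__u64 index, void *ctx) {
    struct demo_loop_ctx *c = ctx;
    c->handled++;
    return 0;
}

SEC("tracepoint/raw_syscalls/sys_exit")
int demo_prog(void *raw_ctx) {
    struct demo_loop_ctx c = {.ret = 100, .handled = 0};
    /* Clamp the iteration count exactly like the diff does. */
    __u32 nr_loops = c.ret < DEMO_MAX_LOOPS ? c.ret : DEMO_MAX_LOOPS;
    bpf_loop(nr_loops, demo_handle_one, &c, 0);
    return 0;
}

char LICENSE[] SEC("license") = "GPL";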

View File

@@ -147,7 +147,7 @@ int BPF_PROG(sendmmsg_x, struct pt_regs *regs, long ret) {
.args = args,
};
uint32_t nr_loops = ret < 1024 ? ret : 1024;
uint32_t nr_loops = ret < MAX_SENDMMSG_RECVMMSG_SIZE ? ret : MAX_SENDMMSG_RECVMMSG_SIZE;
bpf_loop(nr_loops, handle_exit, &data, 0);
return 0;

View File

@@ -76,7 +76,7 @@ int BPF_PROG(socket_x, struct pt_regs *regs, long ret) {
/* Just called once by our scap process */
if(ret >= 0 && maps__get_socket_file_ops() == NULL) {
struct task_struct *task = get_current_task();
/* Please note that in `g_settings.scap_tid` scap will put its virtual tid
/* Please note that in `settings.scap_tid` scap will put its virtual tid
* if it is running inside a container. If we want to extract the same information
* in the kernel we need to extract the virtual tid of the task.
*/

View File

@@ -461,8 +461,9 @@ int pman_finalize_maps_after_loading(void);
* @param syscall_id syscall system id.
* @param interesting true if the syscall must be marked as interesting.
*
* @return `0` on success, `errno` in case of error.
*/
void pman_mark_single_64bit_syscall(int syscall_id, bool interesting);
int pman_mark_single_64bit_syscall(int syscall_id, bool interesting);
#ifdef __cplusplus
}

View File

@@ -75,63 +75,142 @@ uint64_t pman_get_probe_schema_ver() {
/*=============================== BPF GLOBAL VARIABLES ===============================*/
int pman_get_capture_settings(struct capture_settings* settings) {
char error_message[MAX_ERROR_MESSAGE_LEN];
int ret;
uint32_t key = 0;
int fd = bpf_map__fd(g_state.skel->maps.capture_settings);
if(fd <= 0) {
snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "unable to get capture_settings map fd!");
pman_print_error((const char*)error_message);
return errno;
}
if((ret = bpf_map_lookup_elem(fd, &key, settings)) != 0) {
snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "unable to get capture_settings!");
pman_print_error((const char*)error_message);
}
return ret;
}
int pman_update_capture_settings(struct capture_settings* settings) {
char error_message[MAX_ERROR_MESSAGE_LEN];
int ret;
int fd = bpf_map__fd(g_state.skel->maps.capture_settings);
if(fd <= 0) {
snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "unable to get capture_settings map fd!");
pman_print_error((const char*)error_message);
return errno;
}
uint32_t key = 0;
if((ret = bpf_map_update_elem(fd, &key, settings, BPF_ANY)) != 0) {
snprintf(error_message,
MAX_ERROR_MESSAGE_LEN,
"unable to initialize capture_settings map!");
pman_print_error((const char*)error_message);
}
return ret;
}
void pman_set_snaplen(uint32_t desired_snaplen) {
g_state.skel->bss->g_settings.snaplen = desired_snaplen;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.snaplen = desired_snaplen;
pman_update_capture_settings(&settings);
}
void pman_set_boot_time(uint64_t boot_time) {
g_state.skel->bss->g_settings.boot_time = boot_time;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.boot_time = boot_time;
pman_update_capture_settings(&settings);
}
void pman_set_dropping_mode(bool value) {
g_state.skel->bss->g_settings.dropping_mode = value;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.dropping_mode = value;
pman_update_capture_settings(&settings);
}
void pman_set_sampling_ratio(uint32_t value) {
g_state.skel->bss->g_settings.sampling_ratio = value;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.sampling_ratio = value;
pman_update_capture_settings(&settings);
}
void pman_set_drop_failed(bool drop_failed) {
g_state.skel->bss->g_settings.drop_failed = drop_failed;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.drop_failed = drop_failed;
pman_update_capture_settings(&settings);
}
void pman_set_do_dynamic_snaplen(bool do_dynamic_snaplen) {
g_state.skel->bss->g_settings.do_dynamic_snaplen = do_dynamic_snaplen;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.do_dynamic_snaplen = do_dynamic_snaplen;
pman_update_capture_settings(&settings);
}
void pman_set_fullcapture_port_range(uint16_t range_start, uint16_t range_end) {
g_state.skel->bss->g_settings.fullcapture_port_range_start = range_start;
g_state.skel->bss->g_settings.fullcapture_port_range_end = range_end;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.fullcapture_port_range_start = range_start;
settings.fullcapture_port_range_end = range_end;
pman_update_capture_settings(&settings);
}
void pman_set_statsd_port(uint16_t statsd_port) {
g_state.skel->bss->g_settings.statsd_port = statsd_port;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.statsd_port = statsd_port;
pman_update_capture_settings(&settings);
}
void pman_set_scap_tid(int32_t scap_tid) {
g_state.skel->bss->g_settings.scap_tid = scap_tid;
}
void pman_mark_single_64bit_syscall(int intersting_syscall_id, bool interesting) {
g_state.skel->bss->g_64bit_interesting_syscalls_table[intersting_syscall_id] = interesting;
struct capture_settings settings;
if(pman_get_capture_settings(&settings) != 0) {
return;
}
settings.scap_tid = scap_tid;
pman_update_capture_settings(&settings);
}
void pman_fill_syscall_sampling_table() {
for(int syscall_id = 0; syscall_id < SYSCALL_TABLE_SIZE; syscall_id++) {
if(g_syscall_table[syscall_id].flags & UF_NEVER_DROP) {
g_state.skel->bss->g_64bit_sampling_syscall_table[syscall_id] = UF_NEVER_DROP;
g_state.skel->rodata->g_64bit_sampling_syscall_table[syscall_id] = UF_NEVER_DROP;
continue;
}
/* Syscalls with `g_syscall_table[syscall_id].flags == UF_NONE` are the generic ones */
if(g_syscall_table[syscall_id].flags & UF_ALWAYS_DROP ||
g_syscall_table[syscall_id].flags == UF_NONE) {
g_state.skel->bss->g_64bit_sampling_syscall_table[syscall_id] = UF_ALWAYS_DROP;
g_state.skel->rodata->g_64bit_sampling_syscall_table[syscall_id] = UF_ALWAYS_DROP;
continue;
}
if(g_syscall_table[syscall_id].flags & UF_USED) {
g_state.skel->bss->g_64bit_sampling_syscall_table[syscall_id] = 0;
g_state.skel->rodata->g_64bit_sampling_syscall_table[syscall_id] = 0;
continue;
}
}
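
Each userspace setter above now does a read-modify-write through the map file descriptor: fetch the whole capture_settings struct, change one field, write the struct back, since a map value can only be replaced as a unit. A standalone sketch of that round trip using the fd-based libbpf API (names are illustrative, not the libpman functions; creating the map directly here just avoids needing a loaded skeleton and requires CAP_BPF/root):

#include <stdio.h>
#include <stdint.h>
#include <bpf/bpf.h>

/* Illustrative stand-in for struct capture_settings. */
struct demo_settings {
    uint64_t boot_time;
    uint32_t snaplen;
};

int main(void) {
    /* In libpman the fd would come from bpf_map__fd() on the loaded skeleton;
     * a throwaway one-entry array map is enough to show the round trip. */
    int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "demo_settings", sizeof(uint32_t),
                            sizeof(struct demo_settings), 1, NULL);
    if (fd < 0) {
        perror("bpf_map_create");
        return 1;
    }

    uint32_t key = 0;
    struct demo_settings s;

    /* Read the current value (array map slots are zero-initialized)... */
    if (bpf_map_lookup_elem(fd, &key, &s) != 0) {
        perror("bpf_map_lookup_elem");
        return 1;
    }
    /* ...modify one field... */
    s.snaplen = 80;
    /* ...and write the whole struct back. */
    if (bpf_map_update_elem(fd, &key, &s, BPF_ANY) != 0) {
        perror("bpf_map_update_elem");
        return 1;
    }

    printf("snaplen set to %u\n", s.snaplen);
    return 0;
}
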
@@ -144,7 +223,7 @@ void pman_fill_ia32_to_64_table() {
// 0 is read on x86_64; this is not a problem though because
// we will never receive a 32bit syscall above the upper limit, since it won't be existent.
const int x64_val = g_ia32_64_map[syscall_id];
g_state.skel->bss->g_ia32_to_64_table[syscall_id] = x64_val;
g_state.skel->rodata->g_ia32_to_64_table[syscall_id] = x64_val;
}
}
@@ -295,6 +374,38 @@ int pman_fill_syscall_exit_extra_tail_table() {
/*=============================== BPF_MAP_TYPE_ARRAY ===============================*/
int pman_fill_interesting_syscalls_table_64bit() {
char error_message[MAX_ERROR_MESSAGE_LEN];
int fd = bpf_map__fd(g_state.skel->maps.interesting_syscalls_table_64bit);
for(uint32_t i = 0; i < SYSCALL_TABLE_SIZE; i++) {
const bool interesting = false;
if(bpf_map_update_elem(fd, &i, &interesting, BPF_ANY) < 0) {
snprintf(error_message,
MAX_ERROR_MESSAGE_LEN,
"unable to initialize interesting syscall table at index %d!",
i);
pman_print_error((const char*)error_message);
return errno;
}
}
return 0;
}
int pman_mark_single_64bit_syscall(int syscall_id, bool interesting) {
char error_message[MAX_ERROR_MESSAGE_LEN];
int fd = bpf_map__fd(g_state.skel->maps.interesting_syscalls_table_64bit);
if(bpf_map_update_elem(fd, &syscall_id, &interesting, BPF_ANY) < 0) {
snprintf(error_message,
MAX_ERROR_MESSAGE_LEN,
"unable to set interesting syscall at index %d as %d!",
syscall_id,
interesting);
pman_print_error((const char*)error_message);
return errno;
}
return 0;
}
static int size_auxiliary_maps() {
/* We always allocate auxiliary maps from all the CPUs, even if some of them are not online. */
if(bpf_map__set_max_entries(g_state.skel->maps.auxiliary_maps, g_state.n_possible_cpus)) {
@@ -324,6 +435,8 @@ int pman_prepare_maps_before_loading() {
/* Read-only global variables must be set before loading phase. */
fill_event_params_table();
fill_ppm_sc_table();
pman_fill_ia32_to_64_table();
pman_fill_syscall_sampling_table();
/* We need to set the entries number for every BPF_MAP_TYPE_ARRAY
* The number of entries will be always equal to the CPUs number.
@@ -335,6 +448,11 @@
int pman_finalize_maps_after_loading() {
int err;
struct capture_settings settings = {};
err = pman_update_capture_settings(&settings);
if(err != 0) {
return err;
}
/* set bpf global variables. */
pman_set_snaplen(80);
@@ -346,8 +464,7 @@ int pman_finalize_maps_after_loading() {
pman_set_statsd_port(PPM_PORT_STATSD);
/* We have to fill all ours tail tables. */
pman_fill_syscall_sampling_table();
pman_fill_ia32_to_64_table();
pman_fill_interesting_syscalls_table_64bit();
err = pman_fill_syscalls_tail_table();
err = err ?: pman_fill_syscall_exit_extra_tail_table();
return err;
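
The reshuffling in these hunks follows from how libbpf treats the two storage classes: .rodata globals (the sampling and ia32-to-64 tables) can only be written through the skeleton between open and load, while regular maps (capture_settings, interesting_syscalls_table_64bit) only get usable file descriptors once the object is loaded. A schematic sketch of that ordering, assuming a hypothetical skeleton demo.skel.h that exposes a const volatile table plus a one-entry settings map like the ones sketched earlier (none of these names are the real libpman symbols):

#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "demo.skel.h" /* hypothetical output of `bpftool gen skeleton demo.bpf.o` */

struct demo_settings {
    __u64 boot_time;
    __u32 snaplen;
};

int demo_setup(void) {
    struct demo_bpf *skel = demo_bpf__open();
    if (!skel) {
        return -1;
    }

    /* 1) Between open and load: .rodata (const volatile globals) is still writable. */
    skel->rodata->demo_sampling_table[0] = 1;

    if (demo_bpf__load(skel)) {
        demo_bpf__destroy(skel);
        return -1;
    }

    /* 2) After load: regular maps have file descriptors and can be filled/updated. */
    __u32 key = 0;
    struct demo_settings s = {.snaplen = 80};
    bpf_map_update_elem(bpf_map__fd(skel->maps.settings_map), &key, &s, BPF_ANY);

    return demo_bpf__attach(skel);
}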

View File

@@ -52,11 +52,11 @@ int pman_enforce_sc_set(bool *sc_set) {
}
if(!sc_set[sc]) {
pman_mark_single_64bit_syscall(syscall_id, false);
ret = ret ?: pman_mark_single_64bit_syscall(syscall_id, false);
} else {
sys_enter = true;
sys_exit = true;
pman_mark_single_64bit_syscall(syscall_id, true);
ret = ret ?: pman_mark_single_64bit_syscall(syscall_id, true);
}
}
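
The ret = ret ?: pman_mark_single_64bit_syscall(...) form relies on the GNU ?: extension: a ?: b evaluates to a when a is non-zero, so the loop keeps the first failure while still attempting every remaining syscall update. A tiny standalone illustration of the idiom with a made-up worker function:

#include <stdio.h>

/* Pretend worker: fails with an errno-style value on one index. */
static int mark_syscall(int id) {
    return id == 3 ? 22 /* EINVAL */ : 0;
}

int main(void) {
    int ret = 0;
    for (int id = 0; id < 8; id++) {
        /* First non-zero result sticks; later successes don't overwrite it. */
        ret = ret ?: mark_syscall(id);
    }
    printf("first error: %d\n", ret); /* prints 22 */
    return ret;
}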

View File

@@ -206,10 +206,6 @@ static int32_t next(struct scap_engine_handle engine,
// Sanity checks in case a plugin implements a non-syscall event source.
// If a plugin has event sourcing capability and has a specific ID, then
// it is allowed to produce only plugin events of its own event source.
uint8_t* pplugin_id = (uint8_t*)evt + sizeof(scap_evt) + sizeof(uint32_t) + sizeof(uint32_t);
uint32_t plugin_id;
memcpy(&plugin_id, pplugin_id, sizeof(plugin_id));
if(handle->m_input_plugin->id != 0) {
/*
* | scap_evt | len_id (4B) | len_pl (4B) | id | payload |
@@ -223,6 +219,12 @@
handle->m_input_plugin->name);
}
// We are now sure this is a PLUGINEVENT thus we can read the plugin id (first param)
uint8_t* pplugin_id =
(uint8_t*)evt + sizeof(scap_evt) + sizeof(uint32_t) + sizeof(uint32_t);
uint32_t plugin_id;
memcpy(&plugin_id, pplugin_id, sizeof(plugin_id));
// forcely setting plugin ID with the one of the open plugin
if(plugin_id == 0) {
plugin_id = handle->m_input_plugin->id;
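
For reference, the layout comment above (| scap_evt | len_id (4B) | len_pl (4B) | id | payload |) is what drives this pointer arithmetic: the plugin id is the first parameter, so it sits right after the header and the two 4-byte length fields, and memcpy is used because the buffer carries no alignment guarantee. The fix defers that read until the code is sure the event really is a plugin event, so it cannot run past the end of a shorter event. A standalone sketch of the read itself, with a deliberately stubbed-down header struct (not the real scap_evt definition):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in; the real scap_evt header has different fields/sizes. */
struct demo_evt_hdr {
    uint64_t ts;
    uint32_t len;
    uint16_t type;
};

/* Read the first 32-bit parameter (the plugin id) of a plugin event. */
static uint32_t read_plugin_id(const uint8_t *evt) {
    const uint8_t *p = evt + sizeof(struct demo_evt_hdr) /* header              */
                           + sizeof(uint32_t)            /* len of the id param  */
                           + sizeof(uint32_t);           /* len of the payload   */
    uint32_t id;
    memcpy(&id, p, sizeof(id)); /* unaligned-safe read */
    return id;
}

int main(void) {
    uint8_t buf[64] = {0};
    uint32_t id = 42;
    memcpy(buf + sizeof(struct demo_evt_hdr) + 2 * sizeof(uint32_t), &id, sizeof(id));
    printf("plugin id = %u\n", read_plugin_id(buf));
    return 0;
}
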
@@ -240,6 +242,12 @@
}
if(scap_event_get_type(evt) == PPME_PLUGINEVENT_E) {
// We are now sure this is a PLUGINEVENT thus we can read the plugin id (first param)
uint8_t* pplugin_id =
(uint8_t*)evt + sizeof(scap_evt) + sizeof(uint32_t) + sizeof(uint32_t);
uint32_t plugin_id;
memcpy(&plugin_id, pplugin_id, sizeof(plugin_id));
// a zero plugin ID is not allowed for PPME_PLUGINEVENT_E
if(plugin_id == 0) {
return scap_errprintf(lasterr,