aboutsummaryrefslogtreecommitdiff
path: root/src/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/common.c')
-rw-r--r--src/common.c231
1 files changed, 161 insertions, 70 deletions
diff --git a/src/common.c b/src/common.c
index 606743c..cc6e6aa 100644
--- a/src/common.c
+++ b/src/common.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
@@ -13,14 +15,17 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <linux/limits.h>
-#include <linux/magic.h>
#include <net/if.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <linux/filter.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+
#include <bpf/bpf.h>
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
@@ -32,52 +37,6 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
- [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
- [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
- [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-
- [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
- [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
- [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
- [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
- [BPF_LIRC_MODE2] = "lirc_mode2",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_TRACE_RAW_TP] = "raw_tp",
- [BPF_TRACE_FENTRY] = "fentry",
- [BPF_TRACE_FEXIT] = "fexit",
- [BPF_MODIFY_RETURN] = "mod_ret",
- [BPF_LSM_MAC] = "lsm_mac",
- [BPF_SK_LOOKUP] = "sk_lookup",
- [BPF_TRACE_ITER] = "trace_iter",
- [BPF_XDP_DEVMAP] = "xdp_devmap",
- [BPF_XDP_CPUMAP] = "xdp_cpumap",
- [BPF_XDP] = "xdp",
- [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
- [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
- [BPF_PERF_EVENT] = "perf_event",
-};
-
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -109,7 +68,7 @@ void p_info(const char *fmt, ...)
va_end(ap);
}
-static bool is_bpffs(char *path)
+static bool is_bpffs(const char *path)
{
struct statfs st_fs;
@@ -119,11 +78,73 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and programs. This was done in
+ * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory
+ * accounting'"), in Linux 5.11.
+ *
+ * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does
+ * so by checking for the availability of a given BPF helper and this has
+ * failed on some kernels with backports in the past, see commit 6b4384ff1088
+ * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"").
+ * Instead, we can probe by lowering the process-based rlimit to 0, trying to
+ * load a BPF object, and resetting the rlimit. If the load succeeds then
+ * memcg-based accounting is supported.
+ *
+ * This would be too dangerous to do in the library, because multithreaded
+ * applications might attempt to load items while the rlimit is at 0. Given
+ * that bpftool is single-threaded, this is fine to do here.
+ */
+static bool known_to_need_rlimit(void)
+{
+ struct rlimit rlim_init, rlim_cur_zero = {};
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ union bpf_attr attr;
+ int prog_fd, err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = insn_cnt;
+ attr.license = ptr_to_u64("GPL");
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim_init))
+ return false;
+
+ /* Drop the soft limit to zero. We maintain the hard limit to its
+ * current value, because lowering it would be a permanent operation
+ * for unprivileged users.
+ */
+ rlim_cur_zero.rlim_max = rlim_init.rlim_max;
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero))
+ return false;
+
+ /* Do not use bpf_prog_load() from libbpf here, because it calls
+ * bump_rlimit_memlock(), interfering with the current probe.
+ */
+ prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ err = errno;
+
+ /* reset soft rlimit to its initial value */
+ setrlimit(RLIMIT_MEMLOCK, &rlim_init);
+
+ if (prog_fd < 0)
+ return err == EPERM;
+
+ close(prog_fd);
+ return false;
+}
+
void set_max_rlimit(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
- setrlimit(RLIMIT_MEMLOCK, &rinf);
+ if (known_to_need_rlimit())
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
}
static int
@@ -223,13 +244,16 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
return fd;
}
-int mount_bpffs_for_pin(const char *name)
+int mount_bpffs_for_pin(const char *name, bool is_dir)
{
char err_str[ERR_MAX_LEN];
char *file;
char *dir;
int err = 0;
+ if (is_dir && is_bpffs(name))
+ return err;
+
file = malloc(strlen(name) + 1);
if (!file) {
p_err("mem alloc failed");
@@ -265,7 +289,7 @@ int do_pin_fd(int fd, const char *name)
{
int err;
- err = mount_bpffs_for_pin(name);
+ err = mount_bpffs_for_pin(name, false);
if (err)
return err;
@@ -281,6 +305,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
int err;
int fd;
+ if (!REQ_ARGS(3))
+ return -EINVAL;
+
fd = get_fd(&argc, &argv);
if (fd < 0)
return fd;
@@ -297,6 +324,7 @@ const char *get_fd_type_name(enum bpf_obj_type type)
[BPF_OBJ_UNKNOWN] = "unknown",
[BPF_OBJ_PROG] = "prog",
[BPF_OBJ_MAP] = "map",
+ [BPF_OBJ_LINK] = "link",
};
if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
@@ -328,7 +356,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
info.func_info_rec_size = sizeof(finfo);
info.func_info = ptr_to_u64(&finfo);
- if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+ if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len))
goto copy_name;
prog_btf = btf__load_from_kernel_by_id(info.btf_id);
@@ -463,7 +491,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
goto out_close;
memset(&pinned_info, 0, sizeof(pinned_info));
- if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+ if (bpf_prog_get_info_by_fd(fd, &pinned_info, &len))
goto out_close;
path = strdup(fpath);
@@ -472,10 +500,11 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
goto out_close;
}
- err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path);
+ err = hashmap__append(build_fn_table, pinned_info.id, path);
if (err) {
p_err("failed to append entry to hashmap for ID %u, path '%s': %s",
pinned_info.id, path, strerror(errno));
+ free(path);
goto out_close;
}
@@ -523,7 +552,7 @@ void delete_pinned_obj_table(struct hashmap *map)
return;
hashmap__for_each_entry(map, entry, bkt)
- free(entry->value);
+ free(entry->pvalue);
hashmap__free(map);
}
@@ -605,12 +634,11 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
}
const char *
-ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
- const char **opt)
+ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt)
{
+ __maybe_unused int device_id;
char devname[IF_NAMESIZE];
int vendor_id;
- int device_id;
if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
p_err("Can't get net device name for ifindex %d: %s", ifindex,
@@ -625,6 +653,7 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
}
switch (vendor_id) {
+#ifdef HAVE_LIBBFD_SUPPORT
case 0x19ee:
device_id = read_sysfs_netdev_hex_int(devname, "device");
if (device_id != 0x4000 &&
@@ -633,8 +662,10 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
*opt = "ctx4";
return "NFP-6xxx";
+#endif /* HAVE_LIBBFD_SUPPORT */
+ /* No NFP support in LLVM, we have no valid triple to return. */
default:
- p_err("Can't get bfd arch name for device vendor id 0x%04x",
+ p_err("Can't get arch name for device vendor id 0x%04x",
vendor_id);
return NULL;
}
@@ -702,6 +733,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
{
+ char prog_name[MAX_PROG_FULL_NAME];
unsigned int id = 0;
int fd, nb_fds = 0;
void *tmp;
@@ -727,19 +759,27 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
goto err_close_fds;
}
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ err = bpf_prog_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get prog info (%u): %s",
id, strerror(errno));
goto err_close_fd;
}
- if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
- (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) {
close(fd);
continue;
}
+ if (!tag) {
+ get_prog_full_name(&info, fd, prog_name,
+ sizeof(prog_name));
+ if (strncmp(nametag, prog_name, sizeof(prog_name))) {
+ close(fd);
+ continue;
+ }
+ }
+
if (nb_fds > 0) {
tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
if (!tmp) {
@@ -800,7 +840,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds)
NEXT_ARGP();
name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ if (strlen(name) > MAX_PROG_FULL_NAME - 1) {
p_err("can't parse name");
return -1;
}
@@ -879,7 +919,7 @@ static int map_fd_by_name(char *name, int **fds)
goto err_close_fds;
}
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ err = bpf_map_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get map info (%u): %s",
id, strerror(errno));
@@ -989,7 +1029,8 @@ exit_free:
return fd;
}
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info,
+ __u32 *info_len)
{
int err;
int fd;
@@ -998,7 +1039,7 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
if (fd < 0)
return -1;
- err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ err = bpf_map_get_info_by_fd(fd, info, info_len);
if (err) {
p_err("can't get map info: %s", strerror(errno));
close(fd);
@@ -1008,12 +1049,62 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
return fd;
}
-size_t hash_fn_for_key_as_id(const void *key, void *ctx)
+size_t hash_fn_for_key_as_id(long key, void *ctx)
{
- return (size_t)key;
+ return key;
}
-bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx)
+bool equal_fn_for_key_as_id(long k1, long k2, void *ctx)
{
return k1 == k2;
}
+
+const char *bpf_attach_type_input_str(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_CGROUP_INET_INGRESS: return "ingress";
+ case BPF_CGROUP_INET_EGRESS: return "egress";
+ case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create";
+ case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release";
+ case BPF_CGROUP_SOCK_OPS: return "sock_ops";
+ case BPF_CGROUP_DEVICE: return "device";
+ case BPF_CGROUP_INET4_BIND: return "bind4";
+ case BPF_CGROUP_INET6_BIND: return "bind6";
+ case BPF_CGROUP_INET4_CONNECT: return "connect4";
+ case BPF_CGROUP_INET6_CONNECT: return "connect6";
+ case BPF_CGROUP_INET4_POST_BIND: return "post_bind4";
+ case BPF_CGROUP_INET6_POST_BIND: return "post_bind6";
+ case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4";
+ case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6";
+ case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4";
+ case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6";
+ case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4";
+ case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6";
+ case BPF_CGROUP_SYSCTL: return "sysctl";
+ case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4";
+ case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6";
+ case BPF_CGROUP_GETSOCKOPT: return "getsockopt";
+ case BPF_CGROUP_SETSOCKOPT: return "setsockopt";
+ case BPF_TRACE_RAW_TP: return "raw_tp";
+ case BPF_TRACE_FENTRY: return "fentry";
+ case BPF_TRACE_FEXIT: return "fexit";
+ case BPF_MODIFY_RETURN: return "mod_ret";
+ case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select";
+ case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate";
+ default: return libbpf_bpf_attach_type_str(t);
+ }
+}
+
+int pathname_concat(char *buf, int buf_sz, const char *path,
+ const char *name)
+{
+ int len;
+
+ len = snprintf(buf, buf_sz, "%s/%s", path, name);
+ if (len < 0)
+ return -EINVAL;
+ if (len >= buf_sz)
+ return -ENAMETOOLONG;
+
+ return 0;
+}