diff options
author | Neill Kapron <nkapron@google.com> | 2023-10-17 21:39:38 +0000 |
---|---|---|
committer | Neill Kapron <nkapron@google.com> | 2023-10-17 21:47:39 +0000 |
commit | 22a9c2dedab7722404b4055abc7559dd0bf37cf5 (patch) | |
tree | 7b9493e79c33e6ae4c03ab5eeb8ddb2b716f9a00 | |
parent | ea353ab8f68b23b5b5df9048beec26e3e3660680 (diff) | |
parent | 37e3bf08e0314efa08ba88600f07adf0079c987d (diff) | |
download | dwarves-22a9c2dedab7722404b4055abc7559dd0bf37cf5.tar.gz |
Merge remote-tracking branch 'aosp/upstream-master'
* aosp/upstream-master: (102 commits)
pahole: Don't keep structs in multiple RB trees
README: Remove old comments about ancient distros and add 'git submodule' instruction
spec: Migrate license to a SPDX standardized one
dwarf_loader: DW_TAG_subroutine_type may have a DW_AT_byte_size
pdwtags: Print DW_TAG_constant tags
fprintf: Add DW_TAG_constant pretty printer (constant__fprintf)
dwarf_loader: Add support for DW_TAG_constant
pahole: Prep 1.25
fprintf: Fix `*` not being printed for pointers with btf_type_tag
btf_loader: A hack for BTF import of btf_type_tag attributes
fprintf: Correct names for types with btf_type_tag attribute
btf_encoder: Compare functions via prototypes not parameter names
fprintf: Support skipping modifier
fprintf: Generalize function prototype print to support passing conf
dwarf_loader: Fix for BTF id drift caused by adding unspecified types
fprintf: Support DW_TAG_LLVM_annotation in dwarf_tag_name()
CMakeLists.txt: Call cmake_minimum_required() before project()
dwarf_loader: Only mark parameter as using an unexpected register when it does
dwarf_loader: Fix parameter location retrieval for location lists
dwarf_loader: Fix detection of struct parameters
...
Change-Id: I2acdff5dedb4df07f5fd2890db52fea90b1614c8
Signed-off-by: Neill Kapron <nkapron@google.com>
-rw-r--r-- | Android.bp | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 8 | ||||
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | METADATA | 4 | ||||
-rw-r--r-- | NEWS | 101 | ||||
-rw-r--r-- | README | 17 | ||||
-rw-r--r-- | btf_encoder.c | 530 | ||||
-rw-r--r-- | btf_encoder.h | 8 | ||||
-rw-r--r-- | btf_loader.c | 73 | ||||
-rwxr-xr-x | btfdiff | 1 | ||||
-rw-r--r-- | changes-v1.24 | 36 | ||||
-rw-r--r-- | changes-v1.25 | 58 | ||||
-rw-r--r-- | codiff.c | 2 | ||||
-rw-r--r-- | ctf_loader.c | 3 | ||||
-rw-r--r-- | ctracer.c | 12 | ||||
-rw-r--r-- | dutil.h | 4 | ||||
-rw-r--r-- | dwarf_loader.c | 425 | ||||
-rw-r--r-- | dwarves.c | 227 | ||||
-rw-r--r-- | dwarves.h | 114 | ||||
-rw-r--r-- | dwarves_emit.c | 190 | ||||
-rw-r--r-- | dwarves_emit.h | 8 | ||||
-rw-r--r-- | dwarves_fprintf.c | 208 | ||||
-rw-r--r-- | dwarves_reorganize.c | 4 | ||||
l--------- | lib/bpf | 1 | ||||
-rw-r--r-- | man-pages/pahole.1 | 56 | ||||
-rw-r--r-- | pahole.c | 322 | ||||
-rw-r--r-- | pdwtags.c | 8 | ||||
-rw-r--r-- | pfunct.c | 10 | ||||
-rw-r--r-- | rpm/SPECS/dwarves.spec | 44 | ||||
-rw-r--r-- | syscse.c | 2 |
30 files changed, 2201 insertions, 278 deletions
@@ -37,6 +37,7 @@ cc_library_host_static { "dutil.c", "dwarf_loader.c", "dwarves.c", + "dwarves_emit.c", "dwarves_fprintf.c", "dwarves_reorganize.c", "elf_symtab.c", diff --git a/CMakeLists.txt b/CMakeLists.txt index c0363b8..98642e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ -project(pahole C) cmake_minimum_required(VERSION 2.8.12) +project(pahole C) cmake_policy(SET CMP0005 NEW) option(LIBBPF_EMBEDDED "Use the embedded version of libbpf instead of searching it via pkg-config" ON) @@ -54,9 +54,9 @@ if (NOT DEFINED BUILD_SHARED_LIBS) endif (NOT DEFINED BUILD_SHARED_LIBS) # Just for grepping, DWARVES_VERSION isn't used anywhere anymore -# add_definitions(-D_GNU_SOURCE -DDWARVES_VERSION="v1.23") +# add_definitions(-D_GNU_SOURCE -DDWARVES_VERSION="v1.25") add_definitions(-D_GNU_SOURCE -DDWARVES_MAJOR_VERSION=1) -add_definitions(-D_GNU_SOURCE -DDWARVES_MINOR_VERSION=23) +add_definitions(-D_GNU_SOURCE -DDWARVES_MINOR_VERSION=25) find_package(DWARF REQUIRED) find_package(ZLIB REQUIRED) find_package(argp REQUIRED) @@ -149,7 +149,7 @@ target_link_libraries(dtagnames dwarves) set(pahole_SRCS pahole.c) add_executable(pahole ${pahole_SRCS}) -target_link_libraries(pahole dwarves dwarves_reorganize) +target_link_libraries(pahole dwarves dwarves_emit dwarves_reorganize) set(pdwtags_SRCS pdwtags.c) add_executable(pdwtags ${pdwtags_SRCS}) @@ -52,6 +52,8 @@ changes-v1.20 changes-v1.21 changes-v1.22 changes-v1.23 +changes-v1.24 +changes-v1.25 buildcmd.sh COPYING NEWS @@ -11,7 +11,7 @@ third_party { type: GIT value: "https://git.kernel.org/pub/scm/devel/pahole/pahole" } - version: "c2b7b8c20877d267159ace36119f6340b9d12823" - last_upgrade_date { year: 2022 month: 1 day: 5 } + version: "37e3bf08e0314efa08ba88600f07adf0079c987d" + last_upgrade_date { year: 2023 month: 10 day: 17 } license_type: RESTRICTED } @@ -1,3 +1,104 @@ +v1.24 + +Sat Apr 8 2023 + +eab23ebd2ce22b3a fprintf: Fix `*` not being printed for pointers with btf_type_tag +c969e4969ccd2e7c btf_loader: A 
hack for BTF import of btf_type_tag attributes +40ebd8b9e3312d0a fprintf: Correct names for types with btf_type_tag attribute +4d17096076b2351f btf_encoder: Compare functions via prototypes not parameter names +82730394195276ac fprintf: Support skipping modifier +d184aaa125ea40ff fprintf: Generalize function prototype print to support passing conf +a9498899109d3be1 dwarf_loader: Fix for BTF id drift caused by adding unspecified types +eeeab1c9e8b85fc7 fprintf: Support DW_TAG_LLVM_annotation in dwarf_tag_name() +4ec4b64bd501986b CMakeLists.txt: Call cmake_minimum_required() before project() +111dfd2cee118892 dwarf_loader: Only mark parameter as using an unexpected register when it does +5851040e2468ba04 dwarf_loader: Fix parameter location retrieval for location lists +721ca66d5be462b2 dwarf_loader: Fix detection of struct parameters +ef68019c357845b4 pahole: Update man page for options also +d58c61498ea78064 pahole: Update descriptions for btf_gen_optimized, skip_encoding_btf_inconsistent_proto +9b2abc1c7a16c39d btf_encoder: Exclude functions with unexpected param register use not optimizations +a53c58158b761f8f dwarf_loader: Mark functions that do not use expected registers for params +431df45378ef00f3 btfdiff: Exclude Rust CUs since those are not yet being converted to BTF on the Linux kernel +1231b6b9b4d88e00 dwarf_loader: Fix sorting of Rust structs +c4eb1897d1f3841d core: Check that we're adding DW_TAG_member sorted by byte offset +b53d430aeab416e7 btf_encoder: Ensure ELF function representation is fully initialized +a3b47e41656d21e7 CMakeList.txt: Bump version to the upcoming 1.25 release, not out of the door yet +f104790698ae93f9 btf_encoder: Support delaying function addition to check for function prototype inconsistencies +6d95d162c6c804c4 btf_encoder: Represent "."-suffixed functions (".isra.0") in BTF +c8e8dbcd71507618 btf_encoder: Rework btf_encoders__*() API to allow traversal of encoders +d381cc7458c53fdf btf_encoder: Refactor function addition into 
dedicated btf_encoder__add_func +52b25808e44a8e7f btf_encoder: Store type_id_off, unspecified type in encoder +75939e655a797896 dwarf_loader: Help spotting functions with optimized-out parameters +2b5cb9bc0e905811 pahole: Sync with libbpf-1.1 +45c044860c2abce7 dwarf_loader: Sync with LINUX_ELFNOTE_LTO_INFO macro from kernel +74f21ed08e4313f0 pahole: Set libbpf debug printer in -V mode +bc1538dcb14bc4d9 pahole: Use type__fprintf() directly for --compile +51643cb290f91425 core: Introduce base_type__language_defined() +ba4bdc73806101dc dwarf_loader: DW_TAG_inlined_subroutine needs recoding by DW_AT_abstract_origin +02d67c51765dfbd5 pfunct: Use zalloc() to make the code more robust +cd4d2d251e9a7f98 pahole: Use zalloc() to make the code more robust +e5e24ada4f5578ae core: Use zalloc() to make the code more robust +b72f5188856df0ab dwarves: Zero-initialize struct cu in cu__new() to prevent incorrect BTF types +d85b86ecc1df4076 emit: Support DW_TAG_atomic_type when emitting definitions for a typedef +7d0bc9334d2f1143 fprintf: Support _Atomic typedefs +8894c04f00e4c702 fprintf: Move the "typedef " invariant printf to the start of typedef__fprintf() +3836623af1b5ff1f pahole: Allow skipping the emission of atomic typedefs +6bb5a1fa990f5bd6 emit: Allow skip emitting the atomic typedefs +8d2e166dd3d168d9 emit: Optionally pass a conf_fprintf struct to type_emissions__init +8c2b37ec71423067 emit: Emit typedefs for atomic_ prefixed base types +a4f3a79da8d4ed8b emit: cu__type() == NULL means "void" +bcc648a10cbcd0b9 btf_encoder: Encode DW_TAG_unspecified_type returning routines as void +cffe5e1f75e1612e core: Record if a CU has a DW_TAG_unspecified_type +75e0fe28bb02036d core: Add DW_TAG_unspecified_type to tag__is_tag_type() set +121a46a026afac19 btf_encoder: Store the CU being processed to avoid changing many functions +56bcfa9135312ffe fprintf: Emit "_Atomic" modifiers for DW_TAG_atomic_type +cf27a2b805f21356 core: Print more info on tag__assert_search_result() 
+d5012f7be79f1b81 btf_encoder: Add extra debug info for unsupported DWARF tags +79d9a783ea74797a pahole: Support '--lang/--lang_exclude=asm' +e819d737de88dc1f pahole: Add "btf" to the format-path option man page +f01e5f3a849558b8 dwarf_loader: Support DW_TAG_label outside DW_TAG_lexblock +b84120772df33f93 pahole: Allow --compile to work with DWARF in addition to with BTF +6fdb0140692acff0 dwarves: support DW_TAG_atomic_type +d7507140eab4bf9f emit: Don't mark a enum with nr_members == 0 as printed, its just a fwd decl +843fe9bfab4859b4 emit: Check if disambiguated struct/enum/union name was already emitted in a previous CU +f5857bd34b220a64 pahole: Honour --compile when -C is used +2bb968b567011f8a btf: Fix building with system libbpf +ea30d58a2329764b core: Conditionally define language encodings + +Wed Aug 17 2022 + +d6c952893b1bbea9 dwarf_loader: Encode char type as signed +23342fef5e5f6070 dwarf_loader: Fix elfutils dwfl_getmodules() error checking +9712d9ec929fb6b3 btf_loader: Add support to BTF_KIND_ENUM64 +35a11221b305a520 btf: Support BTF_KIND_ENUM64 +3aa68ba050d4ddc4 libbpf: Sync with latest libbpf repo +c3d6522e0499c5b0 fprintf: Fix plural/singular when printing cacheline boundary +384a16a011226163 btf_encoder: Normalize array index type for parallel dwarf loading case +d9e36dd9dd2a526a libbpf: Sync with latest libbpf repo +49358dfe2aaae4e9 pahole: Add --lang_exclude to allow skipping compilation units written in some languages +8ee363790b743728 pahole: Introduce --lang to ask for only compilation units written in some languages +c3f2fe2f6a81f13c core: Add string to id language lookup method +3d0dfe0610517ff6 pahole: Don't try encode BTF when all CUs were filtered +2730ef6965efd8e6 fprintf: preserve conf_fprintf pointer in tag__ptr_name +1bc98ed290d2c833 btf_encoder: Collect info of per-cpu variables from threads +31df013b70540072 dwarves: Set errno if load fails in cus__load_files() +f952a6f69f9508c8 pahole: Avoid segfault when parsing bogus file 
+65d7273668ded59b pahole: Introduce --compile to produce a compilable output +4d004e2314f3252e core: Ditch 'dwarves__active_loader' extern declaration, it was nuked +4f332dbfd02072e4 emit: Notice type shadowing, i.e. multiple types with the same name (enum, struct, union, etc) +0a82f74ce25a5904 core: Make type->packed_attributes_inferred a one bit member +fac821246c582299 core: type->declaration is just one bit, make it a bitfield member +742f04f89da03665 emit: Search for data structures using its type in addition to its name +32cc1481721c4b11 fprintf: Consider enumerations without members as forward declarations +6afc296eeb180e25 emit: Fix printing typedef of nameless struct/union +49a2dd657728675b fprintf: Check if conf->conf_fprintf is not NULL in when resolving cacheline_size +46cec35ff0411e0f fprintf: Fix division by zero for uninitialized conf_fprintf->cacheline_size field +73383b3a39afe86b libbpf: Update libbpf to the latest git HEAD +21352753186ec582 pahole: Use per-thread btf instances to avoid mutex locking +96d2c5c323255134 dwarf_loader: Prepare and pass per-thread data to worker threads +724c8fddd71be1fe dwarf_loader: Receive per-thread data on worker threads +2f7d61b2bfb59427 core: Define DW_TAG_skeleton_unit if not available on current dwarf.h + v1.23 Wed Dec 8 2021 @@ -18,19 +18,6 @@ cmake Options: Default is to install to /usr/local, use -DCMAKE_INSTALL_PREFIX= when invoking cmake to specify another install location. 
-Known to work scenarios: +You may need to update the libbpf git submodule: -Mandriva Cooker: - -cmake 2.4.5-1mdv2007.1 -libelfutils1-devel 0.123-1mdv2007.1 - -Debian Unstable: - -cmake 2.4.5-1 -libdw-dev 0.123-2 - -Fedora Core 6: - -cmake 2.4.5-2.fc6 -elfutils-devel 0.126-1.fc6 +git submodule update --init --recursive diff --git a/btf_encoder.c b/btf_encoder.c index 154d679..1aa0ad0 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -9,12 +9,12 @@ Copyright (C) Red Hat Inc */ +#include <linux/btf.h> #include "dwarves.h" #include "elf_symtab.h" #include "btf_encoder.h" #include "gobuffer.h" -#include <linux/btf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> #include <ctype.h> /* for isalpha() and isalnum() */ @@ -30,10 +30,24 @@ #include <errno.h> #include <stdint.h> +#include <search.h> /* for tsearch(), tfind() and tdestroy() */ +#include <pthread.h> + +#define BTF_ENCODER_MAX_PROTO 512 + +/* state used to do later encoding of saved functions */ +struct btf_encoder_state { + uint32_t type_id_off; + bool got_proto; + char proto[BTF_ENCODER_MAX_PROTO]; +}; struct elf_function { const char *name; bool generated; + size_t prefixlen; + struct function *function; + struct btf_encoder_state state; }; #define MAX_PERCPU_VAR_CNT 4096 @@ -44,12 +58,18 @@ struct var_info { uint32_t sz; }; +/* + * cu: cu being processed. 
+ */ struct btf_encoder { struct list_head node; struct btf *btf; + struct cu *cu; struct gobuffer percpu_secinfo; const char *filename; struct elf_symtab *symtab; + uint32_t type_id_off; + int saved_func_cnt; bool has_index_type, need_index_type, skip_encoding_vars, @@ -70,22 +90,42 @@ struct btf_encoder { struct elf_function *entries; int allocated; int cnt; + int suffix_cnt; /* number of .isra, .part etc */ } functions; }; -void btf_encoders__add(struct list_head *encoders, struct btf_encoder *encoder) -{ - list_add_tail(&encoder->node, encoders); -} +static LIST_HEAD(encoders); +static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; -struct btf_encoder *btf_encoders__first(struct list_head *encoders) +static void btf_encoder__add_saved_funcs(struct btf_encoder *encoder); + +/* mutex only needed for add/delete, as this can happen in multiple encoding + * threads. Traversal of the list is currently confined to thread collection. + */ + +#define btf_encoders__for_each_encoder(encoder) \ + list_for_each_entry(encoder, &encoders, node) + +static void btf_encoders__add(struct btf_encoder *encoder) { - return list_first_entry(encoders, struct btf_encoder, node); + pthread_mutex_lock(&encoders__lock); + list_add_tail(&encoder->node, &encoders); + pthread_mutex_unlock(&encoders__lock); } -struct btf_encoder *btf_encoders__next(struct btf_encoder *encoder) +static void btf_encoders__delete(struct btf_encoder *encoder) { - return list_next_entry(encoder, node); + struct btf_encoder *existing = NULL; + + pthread_mutex_lock(&encoders__lock); + /* encoder may not have been added to list yet; check. 
*/ + btf_encoders__for_each_encoder(existing) { + if (encoder == existing) + break; + } + if (encoder == existing) + list_del(&encoder->node); + pthread_mutex_unlock(&encoders__lock); } #define PERCPU_SECTION ".data..percpu" @@ -124,7 +164,7 @@ static int btf_var_secinfo_cmp(const void *a, const void *b) #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) #define BITS_ROUNDUP_BYTES(bits) (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) -static const char * const btf_kind_str[NR_BTF_KINDS] = { +static const char * const btf_kind_str[] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", [BTF_KIND_PTR] = "PTR", @@ -144,6 +184,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; static const char *btf__printable_name(const struct btf *btf, uint32_t offset) @@ -172,7 +213,7 @@ __attribute ((format (printf, 5, 6))) static void btf__log_err(const struct btf *btf, int kind, const char *name, bool output_cr, const char *fmt, ...) { - fprintf(stderr, "[%u] %s %s", btf__get_nr_types(btf) + 1, + fprintf(stderr, "[%u] %s %s", btf__type_cnt(btf), btf_kind_str[kind], name ?: "(anon)"); if (fmt && *fmt) { @@ -203,7 +244,7 @@ static void btf_encoder__log_type(const struct btf_encoder *encoder, const struc out = err ? 
stderr : stdout; fprintf(out, "[%u] %s %s", - btf__get_nr_types(btf), btf_kind_str[kind], + btf__type_cnt(btf) - 1, btf_kind_str[kind], btf__printable_name(btf, t->name_off)); if (fmt && *fmt) { @@ -449,10 +490,10 @@ static int btf_encoder__add_field(struct btf_encoder *encoder, const char *name, int err; err = btf__add_field(btf, name, type, offset, bitfield_size); - t = btf__type_by_id(btf, btf__get_nr_types(btf)); + t = btf__type_by_id(btf, btf__type_cnt(btf) - 1); if (err) { fprintf(stderr, "[%u] %s %s's field '%s' offset=%u bit_size=%u type=%u Error emitting field\n", - btf__get_nr_types(btf), btf_kind_str[btf_kind(t)], + btf__type_cnt(btf) - 1, btf_kind_str[btf_kind(t)], btf__printable_name(btf, t->name_off), name, offset, bitfield_size, type); } else { @@ -490,34 +531,87 @@ static int32_t btf_encoder__add_struct(struct btf_encoder *encoder, uint8_t kind return id; } -static int32_t btf_encoder__add_enum(struct btf_encoder *encoder, const char *name, uint32_t bit_size) +#if LIBBPF_MAJOR_VERSION < 1 +static inline int libbpf_err(int ret) +{ + if (ret < 0) + errno = -ret; + return ret; +} + +static +int btf__add_enum64(struct btf *btf __maybe_unused, const char *name __maybe_unused, + __u32 byte_sz __maybe_unused, bool is_signed __maybe_unused) +{ + return libbpf_err(-ENOTSUP); +} + +static +int btf__add_enum64_value(struct btf *btf __maybe_unused, const char *name __maybe_unused, + __u64 value __maybe_unused) +{ + return libbpf_err(-ENOTSUP); +} +#endif + +static int32_t btf_encoder__add_enum(struct btf_encoder *encoder, const char *name, struct type *etype, + struct conf_load *conf_load) { struct btf *btf = encoder->btf; const struct btf_type *t; int32_t id, size; + bool is_enum32; - size = BITS_ROUNDUP_BYTES(bit_size); - id = btf__add_enum(btf, name, size); + size = BITS_ROUNDUP_BYTES(etype->size); + is_enum32 = size <= 4 || conf_load->skip_encoding_btf_enum64; + if (is_enum32) + id = btf__add_enum(btf, name, size); + else + id = btf__add_enum64(btf, name, 
size, etype->is_signed_enum); if (id > 0) { t = btf__type_by_id(btf, id); btf_encoder__log_type(encoder, t, false, true, "size=%u", t->size); } else { - btf__log_err(btf, BTF_KIND_ENUM, name, true, + btf__log_err(btf, is_enum32 ? BTF_KIND_ENUM : BTF_KIND_ENUM64, name, true, "size=%u Error emitting BTF type", size); } return id; } -static int btf_encoder__add_enum_val(struct btf_encoder *encoder, const char *name, int32_t value) +static int btf_encoder__add_enum_val(struct btf_encoder *encoder, const char *name, int64_t value, + struct type *etype, struct conf_load *conf_load) { - int err = btf__add_enum_value(encoder->btf, name, value); + const char *fmt_str; + int err; + + /* If enum64 is not allowed, generate enum32 with unsigned int value. In enum64-supported + * libbpf library, btf__add_enum_value() will set the kflag (sign bit) in common_type + * if the value is negative. + */ + if (conf_load->skip_encoding_btf_enum64) + err = btf__add_enum_value(encoder->btf, name, (uint32_t)value); + else if (etype->size > 32) + err = btf__add_enum64_value(encoder->btf, name, value); + else + err = btf__add_enum_value(encoder->btf, name, value); if (!err) { - if (encoder->verbose) - printf("\t%s val=%d\n", name, value); + if (encoder->verbose) { + if (conf_load->skip_encoding_btf_enum64) { + printf("\t%s val=%u\n", name, (uint32_t)value); + } else { + fmt_str = etype->is_signed_enum ? "\t%s val=%lld\n" : "\t%s val=%llu\n"; + printf(fmt_str, name, (unsigned long long)value); + } + } } else { - fprintf(stderr, "\t%s val=%d Error emitting BTF enum value\n", - name, value); + if (conf_load->skip_encoding_btf_enum64) { + fprintf(stderr, "\t%s val=%u Error emitting BTF enum value\n", name, (uint32_t)value); + } else { + fmt_str = etype->is_signed_enum ? 
"\t%s val=%lld Error emitting BTF enum value\n" + : "\t%s val=%llu Error emitting BTF enum value\n"; + fprintf(stderr, fmt_str, name, (unsigned long long)value); + } } return err; } @@ -535,7 +629,15 @@ static int32_t btf_encoder__add_func_param(struct btf_encoder *encoder, const ch } } -static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype, uint32_t type_id_off) +static int32_t btf_encoder__tag_type(struct btf_encoder *encoder, uint32_t tag_type) +{ + if (tag_type == 0) + return 0; + + return encoder->type_id_off + tag_type; +} + +static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype) { struct btf *btf = encoder->btf; const struct btf_type *t; @@ -545,7 +647,7 @@ static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct f /* add btf_type for func_proto */ nr_params = ftype->nr_parms + (ftype->unspec_parms ? 1 : 0); - type_id = ftype->tag.type == 0 ? 0 : type_id_off + ftype->tag.type; + type_id = btf_encoder__tag_type(encoder, ftype->tag.type); id = btf__add_func_proto(btf, type_id); if (id > 0) { @@ -563,7 +665,7 @@ static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct f ftype__for_each_parameter(ftype, param) { const char *name = parameter__name(param); - type_id = param->tag.type == 0 ? 0 : type_id_off + param->tag.type; + type_id = param->tag.type == 0 ? 
0 : encoder->type_id_off + param->tag.type; ++param_idx; if (btf_encoder__add_func_param(encoder, name, type_id, param_idx == nr_params)) return -1; @@ -606,6 +708,32 @@ static int32_t btf_encoder__add_var_secinfo(struct btf_encoder *encoder, uint32_ return gobuffer__add(&encoder->percpu_secinfo, &si, sizeof(si)); } +int32_t btf_encoder__add_encoder(struct btf_encoder *encoder, struct btf_encoder *other) +{ + struct gobuffer *var_secinfo_buf = &other->percpu_secinfo; + size_t sz = gobuffer__size(var_secinfo_buf); + uint16_t nr_var_secinfo = sz / sizeof(struct btf_var_secinfo); + uint32_t type_id; + uint32_t next_type_id = btf__type_cnt(encoder->btf); + int32_t i, id; + struct btf_var_secinfo *vsi; + + if (encoder == other) + return 0; + + btf_encoder__add_saved_funcs(other); + + for (i = 0; i < nr_var_secinfo; i++) { + vsi = (struct btf_var_secinfo *)var_secinfo_buf->entries + i; + type_id = next_type_id + vsi->type - 1; /* Type ID starts from 1 */ + id = btf_encoder__add_var_secinfo(encoder, type_id, vsi->offset, vsi->size); + if (id < 0) + return id; + } + + return btf__add_btf(encoder->btf, other->btf); +} + static int32_t btf_encoder__add_datasec(struct btf_encoder *encoder, const char *section_name) { struct gobuffer *var_secinfo_buf = &encoder->percpu_secinfo; @@ -670,6 +798,168 @@ static int32_t btf_encoder__add_decl_tag(struct btf_encoder *encoder, const char return id; } +static bool proto__get(struct function *func, char *proto, size_t len) +{ + const struct conf_fprintf conf = { + .name_spacing = 23, + .type_spacing = 26, + .emit_stats = 0, + .no_parm_names = 1, + .skip_emitting_errors = 1, + .skip_emitting_modifier = 1, + }; + + return function__prototype_conf(func, func->priv, &conf, proto, len) != NULL; +} + +static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func, struct function *f2) +{ + char proto[BTF_ENCODER_MAX_PROTO]; + struct function *f1 = func->function; + const char *name; + + if (!f1) + return false; + + name = 
function__name(f1); + + if (f1->proto.nr_parms != f2->proto.nr_parms) { + if (encoder->verbose) + printf("function mismatch for '%s'(%s): %d params != %d params\n", + name, f1->alias ?: name, + f1->proto.nr_parms, f2->proto.nr_parms); + return false; + } + if (f1->proto.nr_parms == 0) + return true; + + if (f1->proto.tag.type == f2->proto.tag.type) + return true; + + if (!func->state.got_proto) + func->state.got_proto = proto__get(f1, func->state.proto, sizeof(func->state.proto)); + + if (proto__get(f2, proto, sizeof(proto))) { + if (strcmp(func->state.proto, proto) != 0) { + if (encoder->verbose) + printf("function mismatch for '%s'('%s'): '%s' != '%s'\n", + name, f1->alias ?: name, + func->state.proto, proto); + return false; + } + } + return true; +} + +static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func) +{ + fn->priv = encoder->cu; + if (func->function) { + struct function *existing = func->function; + + /* If saving and we find an existing entry, we want to merge + * observations across both functions, checking that the + * "seen optimized parameters", "inconsistent prototype" + * and "unexpected register" status is reflected in the + * the func entry. + * If the entry is new, record encoder state required + * to add the local function later (encoder + type_id_off) + * such that we can add the function later. 
+ */ + existing->proto.optimized_parms |= fn->proto.optimized_parms; + existing->proto.unexpected_reg |= fn->proto.unexpected_reg; + if (!existing->proto.unexpected_reg && !existing->proto.inconsistent_proto && + !funcs__match(encoder, func, fn)) + existing->proto.inconsistent_proto = 1; + } else { + func->state.type_id_off = encoder->type_id_off; + func->function = fn; + encoder->saved_func_cnt++; + } + return 0; +} + +static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn) +{ + int btf_fnproto_id, btf_fn_id, tag_type_id; + struct llvm_annotation *annot; + const char *name; + + btf_fnproto_id = btf_encoder__add_func_proto(encoder, &fn->proto); + name = function__name(fn); + btf_fn_id = btf_encoder__add_ref_type(encoder, BTF_KIND_FUNC, btf_fnproto_id, name, false); + if (btf_fnproto_id < 0 || btf_fn_id < 0) { + printf("error: failed to encode function '%s'\n", function__name(fn)); + return -1; + } + list_for_each_entry(annot, &fn->annots, node) { + tag_type_id = btf_encoder__add_decl_tag(encoder, annot->value, btf_fn_id, + annot->component_idx); + if (tag_type_id < 0) { + fprintf(stderr, "error: failed to encode tag '%s' to func %s with component_idx %d\n", + annot->value, name, annot->component_idx); + return -1; + } + } + return 0; +} + +static void btf_encoder__add_saved_funcs(struct btf_encoder *encoder) +{ + int i; + + for (i = 0; i < encoder->functions.cnt; i++) { + struct elf_function *func = &encoder->functions.entries[i]; + struct function *fn = func->function; + struct btf_encoder *other_encoder; + + if (!fn || fn->proto.processed) + continue; + + /* merge optimized-out status across encoders; since each + * encoder has the same elf symbol table we can use the + * same index to access the same elf symbol. 
+ */ + btf_encoders__for_each_encoder(other_encoder) { + struct function *other_fn; + + if (other_encoder == encoder) + continue; + + other_fn = other_encoder->functions.entries[i].function; + if (!other_fn) + continue; + fn->proto.optimized_parms |= other_fn->proto.optimized_parms; + fn->proto.unexpected_reg |= other_fn->proto.unexpected_reg; + if (other_fn->proto.inconsistent_proto) + fn->proto.inconsistent_proto = 1; + if (!fn->proto.unexpected_reg && !fn->proto.inconsistent_proto && + !funcs__match(encoder, func, other_fn)) + fn->proto.inconsistent_proto = 1; + other_fn->proto.processed = 1; + } + /* do not exclude functions with optimized-out parameters; they + * may still be _called_ with the right parameter values, they + * just do not _use_ them. Only exclude functions with + * unexpected register use or multiple inconsistent prototypes. + */ + if (fn->proto.unexpected_reg || fn->proto.inconsistent_proto) { + if (encoder->verbose) { + const char *name = function__name(fn); + + printf("skipping addition of '%s'(%s) due to %s\n", + name, fn->alias ?: name, + fn->proto.unexpected_reg ? "unexpected register used for parameter" : + "multiple inconsistent function prototypes"); + } + } else { + encoder->type_id_off = func->state.type_id_off; + btf_encoder__add_func(encoder, fn); + } + fn->proto.processed = 1; + } +} + /* * This corresponds to the same macro defined in * include/linux/kallsyms.h @@ -681,6 +971,11 @@ static int functions_cmp(const void *_a, const void *_b) const struct elf_function *a = _a; const struct elf_function *b = _b; + /* if search key allows prefix match, verify target has matching + * prefix len and prefix matches. 
+ */ + if (a->prefixlen && a->prefixlen == b->prefixlen) + return strncmp(a->name, b->name, b->prefixlen); return strcmp(a->name, b->name); } @@ -713,14 +1008,25 @@ static int btf_encoder__collect_function(struct btf_encoder *encoder, GElf_Sym * } encoder->functions.entries[encoder->functions.cnt].name = name; + if (strchr(name, '.')) { + const char *suffix = strchr(name, '.'); + + encoder->functions.suffix_cnt++; + encoder->functions.entries[encoder->functions.cnt].prefixlen = suffix - name; + } encoder->functions.entries[encoder->functions.cnt].generated = false; + encoder->functions.entries[encoder->functions.cnt].function = NULL; + encoder->functions.entries[encoder->functions.cnt].state.got_proto = false; + encoder->functions.entries[encoder->functions.cnt].state.proto[0] = '\0'; + encoder->functions.entries[encoder->functions.cnt].state.type_id_off = 0; encoder->functions.cnt++; return 0; } -static struct elf_function *btf_encoder__find_function(const struct btf_encoder *encoder, const char *name) +static struct elf_function *btf_encoder__find_function(const struct btf_encoder *encoder, + const char *name, size_t prefixlen) { - struct elf_function key = { .name = name }; + struct elf_function key = { .name = name, .prefixlen = prefixlen }; return bsearch(&key, encoder->functions.entries, encoder->functions.cnt, sizeof(key), functions_cmp); } @@ -767,22 +1073,21 @@ static void dump_invalid_symbol(const char *msg, const char *sym, fprintf(stderr, "PAHOLE: Error: Use '--btf_encode_force' to ignore such symbols and force emit the btf.\n"); } -static int tag__check_id_drift(const struct tag *tag, - uint32_t core_id, uint32_t btf_type_id, - uint32_t type_id_off) +static int tag__check_id_drift(struct btf_encoder *encoder, const struct tag *tag, + uint32_t core_id, uint32_t btf_type_id) { - if (btf_type_id != (core_id + type_id_off)) { + if (btf_type_id != (core_id + encoder->type_id_off)) { fprintf(stderr, "%s: %s id drift, core_id: %u, btf_type_id: %u, 
type_id_off: %u\n", __func__, dwarf_tag_name(tag->tag), - core_id, btf_type_id, type_id_off); + core_id, btf_type_id, encoder->type_id_off); return -1; } return 0; } -static int32_t btf_encoder__add_struct_type(struct btf_encoder *encoder, struct tag *tag, uint32_t type_id_off) +static int32_t btf_encoder__add_struct_type(struct btf_encoder *encoder, struct tag *tag) { struct type *type = tag__type(tag); struct class_member *pos; @@ -804,7 +1109,8 @@ static int32_t btf_encoder__add_struct_type(struct btf_encoder *encoder, struct * is required. */ name = class_member__name(pos); - if (btf_encoder__add_field(encoder, name, type_id_off + pos->tag.type, pos->bitfield_size, pos->bit_offset)) + if (btf_encoder__add_field(encoder, name, encoder->type_id_off + pos->tag.type, + pos->bitfield_size, pos->bit_offset)) return -1; } @@ -823,30 +1129,32 @@ static uint32_t array_type__nelems(struct tag *tag) return nelem; } -static int32_t btf_encoder__add_enum_type(struct btf_encoder *encoder, struct tag *tag) +static int32_t btf_encoder__add_enum_type(struct btf_encoder *encoder, struct tag *tag, + struct conf_load *conf_load) { struct type *etype = tag__type(tag); struct enumerator *pos; const char *name = type__name(etype); int32_t type_id; - type_id = btf_encoder__add_enum(encoder, name, etype->size); + type_id = btf_encoder__add_enum(encoder, name, etype, conf_load); if (type_id < 0) return type_id; type__for_each_enumerator(etype, pos) { name = enumerator__name(pos); - if (btf_encoder__add_enum_val(encoder, name, pos->value)) + if (btf_encoder__add_enum_val(encoder, name, pos->value, etype, conf_load)) return -1; } return type_id; } -static int btf_encoder__encode_tag(struct btf_encoder *encoder, struct tag *tag, uint32_t type_id_off) +static int btf_encoder__encode_tag(struct btf_encoder *encoder, struct tag *tag, + struct conf_load *conf_load) { /* single out type 0 as it represents special type "void" */ - uint32_t ref_type_id = tag->type == 0 ? 
0 : type_id_off + tag->type; + uint32_t ref_type_id = tag->type == 0 ? 0 : encoder->type_id_off + tag->type; struct base_type *bt; const char *name; @@ -876,18 +1184,27 @@ static int btf_encoder__encode_tag(struct btf_encoder *encoder, struct tag *tag, if (tag__type(tag)->declaration) return btf_encoder__add_ref_type(encoder, BTF_KIND_FWD, 0, name, tag->tag == DW_TAG_union_type); else - return btf_encoder__add_struct_type(encoder, tag, type_id_off); + return btf_encoder__add_struct_type(encoder, tag); case DW_TAG_array_type: /* TODO: Encode one dimension at a time. */ encoder->need_index_type = true; return btf_encoder__add_array(encoder, ref_type_id, encoder->array_index_id, array_type__nelems(tag)); case DW_TAG_enumeration_type: - return btf_encoder__add_enum_type(encoder, tag); + return btf_encoder__add_enum_type(encoder, tag, conf_load); case DW_TAG_subroutine_type: - return btf_encoder__add_func_proto(encoder, tag__ftype(tag), type_id_off); + return btf_encoder__add_func_proto(encoder, tag__ftype(tag)); + case DW_TAG_unspecified_type: + /* Just don't encode this for now, converting anything with this type to void (0) instead. + * + * If we end up needing to encode this, one possible hack is to do as follows, as "const void". + * + * Returning zero means we skipped encoding a DWARF type. 
+ */ + // btf_encoder__add_ref_type(encoder, BTF_KIND_CONST, 0, NULL, false); + return 0; default: - fprintf(stderr, "Unsupported DW_TAG_%s(0x%x)\n", - dwarf_tag_name(tag->tag), tag->tag); + fprintf(stderr, "Unsupported DW_TAG_%s(0x%x): type: 0x%x\n", + dwarf_tag_name(tag->tag), tag->tag, ref_type_id); return -1; } } @@ -899,9 +1216,9 @@ static int btf_encoder__write_raw_file(struct btf_encoder *encoder) const void *raw_btf_data; int fd, err; - raw_btf_data = btf__get_raw_data(encoder->btf, &raw_btf_size); + raw_btf_data = btf__raw_data(encoder->btf, &raw_btf_size); if (raw_btf_data == NULL) { - fprintf(stderr, "%s: btf__get_raw_data failed!\n", __func__); + fprintf(stderr, "%s: btf__raw_data failed!\n", __func__); return -1; } @@ -976,7 +1293,7 @@ static int btf_encoder__write_elf(struct btf_encoder *encoder) } } - raw_btf_data = btf__get_raw_data(btf, &raw_btf_size); + raw_btf_data = btf__raw_data(btf, &raw_btf_size); if (btf_data) { /* Existing .BTF section found */ @@ -1039,14 +1356,17 @@ int btf_encoder__encode(struct btf_encoder *encoder) { int err; + /* for single-threaded case, saved funcs are added here */ + btf_encoder__add_saved_funcs(encoder); + if (gobuffer__size(&encoder->percpu_secinfo) != 0) btf_encoder__add_datasec(encoder, PERCPU_SECTION); /* Empty file, nothing to do, so... done! 
*/ - if (btf__get_nr_types(encoder->btf) == 0) + if (btf__type_cnt(encoder->btf) == 1) return 0; - if (btf__dedup(encoder->btf, NULL, NULL)) { + if (btf__dedup(encoder->btf, NULL)) { fprintf(stderr, "%s: btf__dedup failed!\n", __func__); return -1; } @@ -1178,8 +1498,9 @@ static bool ftype__has_arg_names(const struct ftype *ftype) return true; } -static int btf_encoder__encode_cu_variables(struct btf_encoder *encoder, struct cu *cu, uint32_t type_id_off) +static int btf_encoder__encode_cu_variables(struct btf_encoder *encoder) { + struct cu *cu = encoder->cu; uint32_t core_id; struct tag *pos; int err = -1; @@ -1262,7 +1583,7 @@ static int btf_encoder__encode_cu_variables(struct btf_encoder *encoder, struct continue; } - type = var->ip.tag.type + type_id_off; + type = var->ip.tag.type + encoder->type_id_off; linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC; if (encoder->verbose) { @@ -1374,6 +1695,7 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam if (encoder->verbose) printf("File %s:\n", cu->filename); + btf_encoders__add(encoder); } out: return encoder; @@ -1388,6 +1710,7 @@ void btf_encoder__delete(struct btf_encoder *encoder) if (encoder == NULL) return; + btf_encoders__delete(encoder); __gobuffer__delete(&encoder->percpu_secinfo); zfree(&encoder->filename); btf__free(encoder->btf); @@ -1401,34 +1724,40 @@ void btf_encoder__delete(struct btf_encoder *encoder) free(encoder); } -int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu) +int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu, struct conf_load *conf_load) { - uint32_t type_id_off = btf__get_nr_types(encoder->btf); struct llvm_annotation *annot; - int btf_type_id, tag_type_id; + int btf_type_id, tag_type_id, skipped_types = 0; uint32_t core_id; struct function *fn; struct tag *pos; int err = 0; + encoder->cu = cu; + encoder->type_id_off = btf__type_cnt(encoder->btf) - 1; if (!encoder->has_index_type) { /* 
cu__find_base_type_by_name() takes "type_id_t *id" */ type_id_t id; if (cu__find_base_type_by_name(cu, "int", &id)) { encoder->has_index_type = true; - encoder->array_index_id = type_id_off + id; + encoder->array_index_id = encoder->type_id_off + id; } else { encoder->has_index_type = false; - encoder->array_index_id = type_id_off + cu->types_table.nr_entries; + encoder->array_index_id = encoder->type_id_off + cu->types_table.nr_entries; } } cu__for_each_type(cu, core_id, pos) { - btf_type_id = btf_encoder__encode_tag(encoder, pos, type_id_off); + btf_type_id = btf_encoder__encode_tag(encoder, pos, conf_load); + + if (btf_type_id == 0) { + ++skipped_types; + continue; + } if (btf_type_id < 0 || - tag__check_id_drift(pos, core_id, btf_type_id, type_id_off)) { + tag__check_id_drift(encoder, pos, core_id, btf_type_id + skipped_types)) { err = -1; goto out; } @@ -1439,7 +1768,8 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu) bt.name = 0; bt.bit_size = 32; - btf_encoder__add_base_type(encoder, &bt, "__ARRAY_SIZE_TYPE__"); + bt.is_signed = true; + btf_encoder__add_base_type(encoder, &bt, "int"); encoder->has_index_type = true; } @@ -1461,7 +1791,7 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu) continue; } - btf_type_id = type_id_off + core_id; + btf_type_id = encoder->type_id_off + core_id; ns = tag__namespace(pos); list_for_each_entry(annot, &ns->annots, node) { tag_type_id = btf_encoder__add_decl_tag(encoder, annot->value, btf_type_id, annot->component_idx); @@ -1474,8 +1804,8 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu) } cu__for_each_function(cu, core_id, fn) { - int btf_fnproto_id, btf_fn_id; - const char *name; + struct elf_function *func = NULL; + bool save = false; /* * Skip functions that: @@ -1489,43 +1819,73 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu) if (!ftype__has_arg_names(&fn->proto)) continue; if (encoder->functions.cnt) { - struct elf_function 
*func; const char *name; name = function__name(fn); if (!name) continue; - func = btf_encoder__find_function(encoder, name); - if (!func || func->generated) + /* prefer exact function name match... */ + func = btf_encoder__find_function(encoder, name, 0); + if (func) { + if (func->generated) + continue; + if (conf_load->skip_encoding_btf_inconsistent_proto) + save = true; + else + func->generated = true; + } else if (encoder->functions.suffix_cnt && + conf_load->btf_gen_optimized) { + /* falling back to name.isra.0 match if no exact + * match is found; only bother if we found any + * .suffix function names. The function + * will be saved and added once we ensure + * it does not have optimized-out parameters + * in any cu. + */ + func = btf_encoder__find_function(encoder, name, + strlen(name)); + if (func) { + save = true; + if (encoder->verbose) + printf("matched function '%s' with '%s'%s\n", + name, func->name, + fn->proto.optimized_parms ? + ", has optimized-out parameters" : + fn->proto.unexpected_reg ? 
", has unexpected register use by params" : + ""); + fn->alias = func->name; + } + } + if (!func) continue; - func->generated = true; } else { if (!fn->external) continue; } - btf_fnproto_id = btf_encoder__add_func_proto(encoder, &fn->proto, type_id_off); - name = function__name(fn); - btf_fn_id = btf_encoder__add_ref_type(encoder, BTF_KIND_FUNC, btf_fnproto_id, name, false); - if (btf_fnproto_id < 0 || btf_fn_id < 0) { - err = -1; - printf("error: failed to encode function '%s'\n", function__name(fn)); + if (save) + err = btf_encoder__save_func(encoder, fn, func); + else + err = btf_encoder__add_func(encoder, fn); + if (err) goto out; - } - - list_for_each_entry(annot, &fn->annots, node) { - tag_type_id = btf_encoder__add_decl_tag(encoder, annot->value, btf_fn_id, annot->component_idx); - if (tag_type_id < 0) { - fprintf(stderr, "error: failed to encode tag '%s' to func %s with component_idx %d\n", - annot->value, name, annot->component_idx); - goto out; - } - } } if (!encoder->skip_encoding_vars) - err = btf_encoder__encode_cu_variables(encoder, cu, type_id_off); + err = btf_encoder__encode_cu_variables(encoder); + + /* It is only safe to delete this CU if we have not stashed any static + * functions for later addition. + */ + if (!err) + err = encoder->saved_func_cnt > 0 ? 
LSK__KEEPIT : LSK__DELETE; out: + encoder->cu = NULL; return err; } + +struct btf *btf_encoder__btf(struct btf_encoder *encoder) +{ + return encoder->btf; +} diff --git a/btf_encoder.h b/btf_encoder.h index f133b0d..34516bb 100644 --- a/btf_encoder.h +++ b/btf_encoder.h @@ -21,12 +21,10 @@ void btf_encoder__delete(struct btf_encoder *encoder); int btf_encoder__encode(struct btf_encoder *encoder); -int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu); +int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu, struct conf_load *conf_load); -void btf_encoders__add(struct list_head *encoders, struct btf_encoder *encoder); +struct btf *btf_encoder__btf(struct btf_encoder *encoder); -struct btf_encoder *btf_encoders__first(struct list_head *encoders); - -struct btf_encoder *btf_encoders__next(struct btf_encoder *encoder); +int btf_encoder__add_encoder(struct btf_encoder *encoder, struct btf_encoder *other); #endif /* _BTF_ENCODER_H_ */ diff --git a/btf_loader.c b/btf_loader.c index 7a5b16f..3fe07d0 100644 --- a/btf_loader.c +++ b/btf_loader.c @@ -108,6 +108,7 @@ static struct base_type *base_type__new(const char *name, uint32_t attrs, bt->is_bool = attrs & BTF_INT_BOOL; bt->name_has_encoding = false; bt->float_type = float_type; + INIT_LIST_HEAD(&bt->node); } return bt; } @@ -312,6 +313,56 @@ out_free: return -ENOMEM; } +#if LIBBPF_MAJOR_VERSION >= 1 +static struct enumerator *enumerator__new64(const char *name, uint64_t value) +{ + struct enumerator *en = tag__alloc(sizeof(*en)); + + if (en != NULL) { + en->name = name; + en->value = value; // Value is already 64-bit, as this is used with DWARF as well + en->tag.tag = DW_TAG_enumerator; + } + + return en; +} + +static int create_new_enumeration64(struct cu *cu, const struct btf_type *tp, uint32_t id) +{ + struct btf_enum64 *ep = btf_enum64(tp); + uint16_t i, vlen = btf_vlen(tp); + struct type *enumeration = type__new(DW_TAG_enumeration_type, + cu__btf_str(cu, tp->name_off), + tp->size ? 
tp->size * 8 : (sizeof(int) * 8)); + + if (enumeration == NULL) + return -ENOMEM; + + for (i = 0; i < vlen; i++) { + const char *name = cu__btf_str(cu, ep[i].name_off); + uint64_t value = btf_enum64_value(&ep[i]); + struct enumerator *enumerator = enumerator__new64(name, value); + + if (enumerator == NULL) + goto out_free; + + enumeration__add(enumeration, enumerator); + } + + cu__add_tag_with_id(cu, &enumeration->namespace.tag, id); + + return 0; +out_free: + enumeration__delete(enumeration); + return -ENOMEM; +} +#else +static int create_new_enumeration64(struct cu *cu __maybe_unused, const struct btf_type *tp __maybe_unused, uint32_t id __maybe_unused) +{ + return -ENOTSUP; +} +#endif + static int create_new_subroutine_type(struct cu *cu, const struct btf_type *tp, uint32_t id) { struct ftype *proto = tag__alloc(sizeof(*proto)); @@ -378,10 +429,11 @@ static int create_new_tag(struct cu *cu, int type, const struct btf_type *tp, ui return -ENOMEM; switch (type) { - case BTF_KIND_CONST: tag->tag = DW_TAG_const_type; break; - case BTF_KIND_PTR: tag->tag = DW_TAG_pointer_type; break; - case BTF_KIND_RESTRICT: tag->tag = DW_TAG_restrict_type; break; - case BTF_KIND_VOLATILE: tag->tag = DW_TAG_volatile_type; break; + case BTF_KIND_CONST: tag->tag = DW_TAG_const_type; break; + case BTF_KIND_PTR: tag->tag = DW_TAG_pointer_type; break; + case BTF_KIND_RESTRICT: tag->tag = DW_TAG_restrict_type; break; + case BTF_KIND_VOLATILE: tag->tag = DW_TAG_volatile_type; break; + case BTF_KIND_TYPE_TAG: tag->tag = DW_TAG_LLVM_annotation; break; default: free(tag); printf("%s: Unknown type %d\n\n", __func__, type); @@ -399,7 +451,7 @@ static int btf__load_types(struct btf *btf, struct cu *cu) uint32_t type_index; int err; - for (type_index = 1; type_index <= btf__get_nr_types(btf); type_index++) { + for (type_index = 1; type_index < btf__type_cnt(btf); type_index++) { const struct btf_type *type_ptr = btf__type_by_id(btf, type_index); uint32_t type = btf_kind(type_ptr); @@ -419,6 
+471,9 @@ static int btf__load_types(struct btf *btf, struct cu *cu) case BTF_KIND_ENUM: err = create_new_enumeration(cu, type_ptr, type_index); break; + case BTF_KIND_ENUM64: + err = create_new_enumeration64(cu, type_ptr, type_index); + break; case BTF_KIND_FWD: err = create_new_forward_decl(cu, type_ptr, type_index); break; @@ -435,6 +490,12 @@ static int btf__load_types(struct btf *btf, struct cu *cu) case BTF_KIND_PTR: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + /* For type tag it's a bit of a lie. + * In DWARF it is encoded as a child tag of whatever type it + * applies to. Here we load it as a standalone tag with a pointer + * to a next type only to have a valid ID in the types table. + */ + case BTF_KIND_TYPE_TAG: err = create_new_tag(cu, type, type_ptr, type_index); break; case BTF_KIND_UNKN: @@ -624,7 +685,7 @@ static int cus__load_btf(struct cus *cus, struct conf_load *conf, const char *fi * The app stole this cu, possibly deleting it, * so forget about it */ - if (conf && conf->steal && conf->steal(cu, conf)) + if (conf && conf->steal && conf->steal(cu, conf, NULL)) return 0; cus__add(cus, cu); @@ -30,6 +30,7 @@ ${pahole_bin} -F dwarf \ --suppress_aligned_attribute \ --suppress_force_paddings \ --suppress_packed \ + --lang_exclude rust \ --show_private_classes $dwarf_input > $dwarf_output ${pahole_bin} -F btf \ --sort \ diff --git a/changes-v1.24 b/changes-v1.24 new file mode 100644 index 0000000..c6e19f6 --- /dev/null +++ b/changes-v1.24 @@ -0,0 +1,36 @@ +BTF encoder: + +- Add support to BTF_KIND_ENUM64 to represent enumeration entries + with more than 32 bits. + +- Support multithreaded encoding, in addition to DWARF + multithreaded loading, speeding up the process. + + Selected just like DWARF multithreaded loading, using the + 'pahole -j' option. + +- Encode 'char' type as signed. + +BTF Loader: + +- Add support to BTF_KIND_ENUM64. 
+ +pahole: + +- Introduce --lang and --lang_exclude to specify the language the + DWARF compile units originated from to use or filter. + + Use case is to exclude Rust compile units while aspects of the + DWARF generated for them get sorted out in a way that the kernel + BPF verifier doesn't refuse loading the BTF generated from them. + +- Introduce --compile to generate compilable code in a similar fashion to: + + bpftool btf dump file vmlinux format c > vmlinux.h + + As with 'bpftool', this will notice type shadowing, i.e. multiple types + with the same name and will disambiguate by adding a suffix. + +- Don't segfault when processing bogus files. + +Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> diff --git a/changes-v1.25 b/changes-v1.25 new file mode 100644 index 0000000..9534996 --- /dev/null +++ b/changes-v1.25 @@ -0,0 +1,58 @@ +DWARF loader: + +- Support DW_TAG_unspecified_type more generally, which in binutils 2.40 is used + for assembly functions, resulting in BTF encoding problems when building the Linux + kernel. + +- Make sure struct member offsets are in ascending order. This is part of the set of + changes to support encoding BTF for Rust for use with the Linux kernel, where the + BTF verifier considers struct members with unordered offsets invalid. + +- Support C atomic types (DW_TAG_atomic_type), which are not used in the Linux kernel but + are present in user space components such as Open vSwitch. + +BTF loader: + +- Initial support for DW_TAG_LLVM_annotation, used for BTF type tags, to encode things + like __rcu, __user annotations in the Linux kernel. This is still in flux with changes + in how these are encoded that resulted from the discussion to support this in gcc in + addition to in clang, where it was first designed. + +BTF encoder: + +- Exclude functions with the same name (static functions in different CUs), + inconsistent prototypes or not following calling convention. 
+ +- Allow generation of BTF for optimized functions, those that end with a .isra* + suffix (interprocedural scalar replacement of aggregates) or .constprop* + (constant propagation). + +Pretty printer: + +- For now the DW_TAG_LLVM_annotation tags are being suppressed, so the output from + BTF and DWARF matches; further work is planned to support them so that the output + matches the original source code and can be recompiled, resulting in the same + DWARF info. + +- Support C atomic types, allowing the generation of source code that can be + compiled with resulting DWARF info matching the original source code. + +pahole: + +- Support --lang=/--lang_exclude=asm: the DW_LANG_ define for assembly is out + of order, so special-case it to support asking for CUs written in assembly to be + selected or excluded. + +- Support suppressing the atomic type modifiers/attributes. + +- Allow filtering out functions optimized by the compiler, where the calling convention isn't + the one expected by BPF or arguments are optimized out. + +- Support --compile from DWARF in addition to from BTF; this allows user space components + such as Open vSwitch to use pahole to generate compilable code for their data structures. + +btfdiff: + +- Exclude Rust CUs, as those are not yet being BTF encoded. 
+ +Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> @@ -433,7 +433,7 @@ static void show_changed_member(char change, const struct class_member *member, const struct tag *type = cu__type(cu, member->tag.type); char bf[128]; - tag__assert_search_result(type); + tag__assert_search_result(type, member->tag.tag, class_member__name(member)); printf(" %c%-26s %-21s /* %5u %5zd */\n", change, tag__name(type, cu, bf, sizeof(bf), NULL), class_member__name(member), diff --git a/ctf_loader.c b/ctf_loader.c index 7c34739..2570b09 100644 --- a/ctf_loader.c +++ b/ctf_loader.c @@ -157,6 +157,7 @@ static struct base_type *base_type__new(const char *name, uint32_t attrs, bt->is_varargs = attrs & CTF_TYPE_INT_VARARGS; bt->name_has_encoding = false; bt->float_type = float_type; + INIT_LIST_HEAD(&bt->node); } return bt; } @@ -722,7 +723,7 @@ int ctf__load_file(struct cus *cus, struct conf_load *conf, * The app stole this cu, possibly deleting it, * so forget about it */ - if (conf && conf->steal && conf->steal(cu, conf)) + if (conf && conf->steal && conf->steal(cu, conf, NULL)) return 0; cus__add(cus, cu); @@ -357,7 +357,7 @@ static struct class *class__clone_base_types(const struct tag *tag, type__for_each_data_member_safe(&clone->type, pos, next) { struct tag *member_type = cu__type(cu, pos->tag.type); - tag__assert_search_result(member_type); + tag__assert_search_result(member_type, pos->tag.tag, class_member__name(pos)); if (!tag__is_base_type(member_type, cu)) { next = class__remove_member(clone, cu, pos); class_member__delete(pos); @@ -498,7 +498,7 @@ static struct tag *pointer_filter(struct tag *tag, struct cu *cu, type__for_each_member(type, pos) { struct tag *ctype = cu__type(cu, pos->tag.type); - tag__assert_search_result(ctype); + tag__assert_search_result(ctype, pos->tag.tag, class_member__name(pos)); if (tag__is_pointer_to(ctype, target_type_id)) return tag; } @@ -612,7 +612,7 @@ static void emit_list_of_types(struct list_head *list) * Lets look at the other CUs, 
perhaps we have already * emmited this one */ - if (type_emissions__find_definition(&emissions, structure__name(pos))) { + if (type_emissions__find_definition(&emissions, type__tag(type)->tag, structure__name(pos))) { type->definition_emitted = 1; continue; } @@ -687,7 +687,7 @@ static int function__emit_probes(struct function *func, uint32_t function_id, list_for_each_entry(pos, &func->proto.parms, tag.node) { struct tag *type = cu__type(cu, pos->tag.type); - tag__assert_search_result(type); + tag__assert_search_result(type, pos->tag.tag, parameter__name(pos)); if (!tag__is_pointer_to(type, target_type_id)) continue; @@ -762,7 +762,7 @@ static int cu_emit_pointer_probes_iterator(struct cu *cu, void *cookie) type__for_each_member(tag__type(pointer), pos_member) { struct tag *ctype = cu__type(cu, pos_member->tag.type); - tag__assert_search_result(ctype); + tag__assert_search_result(ctype, pos_member->tag.tag, class_member__name(pos_member)); if (tag__is_pointer_to(ctype, target_type_id)) break; } @@ -960,7 +960,7 @@ failure: goto out; } - type_emissions__init(&emissions); + type_emissions__init(&emissions, NULL); /* * Create the methods_cus (Compilation Units) object where we will @@ -344,4 +344,8 @@ void __zfree(void **ptr); #define zfree(ptr) __zfree((void **)(ptr)) +#ifndef BTF_KIND_ENUM64 +#define BTF_KIND_ENUM64 19 +#endif + #endif /* _DUTIL_H_ */ diff --git a/dwarf_loader.c b/dwarf_loader.c index 2ea901c..ccf3194 100644 --- a/dwarf_loader.c +++ b/dwarf_loader.c @@ -8,6 +8,7 @@ #include <dirent.h> #include <dwarf.h> #include <elfutils/libdwfl.h> +#include <elfutils/version.h> #include <errno.h> #include <fcntl.h> #include <fnmatch.h> @@ -52,6 +53,10 @@ #define DW_OP_addrx 0xa1 #endif +#ifndef EM_RISCV +#define EM_RISCV 243 +#endif + static pthread_mutex_t libdw__lock = PTHREAD_MUTEX_INITIALIZER; static uint32_t hashtags__bits = 12; @@ -560,10 +565,11 @@ static struct base_type *base_type__new(Dwarf_Die *die, struct cu *cu, struct co bt->bit_size = 
attr_numeric(die, DW_AT_byte_size) * 8; uint64_t encoding = attr_numeric(die, DW_AT_encoding); bt->is_bool = encoding == DW_ATE_boolean; - bt->is_signed = encoding == DW_ATE_signed; + bt->is_signed = (encoding == DW_ATE_signed) || (encoding == DW_ATE_signed_char); bt->is_varargs = false; bt->name_has_encoding = true; bt->float_type = encoding_to_float_type(encoding); + INIT_LIST_HEAD(&bt->node); } return bt; @@ -632,6 +638,18 @@ static void type__init(struct type *type, Dwarf_Die *die, struct cu *cu, struct type->resized = 0; type->nr_members = 0; type->nr_static_members = 0; + type->is_signed_enum = 0; + + Dwarf_Attribute attr; + if (dwarf_attr(die, DW_AT_type, &attr) != NULL) { + Dwarf_Die type_die; + if (dwarf_formref_die(&attr, &type_die) != NULL) { + uint64_t encoding = attr_numeric(&type_die, DW_AT_encoding); + + if (encoding == DW_ATE_signed || encoding == DW_ATE_signed_char) + type->is_signed_enum = 1; + } + } } static struct type *type__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) @@ -734,6 +752,19 @@ static struct variable *variable__new(Dwarf_Die *die, struct cu *cu, struct conf return var; } +static struct constant *constant__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) +{ + struct constant *constant = tag__alloc(cu, sizeof(*constant)); + + if (constant != NULL) { + tag__init(&constant->tag, cu, die); + constant->name = attr_string(die, DW_AT_name, conf); + constant->value = attr_numeric(die, DW_AT_const_value); + } + + return constant; +} + static int tag__recode_dwarf_bitfield(struct tag *tag, struct cu *cu, uint16_t bit_size) { int id; @@ -770,7 +801,8 @@ static int tag__recode_dwarf_bitfield(struct tag *tag, struct cu *cu, uint16_t b break; case DW_TAG_const_type: - case DW_TAG_volatile_type: { + case DW_TAG_volatile_type: + case DW_TAG_atomic_type: { const struct dwarf_tag *dtag = tag->priv; struct dwarf_tag *dtype = dwarf_cu__find_type_by_ref(cu->priv, &dtag->type); @@ -978,13 +1010,160 @@ static struct class_member 
*class_member__new(Dwarf_Die *die, struct cu *cu, return member; } -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) +/* How many function parameters are passed via registers? Used below in + * determining if an argument has been optimized out or if it is simply + * an argument > cu__nr_register_params(). Making cu__nr_register_params() + * return 0 allows unsupported architectures to skip tagging optimized-out + * values. + */ +static int arch__nr_register_params(const GElf_Ehdr *ehdr) +{ + switch (ehdr->e_machine) { + case EM_S390: return 5; + case EM_SPARC: + case EM_SPARCV9: + case EM_X86_64: return 6; + case EM_AARCH64: + case EM_ARC: + case EM_ARM: + case EM_MIPS: + case EM_PPC: + case EM_PPC64: + case EM_RISCV: return 8; + default: break; + } + + return 0; +} + +/* map from parameter index (0 for first, ...) to expected DW_OP_reg. + * This will allow us to identify cases where optimized-out parameters + * interfere with expectations about register contents on function + * entry. 
+ */ +static void arch__set_register_params(const GElf_Ehdr *ehdr, struct cu *cu) +{ + memset(cu->register_params, -1, sizeof(cu->register_params)); + + switch (ehdr->e_machine) { + case EM_S390: + /* https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf */ + cu->register_params[0] = DW_OP_reg2; // %r2 + cu->register_params[1] = DW_OP_reg3; // %r3 + cu->register_params[2] = DW_OP_reg4; // %r4 + cu->register_params[3] = DW_OP_reg5; // %r5 + cu->register_params[4] = DW_OP_reg6; // %r6 + return; + case EM_X86_64: + /* //en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI */ + cu->register_params[0] = DW_OP_reg5; // %rdi + cu->register_params[1] = DW_OP_reg4; // %rsi + cu->register_params[2] = DW_OP_reg1; // %rdx + cu->register_params[3] = DW_OP_reg2; // %rcx + cu->register_params[4] = DW_OP_reg8; // %r8 + cu->register_params[5] = DW_OP_reg9; // %r9 + return; + case EM_ARM: + /* https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#machine-registers */ + case EM_AARCH64: + /* https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#machine-registers */ + cu->register_params[0] = DW_OP_reg0; + cu->register_params[1] = DW_OP_reg1; + cu->register_params[2] = DW_OP_reg2; + cu->register_params[3] = DW_OP_reg3; + cu->register_params[4] = DW_OP_reg4; + cu->register_params[5] = DW_OP_reg5; + cu->register_params[6] = DW_OP_reg6; + cu->register_params[7] = DW_OP_reg7; + return; + default: + return; + } +} + +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, + struct conf_load *conf, int param_idx) { struct parameter *parm = tag__alloc(cu, sizeof(*parm)); if (parm != NULL) { + Dwarf_Addr base, start, end; + bool has_const_value; + Dwarf_Attribute attr; + struct location loc; + tag__init(&parm->tag, cu, die); parm->name = attr_string(die, DW_AT_name, conf); + + if (param_idx >= cu->nr_register_params || param_idx < 0) + return parm; + /* Parameters which use DW_AT_abstract_origin to point at + * the 
original parameter definition (with no name in the DIE) + * are the result of later DWARF generation during compilation + * so often better take into account if arguments were + * optimized out. + * + * By checking that locations for parameters that are expected + * to be passed as registers are actually passed as registers, + * we can spot optimized-out parameters. + * + * It can also be the case that a parameter DIE has + * a constant value attribute reflecting optimization or + * has no location attribute. + * + * From the DWARF spec: + * + * "4.1.10 + * + * A DW_AT_const_value attribute for an entry describing a + * variable or formal parameter whose value is constant and not + * represented by an object in the address space of the program, + * or an entry describing a named constant. (Note + * that such an entry does not have a location attribute.)" + * + * So we can also use the absence of a location for a parameter + * as evidence it has been optimized out. This info will + * need to be shared between a parameter and any abstract + * origin references however, since gcc can have location + * information in the parameter that refers back to the original + * via abstract origin, so we need to share location presence + * between these parameter representations. See + * ftype__recode_dwarf_types() below for how this is handled. + */ + has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL; + parm->has_loc = dwarf_attr(die, DW_AT_location, &attr) != NULL; + /* dwarf_getlocations() handles location lists; here we are + * only interested in the first expr. + */ + if (parm->has_loc && +#if _ELFUTILS_PREREQ(0, 157) + dwarf_getlocations(&attr, 0, &base, &start, &end, + &loc.expr, &loc.exprlen) > 0 && +#else + dwarf_getlocation(&attr, &loc.expr, &loc.exprlen) == 0 && +#endif + loc.exprlen != 0) { + int expected_reg = cu->register_params[param_idx]; + Dwarf_Op *expr = loc.expr; + + switch (expr->atom) { + case DW_OP_reg0 ... 
DW_OP_reg31: + /* mark parameters that use an unexpected + * register to hold a parameter; these will + * be problematic for users of BTF as they + * violate expectations about register + * contents. + */ + if (expected_reg >= 0 && expected_reg != expr->atom) + parm->unexpected_reg = 1; + break; + default: + parm->optimized = 1; + break; + } + } else if (has_const_value) { + parm->optimized = 1; + } } return parm; @@ -1107,6 +1286,7 @@ static void ftype__init(struct ftype *ftype, Dwarf_Die *die, struct cu *cu) assert(tag == DW_TAG_subprogram || tag == DW_TAG_subroutine_type); #endif tag__init(&ftype->tag, cu, die); + ftype->byte_size = attr_numeric(die, DW_AT_byte_size); INIT_LIST_HEAD(&ftype->parms); ftype->nr_parms = 0; ftype->unspec_parms = 0; @@ -1436,7 +1616,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die, struct cu *cu, struct conf_load *conf, int param_idx) { - struct parameter *parm = parameter__new(die, cu, conf); + struct parameter *parm = parameter__new(die, cu, conf, param_idx); if (parm == NULL) return NULL; @@ -1491,6 +1671,16 @@ static struct tag *die__create_new_variable(Dwarf_Die *die, struct cu *cu, struc return &var->ip.tag; } +static struct tag *die__create_new_constant(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) +{ + struct constant *constant = constant__new(die, cu, conf); + + if (constant == NULL) + return NULL; + + return &constant->tag; +} + static struct tag *die__create_new_subroutine_type(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) { @@ -1993,8 +2183,10 @@ static struct tag *__die__process_tag(Dwarf_Die *die, struct cu *cu, case DW_TAG_imported_module: case DW_TAG_reference_type: case DW_TAG_restrict_type: - case DW_TAG_unspecified_type: case DW_TAG_volatile_type: + case DW_TAG_atomic_type: + tag = die__create_new_tag(die, cu); break; + case DW_TAG_unspecified_type: tag = die__create_new_tag(die, cu); break; case DW_TAG_pointer_type: tag = die__create_new_pointer_tag(die, cu, conf); break; @@ 
-2019,6 +2211,8 @@ static struct tag *__die__process_tag(Dwarf_Die *die, struct cu *cu, tag = die__create_new_union(die, cu, conf); break; case DW_TAG_variable: tag = die__create_new_variable(die, cu, conf); break; + case DW_TAG_constant: // First seen in a Go CU + tag = die__create_new_constant(die, cu, conf); break; default: __cu__tag_not_handled(die, fn); /* fall thru */ @@ -2058,8 +2252,24 @@ static int die__process_unit(Dwarf_Die *die, struct cu *cu, struct conf_load *co continue; } - uint32_t id; - cu__add_tag(cu, tag, &id); + uint32_t id = 0; + /* There is no BTF representation for unspecified types. + * Currently we want such types to be represented as `void` + * (and thus skip BTF encoding). + * + * As BTF encoding is skipped, such types must not be added to type table, + * otherwise an ID for a type would be allocated and we would be forced + * to put something in BTF at this ID. + * Thus avoid `cu__add_tag()` call for such types. + * + * On the other hand, there might be references to this type from other + * tags, so `dwarf_cu__find_tag_by_ref()` must return something. + * Thus call `cu__hash()` for such types. + * + * Note, that small_id of zero would be assigned to unspecified type entry. 
+ */ + if (tag->tag != DW_TAG_unspecified_type) + cu__add_tag(cu, tag, &id); cu__hash(cu, tag); struct dwarf_tag *dtag = tag->priv; dtag->small_id = id; @@ -2175,6 +2385,7 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu) ftype__for_each_parameter(type, pos) { struct dwarf_tag *dpos = pos->tag.priv; + struct parameter *opos; struct dwarf_tag *dtype; if (dpos->type.off == 0) { @@ -2188,8 +2399,22 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu) tag__print_abstract_origin_not_found(&pos->tag); continue; } - pos->name = tag__parameter(dtype->tag)->name; + opos = tag__parameter(dtype->tag); + pos->name = opos->name; pos->tag.type = dtype->tag->type; + /* share location information between parameter and + * abstract origin; if neither have location, we will + * mark the parameter as optimized out. Also share + * info regarding unexpected register use for + * parameters. + */ + if (pos->has_loc) + opos->has_loc = pos->has_loc; + + if (pos->optimized) + opos->optimized = pos->optimized; + if (pos->unexpected_reg) + opos->unexpected_reg = pos->unexpected_reg; continue; } @@ -2216,9 +2441,15 @@ static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu) lexblock__recode_dwarf_types(tag__lexblock(pos), cu); continue; case DW_TAG_inlined_subroutine: - dtype = dwarf_cu__find_tag_by_ref(dcu, &dpos->type); + if (dpos->type.off != 0) + dtype = dwarf_cu__find_tag_by_ref(dcu, &dpos->type); + else + dtype = dwarf_cu__find_tag_by_ref(dcu, &dpos->abstract_origin); if (dtype == NULL) { - tag__print_type_not_found(pos); + if (dpos->type.off != 0) + tag__print_type_not_found(pos); + else + tag__print_abstract_origin_not_found(pos); continue; } ftype__recode_dwarf_types(dtype->tag, cu); @@ -2459,18 +2690,70 @@ out: return 0; } -static int cu__resolve_func_ret_types(struct cu *cu) +static bool param__is_struct(struct cu *cu, struct tag *tag) +{ + struct tag *type = cu__type(cu, tag->type); + + if (!type) + return false; + + 
switch (type->tag) { + case DW_TAG_structure_type: + return true; + case DW_TAG_const_type: + case DW_TAG_typedef: + /* handle "typedef struct", const parameter */ + return param__is_struct(cu, type); + default: + return false; + } +} + +static int cu__resolve_func_ret_types_optimized(struct cu *cu) { struct ptr_table *pt = &cu->functions_table; uint32_t i; for (i = 0; i < pt->nr_entries; ++i) { struct tag *tag = pt->entries[i]; + struct parameter *pos; + struct function *fn = tag__function(tag); + bool has_unexpected_reg = false, has_struct_param = false; + + /* mark function as optimized if parameter is, or + * if parameter does not have a location; at this + * point location presence has been marked in + * abstract origins for cases where a parameter + * location is not stored in the original function + * parameter tag. + * + * Also mark functions which, due to optimization, + * use an unexpected register for a parameter. + * Exception is functions which have a struct + * as a parameter, as multiple registers may + * be used to represent it, throwing off register + * to parameter mapping. 
+ */ + ftype__for_each_parameter(&fn->proto, pos) { + if (pos->optimized || !pos->has_loc) + fn->proto.optimized_parms = 1; + + if (pos->unexpected_reg) + has_unexpected_reg = true; + } + if (has_unexpected_reg) { + ftype__for_each_parameter(&fn->proto, pos) { + has_struct_param = param__is_struct(cu, &pos->tag); + if (has_struct_param) + break; + } + if (!has_struct_param) + fn->proto.unexpected_reg = 1; + } if (tag == NULL || tag->type != 0) continue; - struct function *fn = tag__function(tag); if (!fn->abstract_origin) continue; @@ -2498,6 +2781,7 @@ static int cu__recode_dwarf_types_table(struct cu *cu, if (tag__recode_dwarf_type(tag, cu)) return -1; } + return 0; } @@ -2592,7 +2876,7 @@ static int die__process_and_recode(Dwarf_Die *die, struct cu *cu, struct conf_lo if (ret != 0) return ret; - return cu__resolve_func_ret_types(cu); + return cu__resolve_func_ret_types_optimized(cu); } static int class_member__cache_byte_size(struct tag *tag, struct cu *cu, @@ -2694,18 +2978,60 @@ static int class_member__cache_byte_size(struct tag *tag, struct cu *cu, return 0; } -static int cu__finalize(struct cu *cu, struct conf_load *conf) +static bool cu__language_reorders_offsets(const struct cu *cu) +{ + return cu->language == DW_LANG_Rust; +} + +static int type__sort_by_offset(struct tag *tag, struct cu *cu, void *cookie __maybe_unused) +{ + if (!tag__is_type(tag)) + return 0; + + struct type *type = tag__type(tag); + struct class_member *current_member; + + // There may be more than DW_TAG_members entries in the type tags, so do a simple + // bubble sort for now, so that the other non tags stay where they are. 
+restart: + type__for_each_data_member(type, current_member) { + if (list_is_last(&current_member->tag.node, &type->namespace.tags)) + break; + + struct class_member *next_member = list_entry(current_member->tag.node.next, typeof(*current_member), tag.node); + + if (current_member->byte_offset <= next_member->byte_offset) + continue; + + list_del(&current_member->tag.node); + list_add(&current_member->tag.node, &next_member->tag.node); + goto restart; + } + + return 0; +} + +static void cu__sort_types_by_offset(struct cu *cu, struct conf_load *conf) +{ + cu__for_all_tags(cu, type__sort_by_offset, conf); +} + +static int cu__finalize(struct cu *cu, struct conf_load *conf, void *thr_data) { cu__for_all_tags(cu, class_member__cache_byte_size, conf); + + if (cu__language_reorders_offsets(cu)) + cu__sort_types_by_offset(cu, conf); + if (conf && conf->steal) { - return conf->steal(cu, conf); + return conf->steal(cu, conf, thr_data); } return LSK__KEEPIT; } -static int cus__finalize(struct cus *cus, struct cu *cu, struct conf_load *conf) +static int cus__finalize(struct cus *cus, struct cu *cu, struct conf_load *conf, void *thr_data) { - int lsk = cu__finalize(cu, conf); + int lsk = cu__finalize(cu, conf, thr_data); switch (lsk) { case LSK__DELETE: cu__delete(cu); @@ -2733,6 +3059,8 @@ static int cu__set_common(struct cu *cu, struct conf_load *conf, return DWARF_CB_ABORT; cu->little_endian = ehdr.e_ident[EI_DATA] == ELFDATA2LSB; + cu->nr_register_params = arch__nr_register_params(&ehdr); + arch__set_register_params(&ehdr, cu); return 0; } @@ -2788,8 +3116,8 @@ static int __cus__load_debug_types(struct conf_load *conf, Dwfl_Module *mod, Dwa return 0; } -/* Match the define in linux:include/linux/elfnote.h */ -#define LINUX_ELFNOTE_BUILD_LTO 0x101 +/* Match the define in linux:include/linux/elfnote-lto.h */ +#define LINUX_ELFNOTE_LTO_INFO 0x101 static bool cus__merging_cu(Dwarf *dw, Elf *elf) { @@ -2807,7 +3135,7 @@ static bool cus__merging_cu(Dwarf *dw, Elf *elf) size_t name_off, desc_off, 
offset = 0; GElf_Nhdr hdr; while ((offset = gelf_getnote(data, offset, &hdr, &name_off, &desc_off)) != 0) { - if (hdr.n_type != LINUX_ELFNOTE_BUILD_LTO) + if (hdr.n_type != LINUX_ELFNOTE_LTO_INFO) continue; /* owner is Linux */ @@ -2874,7 +3202,13 @@ struct dwarf_cus { struct dwarf_cu *type_dcu; }; -static int dwarf_cus__create_and_process_cu(struct dwarf_cus *dcus, Dwarf_Die *cu_die, uint8_t pointer_size) +struct dwarf_thread { + struct dwarf_cus *dcus; + void *data; +}; + +static int dwarf_cus__create_and_process_cu(struct dwarf_cus *dcus, Dwarf_Die *cu_die, + uint8_t pointer_size, void *thr_data) { /* * DW_AT_name in DW_TAG_compile_unit can be NULL, first seen in: @@ -2896,7 +3230,7 @@ static int dwarf_cus__create_and_process_cu(struct dwarf_cus *dcus, Dwarf_Die *c cu->dfops = &dwarf__ops; if (die__process_and_recode(cu_die, cu, dcus->conf) != 0 || - cus__finalize(dcus->cus, cu, dcus->conf) == LSK__STOP_LOADING) + cus__finalize(dcus->cus, cu, dcus->conf, thr_data) == LSK__STOP_LOADING) return DWARF_CB_ABORT; return DWARF_CB_OK; @@ -2930,7 +3264,8 @@ out_unlock: static void *dwarf_cus__process_cu_thread(void *arg) { - struct dwarf_cus *dcus = arg; + struct dwarf_thread *dthr = arg; + struct dwarf_cus *dcus = dthr->dcus; uint8_t pointer_size, offset_size; Dwarf_Die die_mem, *cu_die; @@ -2938,11 +3273,13 @@ static void *dwarf_cus__process_cu_thread(void *arg) if (cu_die == NULL) break; - if (dwarf_cus__create_and_process_cu(dcus, cu_die, pointer_size) == DWARF_CB_ABORT) + if (dwarf_cus__create_and_process_cu(dcus, cu_die, + pointer_size, dthr->data) == DWARF_CB_ABORT) goto out_abort; } - if (dcus->conf->thread_exit && dcus->conf->thread_exit() != 0) + if (dcus->conf->thread_exit && + dcus->conf->thread_exit(dcus->conf, dthr->data) != 0) goto out_abort; return (void *)DWARF_CB_OK; @@ -2953,10 +3290,26 @@ out_abort: static int dwarf_cus__threaded_process_cus(struct dwarf_cus *dcus) { pthread_t threads[dcus->conf->nr_jobs]; + struct dwarf_thread 
dthr[dcus->conf->nr_jobs]; + void *thread_data[dcus->conf->nr_jobs]; + int res; int i; + if (dcus->conf->threads_prepare) { + res = dcus->conf->threads_prepare(dcus->conf, dcus->conf->nr_jobs, thread_data); + if (res != 0) + return res; + } else { + memset(thread_data, 0, sizeof(void *) * dcus->conf->nr_jobs); + } + for (i = 0; i < dcus->conf->nr_jobs; ++i) { - dcus->error = pthread_create(&threads[i], NULL, dwarf_cus__process_cu_thread, dcus); + dthr[i].dcus = dcus; + dthr[i].data = thread_data[i]; + + dcus->error = pthread_create(&threads[i], NULL, + dwarf_cus__process_cu_thread, + &dthr[i]); if (dcus->error) goto out_join; } @@ -2972,6 +3325,13 @@ out_join: dcus->error = (long)res; } + if (dcus->conf->threads_collect) { + res = dcus->conf->threads_collect(dcus->conf, dcus->conf->nr_jobs, + thread_data, dcus->error); + if (dcus->error == 0) + dcus->error = res; + } + return dcus->error; } @@ -2988,7 +3348,8 @@ static int __dwarf_cus__process_cus(struct dwarf_cus *dcus) if (cu_die == NULL) break; - if (dwarf_cus__create_and_process_cu(dcus, cu_die, pointer_size) == DWARF_CB_ABORT) + if (dwarf_cus__create_and_process_cu(dcus, cu_die, + pointer_size, NULL) == DWARF_CB_ABORT) return DWARF_CB_ABORT; dcus->off = noff; @@ -3079,10 +3440,10 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf, * encoded in another subprogram through abstract_origin * tag. Let us visit all subprograms again to resolve this. 
*/ - if (cu__resolve_func_ret_types(cu) != LSK__KEEPIT) + if (cu__resolve_func_ret_types_optimized(cu) != LSK__KEEPIT) goto out_abort; - if (cus__finalize(cus, cu, conf) == LSK__STOP_LOADING) + if (cus__finalize(cus, cu, conf, NULL) == LSK__STOP_LOADING) goto out_abort; return 0; @@ -3114,7 +3475,7 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf, } if (type_cu != NULL) { - type_lsk = cu__finalize(type_cu, conf); + type_lsk = cu__finalize(type_cu, conf, NULL); if (type_lsk == LSK__KEEPIT) { cus__add(cus, type_cu); } @@ -3247,7 +3608,9 @@ static int cus__process_file(struct cus *cus, struct conf_load *conf, int fd, }; /* Process the one or more modules gleaned from this file. */ - dwfl_getmodules(dwfl, cus__process_dwflmod, &parms, 0); + int err = dwfl_getmodules(dwfl, cus__process_dwflmod, &parms, 0); + if (err < 0) + return -1; // We can't call dwfl_end(dwfl) here, as we keep pointers to strings // allocated by libdw that will be freed at dwfl_end(), so leave this for @@ -175,10 +175,10 @@ void tag__delete(struct tag *tag) } } -void tag__not_found_die(const char *file, int line, const char *func) +void tag__not_found_die(const char *file, int line, const char *func, int tag, const char *name) { - fprintf(stderr, "%s::%s(%d): tag not found, please report to " - "acme@kernel.org\n", file, func, line); + fprintf(stderr, "%s::%s(%d, related to the type of tag DW_TAG_%s \"%s\"): tag not found, please report to " + "acme@kernel.org\n", file, func, line, dwarf_tag_name(tag), name); exit(1); } @@ -250,6 +250,29 @@ static struct ase_type_name_to_size { { .name = NULL }, }; +bool base_type__language_defined(struct base_type *bt) +{ + int i = 0; + char bf[64]; + const char *name; + + if (bt->name_has_encoding) + name = bt->name; + else + name = base_type__name(bt, bf, sizeof(bf)); + + while (base_type_name_to_size_table[i].name != NULL) { + if (bt->name_has_encoding) { + if (strcmp(base_type_name_to_size_table[i].name, bt->name) == 0) + return true; + } 
else if (strcmp(base_type_name_to_size_table[i].name, name) == 0) + return true; + ++i; + } + + return false; +} + size_t base_type__name_to_size(struct base_type *bt, struct cu *cu) { int i = 0; @@ -345,6 +368,7 @@ void __type__init(struct type *type) type->sizeof_member = NULL; type->member_prefix = NULL; type->member_prefix_len = 0; + type->suffix_disambiguation = 0; } struct class_member * @@ -387,7 +411,8 @@ reevaluate: case DW_TAG_const_type: case DW_TAG_typedef: case DW_TAG_rvalue_reference_type: - case DW_TAG_volatile_type: { + case DW_TAG_volatile_type: + case DW_TAG_atomic_type: { struct tag *tag = cu__type(cu, type->type); if (tag == NULL) { tag__id_not_found_fprintf(stderr, type->type); @@ -624,7 +649,7 @@ struct cu *cu__new(const char *name, uint8_t addr_size, const unsigned char *build_id, int build_id_len, const char *filename, bool use_obstack) { - struct cu *cu = malloc(sizeof(*cu) + build_id_len); + struct cu *cu = zalloc(sizeof(*cu) + build_id_len); if (cu != NULL) { uint32_t void_id; @@ -1111,6 +1136,7 @@ size_t tag__size(const struct tag *tag, const struct cu *cu) case DW_TAG_reference_type: return cu->addr_size; case DW_TAG_base_type: return base_type__size(tag); case DW_TAG_enumeration_type: return tag__type(tag)->size / 8; + case DW_TAG_subroutine_type: return tag__ftype(tag)->byte_size ?: cu->addr_size; } if (tag->type == 0) { /* struct class: unions, structs */ @@ -1191,6 +1217,10 @@ void type__delete(struct type *type) return; type__delete_class_members(type); + + if (type->suffix_disambiguation) + zfree(&type->namespace.name); + free(type); } @@ -1211,6 +1241,9 @@ void enumeration__delete(struct type *type) enumerator__delete(pos); } + if (type->suffix_disambiguation) + zfree(&type->namespace.name); + free(type); } @@ -1607,7 +1640,7 @@ void type__check_structs_at_unnatural_alignments(struct type *type, const struct struct class *cls = tag__class(member_type); cls->is_packed = true; - cls->type.packed_attributes_inferred = true; + 
cls->type.packed_attributes_inferred = 1; } } } @@ -1663,7 +1696,7 @@ bool class__infer_packed_attributes(struct class *cls, const struct cu *cu) cls->is_packed = true; out: - ctype->packed_attributes_inferred = true; + ctype->packed_attributes_inferred = 1; return cls->is_packed; } @@ -1695,11 +1728,11 @@ void union__infer_packed_attributes(struct type *type, const struct cu *cu) struct class *cls = tag__class(member_type); cls->is_packed = true; - cls->type.packed_attributes_inferred = true; + cls->type.packed_attributes_inferred = 1; } } - type->packed_attributes_inferred = true; + type->packed_attributes_inferred = 1; } /** class__has_hole_ge - check if class has a hole greater or equal to @size @@ -2077,6 +2110,172 @@ int cus__load_file(struct cus *cus, struct conf_load *conf, #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +#ifndef DW_LANG_C89 +#define DW_LANG_C89 0x0001 +#endif +#ifndef DW_LANG_C +#define DW_LANG_C 0x0002 +#endif +#ifndef DW_LANG_Ada83 +#define DW_LANG_Ada83 0x0003 +#endif +#ifndef DW_LANG_C_plus_plus +#define DW_LANG_C_plus_plus 0x0004 +#endif +#ifndef DW_LANG_Cobol74 +#define DW_LANG_Cobol74 0x0005 +#endif +#ifndef DW_LANG_Cobol85 +#define DW_LANG_Cobol85 0x0006 +#endif +#ifndef DW_LANG_Fortran77 +#define DW_LANG_Fortran77 0x0007 +#endif +#ifndef DW_LANG_Fortran90 +#define DW_LANG_Fortran90 0x0008 +#endif +#ifndef DW_LANG_Pascal83 +#define DW_LANG_Pascal83 0x0009 +#endif +#ifndef DW_LANG_Modula2 +#define DW_LANG_Modula2 0x000a +#endif +#ifndef DW_LANG_Java +#define DW_LANG_Java 0x000b +#endif +#ifndef DW_LANG_C99 +#define DW_LANG_C99 0x000c +#endif +#ifndef DW_LANG_Ada95 +#define DW_LANG_Ada95 0x000d +#endif +#ifndef DW_LANG_Fortran95 +#define DW_LANG_Fortran95 0x000e +#endif +#ifndef DW_LANG_PLI +#define DW_LANG_PLI 0x000f +#endif +#ifndef DW_LANG_ObjC +#define DW_LANG_ObjC 0x0010 +#endif +#ifndef DW_LANG_ObjC_plus_plus +#define DW_LANG_ObjC_plus_plus 0x0011 +#endif +#ifndef DW_LANG_UPC +#define DW_LANG_UPC 
0x0012 +#endif +#ifndef DW_LANG_D +#define DW_LANG_D 0x0013 +#endif +#ifndef DW_LANG_Python +#define DW_LANG_Python 0x0014 +#endif +#ifndef DW_LANG_OpenCL +#define DW_LANG_OpenCL 0x0015 +#endif +#ifndef DW_LANG_Go +#define DW_LANG_Go 0x0016 +#endif +#ifndef DW_LANG_Modula3 +#define DW_LANG_Modula3 0x0017 +#endif +#ifndef DW_LANG_Haskell +#define DW_LANG_Haskell 0x0018 +#endif +#ifndef DW_LANG_C_plus_plus_03 +#define DW_LANG_C_plus_plus_03 0x0019 +#endif +#ifndef DW_LANG_C_plus_plus_11 +#define DW_LANG_C_plus_plus_11 0x001a +#endif +#ifndef DW_LANG_OCaml +#define DW_LANG_OCaml 0x001b +#endif +#ifndef DW_LANG_Rust +#define DW_LANG_Rust 0x001c +#endif +#ifndef DW_LANG_C11 +#define DW_LANG_C11 0x001d +#endif +#ifndef DW_LANG_Swift +#define DW_LANG_Swift 0x001e +#endif +#ifndef DW_LANG_Julia +#define DW_LANG_Julia 0x001f +#endif +#ifndef DW_LANG_Dylan +#define DW_LANG_Dylan 0x0020 +#endif +#ifndef DW_LANG_C_plus_plus_14 +#define DW_LANG_C_plus_plus_14 0x0021 +#endif +#ifndef DW_LANG_Fortran03 +#define DW_LANG_Fortran03 0x0022 +#endif +#ifndef DW_LANG_Fortran08 +#define DW_LANG_Fortran08 0x0023 +#endif +#ifndef DW_LANG_RenderScript +#define DW_LANG_RenderScript 0x0024 +#endif +#ifndef DW_LANG_BLISS +#define DW_LANG_BLISS 0x0025 +#endif + +int lang__str2int(const char *lang) +{ + static const char *languages[] = { + [DW_LANG_Ada83] = "ada83", + [DW_LANG_Ada95] = "ada95", + [DW_LANG_BLISS] = "bliss", + [DW_LANG_C11] = "c11", + [DW_LANG_C89] = "c89", + [DW_LANG_C99] = "c99", + [DW_LANG_C] = "c", + [DW_LANG_Cobol74] = "cobol74", + [DW_LANG_Cobol85] = "cobol85", + [DW_LANG_C_plus_plus_03] = "c++03", + [DW_LANG_C_plus_plus_11] = "c++11", + [DW_LANG_C_plus_plus_14] = "c++14", + [DW_LANG_C_plus_plus] = "c++", + [DW_LANG_D] = "d", + [DW_LANG_Dylan] = "dylan", + [DW_LANG_Fortran03] = "fortran03", + [DW_LANG_Fortran08] = "fortran08", + [DW_LANG_Fortran77] = "fortran77", + [DW_LANG_Fortran90] = "fortran90", + [DW_LANG_Fortran95] = "fortran95", + [DW_LANG_Go] = "go", + 
[DW_LANG_Haskell] = "haskell", + [DW_LANG_Java] = "java", + [DW_LANG_Julia] = "julia", + [DW_LANG_Modula2] = "modula2", + [DW_LANG_Modula3] = "modula3", + [DW_LANG_ObjC] = "objc", + [DW_LANG_ObjC_plus_plus] = "objc++", + [DW_LANG_OCaml] = "ocaml", + [DW_LANG_OpenCL] = "opencl", + [DW_LANG_Pascal83] = "pascal83", + [DW_LANG_PLI] = "pli", + [DW_LANG_Python] = "python", + [DW_LANG_RenderScript] = "renderscript", + [DW_LANG_Rust] = "rust", + [DW_LANG_Swift] = "swift", + [DW_LANG_UPC] = "upc", + }; + + if (strcasecmp(lang, "asm") == 0) + return DW_LANG_Mips_Assembler; + + // c89 is the first, bliss is the last, see /usr/include/dwarf.h + for (int id = DW_LANG_C89; id <= DW_LANG_BLISS; ++id) + if (languages[id] && strcasecmp(lang, languages[id]) == 0) + return id; + + return -1; +} + + static int sysfs__read_build_id(const char *filename, void *build_id, size_t size) { int fd, err = -1; @@ -2391,8 +2590,11 @@ int cus__load_files(struct cus *cus, struct conf_load *conf, int i = 0; while (filenames[i] != NULL) { - if (cus__load_file(cus, conf, filenames[i])) + int err = cus__load_file(cus, conf, filenames[i]); + if (err) { + errno = -err; return -++i; + } ++i; } @@ -2408,12 +2610,9 @@ int cus__fprintf_load_files_err(struct cus *cus __maybe_unused, const char *tool struct cus *cus__new(void) { - struct cus *cus = malloc(sizeof(*cus)); + struct cus *cus = zalloc(sizeof(*cus)); if (cus != NULL) { - cus->nr_entries = 0; - cus->priv = NULL; - cus->loader_exit = NULL; INIT_LIST_HEAD(&cus->cus); pthread_mutex_init(&cus->mutex, NULL); } @@ -48,8 +48,9 @@ struct conf_fprintf; */ struct conf_load { enum load_steal_kind (*steal)(struct cu *cu, - struct conf_load *conf); - int (*thread_exit)(void); + struct conf_load *conf, + void *thr_data); + int (*thread_exit)(struct conf_load *conf, void *thr_data); void *cookie; char *format_path; int nr_jobs; @@ -64,12 +65,17 @@ struct conf_load { bool skip_encoding_btf_decl_tag; bool skip_missing; bool skip_encoding_btf_type_tag; + bool 
skip_encoding_btf_enum64; + bool btf_gen_optimized; + bool skip_encoding_btf_inconsistent_proto; uint8_t hashtable_bits; uint8_t max_hashtable_bits; uint16_t kabi_prefix_len; const char *kabi_prefix; struct btf *base_btf; struct conf_fprintf *conf_fprintf; + int (*threads_prepare)(struct conf_load *conf, int nr_threads, void **thr_data); + int (*threads_collect)(struct conf_load *conf, int nr_threads, void **thr_data, int error); }; /** struct conf_fprintf - hints to the __fprintf routines @@ -87,6 +93,7 @@ struct conf_load { * @suppress_force_paddings: This makes sense only if the debugging format has struct alignment information, * So allow for it to be disabled and disable it automatically for things like BTF, * that don't have such info. + * @skip_emitting_atomic_typedefs: Allow not emitting "typedef _Atomic int atomic_int;" and friends */ struct conf_fprintf { const char *prefix; @@ -125,6 +132,9 @@ struct conf_fprintf { uint8_t classes_as_structs:1; uint8_t hex_fmt:1; uint8_t strip_inline:1; + uint8_t skip_emitting_atomic_typedefs:1; + uint8_t skip_emitting_errors:1; + uint8_t skip_emitting_modifier:1; }; struct cus; @@ -226,7 +236,7 @@ struct debug_fmt_ops { bool has_alignment_info; }; -extern struct debug_fmt_ops *dwarves__active_loader; +#define ARCH_MAX_REGISTER_PARAMS 8 struct cu { struct list_head node; @@ -250,6 +260,8 @@ struct cu { uint8_t has_addr_info:1; uint8_t uses_global_strings:1; uint8_t little_endian:1; + uint8_t nr_register_params; + int register_params[ARCH_MAX_REGISTER_PARAMS]; uint16_t language; unsigned long nr_inline_expansions; size_t size_inline_expansions; @@ -293,6 +305,8 @@ static inline __pure bool cu__is_c(const struct cu *cu) return cu->language == LANG_C; } +int lang__str2int(const char *lang); + /** * cu__for_each_cached_symtab_entry - iterate thru the cached symtab entries * @cu: struct cu instance @@ -373,6 +387,19 @@ static inline __pure bool cu__is_c(const struct cu *cu) continue; \ else +/** + * cu__for_each_constant - 
iterate thru all the global constant tags + * @cu: struct cu instance to iterate + * @pos: struct tag iterator + * @id: uint32_t tag id + */ +#define cu__for_each_constant(cu, id, pos) \ + for (id = 0; id < cu->tags_table.nr_entries; ++id) \ + if (!(pos = cu->tags_table.entries[id]) || \ + !tag__is_constant(pos)) \ + continue; \ + else + int cu__add_tag(struct cu *cu, struct tag *tag, uint32_t *id); int cu__add_tag_with_id(struct cu *cu, struct tag *tag, uint32_t id); int cu__table_add_tag(struct cu *cu, struct tag *tag, uint32_t *id); @@ -479,11 +506,21 @@ static inline bool tag__is_variable(const struct tag *tag) return tag->tag == DW_TAG_variable; } +static inline bool tag__is_constant(const struct tag *tag) +{ + return tag->tag == DW_TAG_constant; +} + static inline bool tag__is_volatile(const struct tag *tag) { return tag->tag == DW_TAG_volatile_type; } +static inline bool tag__is_atomic(const struct tag *tag) +{ + return tag->tag == DW_TAG_atomic_type; +} + static inline bool tag__is_restrict(const struct tag *tag) { return tag->tag == DW_TAG_restrict_type; @@ -493,7 +530,8 @@ static inline int tag__is_modifier(const struct tag *tag) { return tag__is_const(tag) || tag__is_volatile(tag) || - tag__is_restrict(tag); + tag__is_restrict(tag) || + tag__is_atomic(tag); } static inline bool tag__has_namespace(const struct tag *tag) @@ -536,6 +574,8 @@ static inline int tag__is_tag_type(const struct tag *tag) tag->tag == DW_TAG_subroutine_type || tag->tag == DW_TAG_unspecified_type || tag->tag == DW_TAG_volatile_type || + tag->tag == DW_TAG_atomic_type || + tag->tag == DW_TAG_unspecified_type || tag->tag == DW_TAG_LLVM_annotation; } @@ -570,11 +610,11 @@ size_t tag__fprintf(struct tag *tag, const struct cu *cu, const char *tag__name(const struct tag *tag, const struct cu *cu, char *bf, size_t len, const struct conf_fprintf *conf); -void tag__not_found_die(const char *file, int line, const char *func); +void tag__not_found_die(const char *file, int line, const char 
*func, int tag, const char *name); -#define tag__assert_search_result(tag) \ - do { if (!tag) tag__not_found_die(__FILE__,\ - __LINE__, __func__); } while (0) +#define tag__assert_search_result(result, tag, name) \ + do { if (!result) tag__not_found_die(__FILE__,\ + __LINE__, __func__, tag, name); } while (0) size_t tag__size(const struct tag *tag, const struct cu *cu); size_t tag__nr_cachelines(const struct conf_fprintf *conf, const struct tag *tag, const struct cu *cu); @@ -753,6 +793,27 @@ const char *variable__name(const struct variable *var); const char *variable__type_name(const struct variable *var, const struct cu *cu, char *bf, size_t len); +struct constant { + struct tag tag; + const char *name; + uint64_t value; +}; + +static inline struct constant *tag__constant(const struct tag *tag) +{ + return (struct constant *)tag; +} + +static inline const char *constant__name(const struct constant *constant) +{ + return constant->name; +} + +static inline uint64_t constant__value(const struct constant *constant) +{ + return constant->value; +} + struct lexblock { struct ip_tag ip; struct list_head tags; @@ -786,6 +847,9 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu, struct parameter { struct tag tag; const char *name; + uint8_t optimized:1; + uint8_t unexpected_reg:1; + uint8_t has_loc:1; }; static inline struct parameter *tag__parameter(const struct tag *tag) @@ -804,8 +868,13 @@ static inline const char *parameter__name(const struct parameter *parm) struct ftype { struct tag tag; struct list_head parms; + size_t byte_size; // First seen in DW_TAG_subroutine_type in a Go CU uint16_t nr_parms; - uint8_t unspec_parms; /* just one bit is needed */ + uint8_t unspec_parms:1; /* just one bit is needed */ + uint8_t optimized_parms:1; + uint8_t unexpected_reg:1; + uint8_t processed:1; + uint8_t inconsistent_proto:1; }; static inline struct ftype *tag__ftype(const struct tag *tag) @@ -858,6 +927,7 @@ struct function { struct rb_node 
rb_node; const char *name; const char *linkage_name; + const char *alias; /* name.isra.0 */ uint32_t cu_total_size_inline_expansions; uint16_t cu_total_nr_inline_expansions; uint8_t inlined:2; @@ -916,6 +986,10 @@ size_t function__fprintf_stats(const struct tag *tag_func, FILE *fp); const char *function__prototype(const struct function *func, const struct cu *cu, char *bf, size_t len); +const char *function__prototype_conf(const struct function *func, + const struct cu *cu, + const struct conf_fprintf *conf, + char *bf, size_t len); static __pure inline uint64_t function__addr(const struct function *func) { @@ -1012,6 +1086,8 @@ struct tag_cu_node { * @nr_tags: number of tags * @alignment: DW_AT_alignement, zero if not present, gcc emits since circa 7.3.1 * @natural_alignment: For inferring __packed__, normally the widest scalar in it, recursively + * @suffix_disambiguation: if we have both 'union foo' and 'struct foo' then we must disambiguate, + * useful to generate a vmlinux.h with all Linux types out of BTF data, for instance. * @sizeof_member: Use this to find the size of the record * @type_member: Use this to select a member from where to get an id on an enum to find a type * to cast for, needs to be used with the upcoming type_enum. 
@@ -1035,11 +1111,13 @@ struct type { uint16_t member_prefix_len; uint16_t max_tag_name_len; uint16_t natural_alignment; - bool packed_attributes_inferred; - uint8_t declaration; /* only one bit used */ + uint8_t suffix_disambiguation; + uint8_t packed_attributes_inferred:1; + uint8_t declaration:1; uint8_t definition_emitted:1; uint8_t fwd_decl_emitted:1; uint8_t resized:1; + uint8_t is_signed_enum:1; }; void __type__init(struct type *type); @@ -1310,12 +1388,14 @@ enum base_type_float_type { struct base_type { struct tag tag; const char *name; + struct list_head node; uint16_t bit_size; uint8_t name_has_encoding:1; uint8_t is_signed:1; uint8_t is_bool:1; uint8_t is_varargs:1; uint8_t float_type:4; + uint8_t definition_emitted:1; }; static inline struct base_type *tag__base_type(const struct tag *tag) @@ -1334,6 +1414,8 @@ const char *base_type__name(const struct base_type *btype, char *bf, size_t len) size_t base_type__name_to_size(struct base_type *btype, struct cu *cu); +bool base_type__language_defined(struct base_type *bt); + struct array_type { struct tag tag; uint32_t *nr_entries; @@ -1359,7 +1441,7 @@ static inline struct string_type *tag__string_type(const struct tag *tag) struct enumerator { struct tag tag; const char *name; - uint32_t value; + uint64_t value; struct tag_cu type_enum; // To cache the type_enum searches }; @@ -1390,4 +1472,12 @@ extern bool no_bitfield_type_recode; extern const char tabs[]; +#ifndef DW_TAG_atomic_type +#define DW_TAG_atomic_type 0x47 +#endif + +#ifndef DW_TAG_skeleton_unit +#define DW_TAG_skeleton_unit 0x4a +#endif + #endif /* _DWARVES_H_ */ diff --git a/dwarves_emit.c b/dwarves_emit.c index 5bf7946..01b33b7 100644 --- a/dwarves_emit.c +++ b/dwarves_emit.c @@ -13,10 +13,12 @@ #include "dwarves_emit.h" #include "dwarves.h" -void type_emissions__init(struct type_emissions *emissions) +void type_emissions__init(struct type_emissions *emissions, struct conf_fprintf *conf_fprintf) { + 
INIT_LIST_HEAD(&emissions->base_type_definitions); INIT_LIST_HEAD(&emissions->definitions); INIT_LIST_HEAD(&emissions->fwd_decls); + emissions->conf_fprintf = conf_fprintf; } static void type_emissions__add_definition(struct type_emissions *emissions, @@ -37,7 +39,7 @@ static void type_emissions__add_fwd_decl(struct type_emissions *emissions, } struct type *type_emissions__find_definition(const struct type_emissions *emissions, - const char *name) + uint16_t tag, const char *name) { struct type *pos; @@ -45,13 +47,41 @@ struct type *type_emissions__find_definition(const struct type_emissions *emissi return NULL; list_for_each_entry(pos, &emissions->definitions, node) - if (type__name(pos) != NULL && + if (type__tag(pos)->tag == tag && + type__name(pos) != NULL && strcmp(type__name(pos), name) == 0) return pos; return NULL; } +static bool type__can_have_shadow_definition(struct type *type) +{ + struct tag *tag = type__tag(type); + + return tag__is_struct(tag) || tag__is_union(tag) || tag__is_enumeration(tag); +} + +// Find if 'struct foo' is defined with a pre-existing 'enum foo', 'union foo', etc +struct type *type_emissions__find_shadow_definition(const struct type_emissions *emissions, + uint16_t tag, const char *name) +{ + struct type *pos; + + if (name == NULL) + return NULL; + + list_for_each_entry(pos, &emissions->definitions, node) { + if (type__tag(pos)->tag != tag && + type__name(pos) != NULL && + type__can_have_shadow_definition(pos) && + strcmp(type__name(pos), name) == 0) + return pos; + } + + return NULL; +} + static struct type *type_emissions__find_fwd_decl(const struct type_emissions *emissions, const char *name) { @@ -82,7 +112,7 @@ static int enumeration__emit_definitions(struct tag *tag, return 0; /* Ok, lets look at the previous CUs: */ - if (type_emissions__find_definition(emissions, type__name(etype)) != NULL) { + if (type_emissions__find_definition(emissions, DW_TAG_enumeration_type, type__name(etype)) != NULL) { /* * Yes, so lets mark it 
visited on this CU too, * to speed up the lookup. @@ -93,7 +123,13 @@ static int enumeration__emit_definitions(struct tag *tag, enumeration__fprintf(tag, conf, fp); fputs(";\n", fp); - type_emissions__add_definition(emissions, etype); + + // See comment on enumeration__fprintf(), it seems this happens with DWARF as well + // or BTF doesn't have type->declaration set because DWARF didn't have it set. + // But we consider type->nr_members == 0 as just a forward declaration, so don't + // mark it as defined because we may need it to __really__ printf it later. + if (etype->nr_members != 0) + type_emissions__add_definition(emissions, etype); return 1; } @@ -111,7 +147,7 @@ static int typedef__emit_definitions(struct tag *tdef, struct cu *cu, return 0; /* Ok, lets look at the previous CUs: */ - if (type_emissions__find_definition(emissions, type__name(def)) != NULL) { + if (type_emissions__find_definition(emissions, DW_TAG_typedef, type__name(def)) != NULL) { /* * Yes, so lets mark it visited on this CU too, * to speed up the lookup. 
@@ -121,9 +157,17 @@ static int typedef__emit_definitions(struct tag *tdef, struct cu *cu, } type = cu__type(cu, tdef->type); - tag__assert_search_result(type); + if (type == NULL) // void + goto emit; switch (type->tag) { + case DW_TAG_atomic_type: + type = cu__type(cu, tdef->type); + if (type) + tag__emit_definitions(type, cu, emissions, fp); + else + fprintf(stderr, "%s: couldn't find the type pointed from _Atomic for '%s'\n", __func__, type__name(def)); + break; case DW_TAG_array_type: tag__emit_definitions(type, cu, emissions, fp); break; @@ -165,9 +209,8 @@ static int typedef__emit_definitions(struct tag *tdef, struct cu *cu, struct type *ctype = tag__type(type); if (type__name(ctype) == NULL) { - if (type__emit_definitions(type, cu, emissions, fp)) - type__emit(type, cu, "typedef", - type__name(def), fp); + type__emit_definitions(type__tag(ctype), cu, emissions, fp); + type__emit(type__tag(ctype), cu, "typedef", type__name(def), fp); goto out; } else if (type__emit_definitions(type, cu, emissions, fp)) type__emit(type, cu, NULL, NULL, fp); @@ -182,6 +225,7 @@ static int typedef__emit_definitions(struct tag *tdef, struct cu *cu, * will thus be emitted before the function typedef, making a no go to * redefine the typedef after struct __wait_queue. 
*/ +emit: if (!def->definition_emitted) { typedef__fprintf(tdef, cu, NULL, fp); fputs(";\n", fp); @@ -218,6 +262,91 @@ static int type__emit_fwd_decl(struct type *ctype, struct type_emissions *emissi return 1; } +static struct base_type *base_type_emissions__find_definition(const struct type_emissions *emissions, const char *name) +{ + struct base_type *pos; + + if (name == NULL) + return NULL; + + list_for_each_entry(pos, &emissions->base_type_definitions, node) + if (strcmp(__base_type__name(pos), name) == 0) + return pos; + + return NULL; +} + +static void base_type_emissions__add_definition(struct type_emissions *emissions, struct base_type *type) +{ + type->definition_emitted = 1; + if (!list_empty(&type->node)) + list_del(&type->node); + list_add_tail(&type->node, &emissions->base_type_definitions); +} + +static const char *base_type__stdint2simple(const char *name) +{ + if (strcmp(name, "int32_t") == 0) + return "int"; + if (strcmp(name, "int16_t") == 0) + return "short"; + if (strcmp(name, "int8_t") == 0) + return "char"; + if (strcmp(name, "int64_t") == 0) + return "long"; + return name; +} + +static int base_type__emit_definitions(struct base_type *type, struct type_emissions *emissions, FILE *fp) +{ +#define base_type__prefix "atomic_" + const size_t prefixlen = sizeof(base_type__prefix) - 1; + const char *name = __base_type__name(type); + + // See if it was already emitted in this CU + if (type->definition_emitted) + return 0; + + // We're only emitting for "atomic_" prefixed base types + if (strncmp(name, base_type__prefix, prefixlen) != 0) + return 0; + + // See if it was already emitted in another CU + if (base_type_emissions__find_definition(emissions, name)) { + type->definition_emitted = 1; + return 0; + } + + const char *non_atomic_name = name + prefixlen; + + fputs("typedef _Atomic", fp); + + if (non_atomic_name[0] == 's' && + non_atomic_name[1] != 'i' && non_atomic_name[1] != 'h') // exclude atomic_size_t and atomic_short + fprintf(fp, " signed 
%s", non_atomic_name + 1); + else if (non_atomic_name[0] == 'l' && non_atomic_name[1] == 'l') + fprintf(fp, " long long"); + else if (non_atomic_name[0] == 'u') { + fprintf(fp, " unsigned"); + if (non_atomic_name[1] == 'l') { + fprintf(fp, " long"); + if (non_atomic_name[2] == 'l') + fprintf(fp, " long"); + } else + fprintf(fp, " %s", base_type__stdint2simple(non_atomic_name + 1)); + } else if (non_atomic_name[0] == 'b') + fprintf(fp, " _Bool"); + else + fprintf(fp, " %s", base_type__stdint2simple(non_atomic_name)); + + fprintf(fp, " %s;\n", name); + + base_type_emissions__add_definition(emissions, type); + return 1; + +#undef base_type__prefix +} + static int tag__emit_definitions(struct tag *tag, struct cu *cu, struct type_emissions *emissions, FILE *fp) { @@ -228,6 +357,10 @@ static int tag__emit_definitions(struct tag *tag, struct cu *cu, return 0; next_indirection: switch (type->tag) { + case DW_TAG_base_type: + if (emissions->conf_fprintf && emissions->conf_fprintf->skip_emitting_atomic_typedefs) + return 0; + return base_type__emit_definitions(tag__base_type(type), emissions, fp); case DW_TAG_pointer_type: case DW_TAG_reference_type: pointer = 1; @@ -235,6 +368,7 @@ next_indirection: case DW_TAG_array_type: case DW_TAG_const_type: case DW_TAG_volatile_type: + case DW_TAG_atomic_type: type = cu__type(cu, type->type); if (type == NULL) return 0; @@ -299,7 +433,7 @@ int type__emit_definitions(struct tag *tag, struct cu *cu, return 0; /* Ok, lets look at the previous CUs: */ - if (type_emissions__find_definition(emissions, type__name(ctype)) != NULL) { + if (type_emissions__find_definition(emissions, tag->tag, type__name(ctype)) != NULL) { ctype->definition_emitted = 1; return 0; } @@ -307,6 +441,40 @@ int type__emit_definitions(struct tag *tag, struct cu *cu, if (tag__is_typedef(tag)) return typedef__emit_definitions(tag, cu, emissions, fp); + /* + * vmlinux.h:120298:8: error: ‘irte’ defined as wrong kind of tag + * + * If we have a 'struct foo' and we then 
find a 'union foo', which happens + * twice in the Linux kernel, for instance, then we need to disambiguate by + * adding a suffix to the second type with the same name. + * + * That is the strategy used in: + * + * btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h + */ + if (type__can_have_shadow_definition(ctype)) { + if (type_emissions__find_shadow_definition(emissions, tag->tag, type__name(ctype))) { + ctype->suffix_disambiguation = 1; + + char *disambiguated_name; + + if (asprintf(&disambiguated_name, "%s__%u", type__name(ctype), ctype->suffix_disambiguation) == -1) { + fprintf(stderr, "emit: Not enough memory to allocate disambiguated type name for '%s'\n", + type__name(ctype)); + } else { + // Will be deleted in type__delete() on noticing ctype->suffix_disambiguation != 0 + tag__namespace(tag)->name = disambiguated_name; + + // Now look again if it was emitted in a previous CU with the disambiguated name + if (type_emissions__find_definition(emissions, tag->tag, type__name(ctype)) != NULL) { + ctype->definition_emitted = 1; + return 0; + } + } + + } + } + type_emissions__add_definition(emissions, ctype); type__check_structs_at_unnatural_alignments(ctype, cu); diff --git a/dwarves_emit.h b/dwarves_emit.h index be02acd..6022bee 100644 --- a/dwarves_emit.h +++ b/dwarves_emit.h @@ -9,19 +9,23 @@ */ #include <stdio.h> +#include <stdint.h> #include "list.h" struct cu; struct ftype; struct tag; struct type; +struct conf_fprintf; struct type_emissions { struct list_head definitions; /* struct type entries */ + struct list_head base_type_definitions; /* struct base_type entries */ struct list_head fwd_decls; /* struct class entries */ + struct conf_fprintf *conf_fprintf; }; -void type_emissions__init(struct type_emissions *temissions); +void type_emissions__init(struct type_emissions *temissions, struct conf_fprintf *conf_fprintf); int ftype__emit_definitions(struct ftype *ftype, struct cu *cu, struct type_emissions *emissions, FILE *fp); @@ -30,6 +34,6 @@ 
int type__emit_definitions(struct tag *tag, struct cu *cu, void type__emit(struct tag *tag_type, struct cu *cu, const char *prefix, const char *suffix, FILE *fp); struct type *type_emissions__find_definition(const struct type_emissions *temissions, - const char *name); + uint16_t tag, const char *name); #endif /* _DWARVES_EMIT_H_ */ diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c index c5921d7..ef8b82b 100644 --- a/dwarves_fprintf.c +++ b/dwarves_fprintf.c @@ -93,6 +93,7 @@ static const char *dwarf_tag_names[] = { [DW_TAG_skeleton_unit] = "skeleton_unit", [DW_TAG_immutable_type] = "immutable_type", #endif + [DW_TAG_atomic_type] = "atomic_type", }; static const char *dwarf_gnu_tag_names[] = { @@ -137,6 +138,8 @@ const char *dwarf_tag_name(const uint32_t tag) #endif ) return dwarf_gnu_tag_names[tag - DW_TAG_MIPS_loop]; + else if (tag == DW_TAG_LLVM_annotation) + return "LLVM_annotation"; return "INVALID"; } @@ -148,10 +151,30 @@ static struct conf_fprintf conf_fprintf__defaults = { const char tabs[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; +/* + * In dwarves_emit.c we can call type__emit() using a locally setup conf_fprintf for which + * the conf->cacheline_size member is not setup and is thus zero, so check for that and + * use the default one for that case. + * + * We really need to make the *_emit*() methods to receive a conf_fprintf pointer for + * which conf->cacheline_size is set, all tools call: + * + * dwarves__resolve_cacheline_size(&conf_load, 0); + * + * To have a conf_load/conf_fprintf with that resolved, but that is not being passed to + * the *_emit*() routines, duh. + * + * Fixing this properly will entail a series of patches, so to fix this problem + * more quickly add this helper. 
+ */ +static uint16_t conf_fprintf__cacheline_size(const struct conf_fprintf *conf) +{ + return conf->cacheline_size ?: conf_fprintf__defaults.cacheline_size; +} size_t tag__nr_cachelines(const struct conf_fprintf *conf, const struct tag *tag, const struct cu *cu) { - return (tag__size(tag, cu) + conf->cacheline_size - 1) / conf->cacheline_size; + return (tag__size(tag, cu) + conf_fprintf__cacheline_size(conf) - 1) / conf_fprintf__cacheline_size(conf); } static const char *tag__accessibility(const struct tag *tag) @@ -282,32 +305,33 @@ size_t typedef__fprintf(const struct tag *tag, const struct cu *cu, const struct tag *ptr_type; char bf[512]; int is_pointer = 0; - size_t printed; + size_t printed = fprintf(fp, "typedef "); /* * Check for void (humm, perhaps we should have a fake void tag instance * to avoid all these checks? */ if (tag->type == 0) - return fprintf(fp, "typedef void %s", type__name(type)); - + return printed + fprintf(fp, "void %s", type__name(type)); +next_type: tag_type = cu__type(cu, tag->type); if (tag_type == NULL) { - printed = fprintf(fp, "typedef "); printed += tag__id_not_found_fprintf(fp, tag->type); return printed + fprintf(fp, " %s", type__name(type)); } switch (tag_type->tag) { + case DW_TAG_atomic_type: + printed += fprintf(fp, "_Atomic "); + tag = tag_type; + goto next_type; case DW_TAG_array_type: - printed = fprintf(fp, "typedef "); return printed + array_type__fprintf(tag_type, cu, type__name(type), pconf, fp); case DW_TAG_pointer_type: if (tag_type->type == 0) /* void pointer */ break; ptr_type = cu__type(cu, tag_type->type); if (ptr_type == NULL) { - printed = fprintf(fp, "typedef "); printed += tag__id_not_found_fprintf(fp, tag_type->type); return printed + fprintf(fp, " *%s", type__name(type)); } @@ -317,7 +341,6 @@ size_t typedef__fprintf(const struct tag *tag, const struct cu *cu, is_pointer = 1; /* Fall thru */ case DW_TAG_subroutine_type: - printed = fprintf(fp, "typedef "); return printed + 
ftype__fprintf(tag__ftype(tag_type), cu, type__name(type), 0, is_pointer, 0, true, pconf, fp); case DW_TAG_class_type: @@ -325,27 +348,27 @@ size_t typedef__fprintf(const struct tag *tag, const struct cu *cu, struct type *ctype = tag__type(tag_type); if (type__name(ctype) != NULL) - return fprintf(fp, "typedef struct %s %s", type__name(ctype), type__name(type)); + return printed + fprintf(fp, "struct %s %s", type__name(ctype), type__name(type)); struct conf_fprintf tconf = *pconf; tconf.suffix = type__name(type); - return fprintf(fp, "typedef ") + __class__fprintf(tag__class(tag_type), cu, &tconf, fp); + return printed + __class__fprintf(tag__class(tag_type), cu, &tconf, fp); } case DW_TAG_enumeration_type: { struct type *ctype = tag__type(tag_type); if (type__name(ctype) != NULL) - return fprintf(fp, "typedef enum %s %s", type__name(ctype), type__name(type)); + return printed + fprintf(fp, "enum %s %s", type__name(ctype), type__name(type)); struct conf_fprintf tconf = *pconf; tconf.suffix = type__name(type); - return fprintf(fp, "typedef ") + enumeration__fprintf(tag_type, &tconf, fp); + return printed + enumeration__fprintf(tag_type, &tconf, fp); } } - return fprintf(fp, "typedef %s %s", + return printed + fprintf(fp, "%s %s", tag__name(tag_type, cu, bf, sizeof(bf), pconf), type__name(type)); } @@ -399,16 +422,29 @@ size_t enumeration__fprintf(const struct tag *tag, const struct conf_fprintf *co struct type *type = tag__type(tag); struct enumerator *pos; int max_entry_name_len = enumeration__max_entry_name_len(type); - size_t printed = fprintf(fp, "enum%s%s {\n", type__name(type) ? " " : "", type__name(type) ?: ""); + size_t printed = fprintf(fp, "enum%s%s", type__name(type) ? 
" " : "", type__name(type) ?: ""); int indent = conf->indent; if (indent >= (int)sizeof(tabs)) indent = sizeof(tabs) - 1; + if (type->nr_members) { + printed += fprintf(fp, " {\n"); + } else { + // enum x86_intercept_stage in the Linux kernel comes just as a forward + // declaration, but then BTF isn't setting the type->declaration to mark + // it as such, do the best we can and assume is a forward decl. + return printed; + } + type__for_each_enumerator(type, pos) { printed += fprintf(fp, "%.*s\t%-*s = ", indent, tabs, max_entry_name_len, enumerator__name(pos)); - printed += fprintf(fp, conf->hex_fmt ? "%#x" : "%u", pos->value); + if (conf->hex_fmt) + printed += fprintf(fp, "%#llx", (unsigned long long)pos->value); + else + printed += fprintf(fp, type->is_signed_enum ? "%lld" : "%llu", + (unsigned long long)pos->value); printed += fprintf(fp, ",\n"); } @@ -451,7 +487,8 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu, const struct conf_fprintf *conf); static const char *tag__ptr_name(const struct tag *tag, const struct cu *cu, - char *bf, size_t len, const char *ptr_suffix) + char *bf, size_t len, const char *ptr_suffix, + const struct conf_fprintf *conf) { if (tag->type == 0) /* No type == void */ snprintf(bf, len, "void %s", ptr_suffix); @@ -469,14 +506,15 @@ static const char *tag__ptr_name(const struct tag *tag, const struct cu *cu, struct tag *next_type = cu__type(cu, type->type); if (next_type && tag__is_pointer(next_type)) { - const_pointer = "const "; + if (!(conf && conf->skip_emitting_modifier)) + const_pointer = "const "; type = next_type; } } snprintf(bf, len, "%s %s%s", __tag__name(type, cu, - tmpbf, sizeof(tmpbf), NULL), + tmpbf, sizeof(tmpbf), conf), const_pointer, ptr_suffix); } @@ -510,9 +548,9 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu, strncpy(bf, function__name(tag__function(tag)), len); break; case DW_TAG_pointer_type: - return tag__ptr_name(tag, cu, bf, len, "*"); + return 
tag__ptr_name(tag, cu, bf, len, "*", conf); case DW_TAG_reference_type: - return tag__ptr_name(tag, cu, bf, len, "&"); + return tag__ptr_name(tag, cu, bf, len, "&", conf); case DW_TAG_ptr_to_member_type: { char suffix[512]; type_id_t id = tag__ptr_to_member_type(tag)->containing_type; @@ -527,11 +565,12 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu, snprintf(suffix + l, sizeof(suffix) - l, "::*"); } - return tag__ptr_name(tag, cu, bf, len, suffix); + return tag__ptr_name(tag, cu, bf, len, suffix, conf); } case DW_TAG_volatile_type: case DW_TAG_const_type: case DW_TAG_restrict_type: + case DW_TAG_atomic_type: case DW_TAG_unspecified_type: type = cu__type(cu, tag->type); if (type == NULL && tag->type != 0) @@ -542,12 +581,16 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu, *type_str = __tag__name(type, cu, tmpbf, sizeof(tmpbf), pconf); - switch (tag->tag) { - case DW_TAG_volatile_type: prefix = "volatile "; break; - case DW_TAG_const_type: prefix = "const "; break; - case DW_TAG_restrict_type: suffix = " restrict"; break; + if (!pconf->skip_emitting_modifier) { + switch (tag->tag) { + case DW_TAG_volatile_type: prefix = "volatile "; break; + case DW_TAG_const_type: prefix = "const "; break; + case DW_TAG_restrict_type: suffix = " restrict"; break; + case DW_TAG_atomic_type: prefix = "_Atomic "; break; + } } - snprintf(bf, len, "%s%s%s ", prefix, type_str, suffix); + snprintf(bf, len, "%s%s%s%s", prefix, type_str, suffix, + pconf->no_parm_names ? 
"" : " "); } break; case DW_TAG_array_type: @@ -574,6 +617,13 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu, case DW_TAG_variable: snprintf(bf, len, "%s", variable__name(tag__variable(tag))); break; + case DW_TAG_LLVM_annotation: + type = cu__type(cu, tag->type); + if (type == NULL && tag->type != 0) + tag__id_not_found_snprintf(bf, len, tag->type); + else if (!tag__has_type_loop(tag, type, bf, len, NULL)) + __tag__name(type, cu, bf, len, conf); + break; default: snprintf(bf, len, "%s%s", tag__prefix(cu, tag->tag, pconf), type__name(tag__type(tag)) ?: ""); @@ -633,6 +683,22 @@ static size_t type__fprintf_stats(struct type *type, const struct cu *cu, return printed; } +static type_id_t skip_llvm_annotations(const struct cu *cu, type_id_t id) +{ + struct tag *type; + + for (;;) { + if (id == 0) + break; + type = cu__type(cu, id); + if (type == NULL || type->tag != DW_TAG_LLVM_annotation || type->type == id) + break; + id = type->type; + } + + return id; +} + static size_t union__fprintf(struct type *type, const struct cu *cu, const struct conf_fprintf *conf, FILE *fp); @@ -730,12 +796,16 @@ inner_struct: tconf.suppress_offset_comment = suppress_offset_comment; } + const char *modifier; + next_type: switch (type->tag) { - case DW_TAG_pointer_type: - if (type->type != 0) { + case DW_TAG_pointer_type: { + type_id_t ptype_id = skip_llvm_annotations(cu, type->type); + + if (ptype_id != 0) { int n; - struct tag *ptype = cu__type(cu, type->type); + struct tag *ptype = cu__type(cu, ptype_id); if (ptype == NULL) goto out_type_not_found; n = tag__has_type_loop(type, ptype, NULL, 0, fp); @@ -761,6 +831,7 @@ next_type: } } /* Fall Thru */ + } default: print_default: printed += fprintf(fp, "%-*s %s", tconf.type_spacing, @@ -771,10 +842,17 @@ print_default: printed += ftype__fprintf(tag__ftype(type), cu, name, 0, 0, tconf.type_spacing, true, &tconf, fp); break; - case DW_TAG_const_type: { - size_t const_printed = fprintf(fp, "%s ", "const"); - 
tconf.type_spacing -= const_printed; - printed += const_printed; + case DW_TAG_atomic_type: + modifier = "_Atomic"; + goto print_modifier; + case DW_TAG_const_type: + modifier = "const"; +print_modifier: { + if (!conf->skip_emitting_modifier) { + size_t modifier_printed = fprintf(fp, "%s ", modifier); + tconf.type_spacing -= modifier_printed; + printed += modifier_printed; + } struct tag *ttype = cu__type(cu, type->type); if (ttype) { @@ -828,6 +906,14 @@ print_default: else printed += enumeration__fprintf(type, &tconf, fp); break; + case DW_TAG_LLVM_annotation: { + struct tag *ttype = cu__type(cu, type->type); + if (ttype) { + type = ttype; + goto next_type; + } + goto out_type_not_found; + } } out: if (type_expanded) @@ -1056,21 +1142,33 @@ static size_t union__fprintf(struct type *type, const struct cu *cu, conf->suffix ? " " : "", conf->suffix ?: ""); } -const char *function__prototype(const struct function *func, - const struct cu *cu, char *bf, size_t len) +const char *function__prototype_conf(const struct function *func, + const struct cu *cu, + const struct conf_fprintf *conf, + char *bf, size_t len) { FILE *bfp = fmemopen(bf, len, "w"); if (bfp != NULL) { - ftype__fprintf(&func->proto, cu, NULL, 0, 0, 0, true, - &conf_fprintf__defaults, bfp); + ftype__fprintf(&func->proto, cu, NULL, 0, 0, 0, true, conf, + bfp); fclose(bfp); - } else + } else { + if (conf->skip_emitting_errors) + return NULL; snprintf(bf, len, "<ERROR(%s): fmemopen failed!>", __func__); + } return bf; } +const char *function__prototype(const struct function *func, + const struct cu *cu, char *bf, size_t len) +{ + return function__prototype_conf(func, cu, &conf_fprintf__defaults, + bf, len); +} + size_t ftype__fprintf_parms(const struct ftype *ftype, const struct cu *cu, int indent, const struct conf_fprintf *conf, FILE *fp) @@ -1316,11 +1414,11 @@ static size_t class__fprintf_cacheline_boundary(struct conf_fprintf *conf, FILE *fp) { int indent = conf->indent; - uint32_t cacheline = offset / 
conf->cacheline_size; + uint32_t cacheline = offset / conf_fprintf__cacheline_size(conf); size_t printed = 0; if (cacheline > *conf->cachelinep) { - const uint32_t cacheline_pos = offset % conf->cacheline_size; + const uint32_t cacheline_pos = offset % conf_fprintf__cacheline_size(conf); const uint32_t cacheline_in_bytes = offset - cacheline_pos; if (cacheline_pos == 0) @@ -1329,9 +1427,10 @@ static size_t class__fprintf_cacheline_boundary(struct conf_fprintf *conf, cacheline_in_bytes); else printed += fprintf(fp, "/* --- cacheline %u boundary " - "(%u bytes) was %u bytes ago --- " + "(%u bytes) was %u byte%s ago --- " "*/\n", cacheline, - cacheline_in_bytes, cacheline_pos); + cacheline_in_bytes, cacheline_pos, + cacheline_pos > 1 ? "s" : ""); printed += fprintf(fp, "%.*s", indent, tabs); @@ -1762,7 +1861,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu, } printed += fprintf(fp, " */\n"); } - cacheline = (cconf.base_offset + type->size) % conf->cacheline_size; + cacheline = (cconf.base_offset + type->size) % conf_fprintf__cacheline_size(conf); if (cacheline != 0) printed += fprintf(fp, "%.*s/* last cacheline: %u bytes */\n", cconf.indent, tabs, @@ -1831,6 +1930,24 @@ static size_t variable__fprintf(const struct tag *tag, const struct cu *cu, return printed; } +static size_t constant__fprintf(const struct tag *tag, const struct cu *cu, + const struct conf_fprintf *conf, FILE *fp) +{ + struct constant *constant = tag__constant(tag); + const char *name = constant__name(constant); + size_t printed = 0; + + if (name != NULL) { + struct tag *type = cu__type(cu, constant->tag.type); + if (type != NULL) { + printed += fprintf(fp, "const "); + printed += type__fprintf(type, cu, name, conf, fp); + printed += fprintf(fp, " = %" PRIu64, constant__value(constant)); + } + } + return printed; +} + static size_t namespace__fprintf(const struct tag *tag, const struct cu *cu, const struct conf_fprintf *conf, FILE *fp) { @@ -1920,6 +2037,9 @@ size_t 
tag__fprintf(struct tag *tag, const struct cu *cu, case DW_TAG_variable: printed += variable__fprintf(tag, cu, pconf, fp); break; + case DW_TAG_constant: // First seen in a Go CU + printed += constant__fprintf(tag, cu, pconf, fp); + break; case DW_TAG_imported_declaration: printed += imported_declaration__fprintf(tag, cu, fp); break; @@ -2003,7 +2123,7 @@ void dwarves__resolve_cacheline_size(const struct conf_load *conf, uint16_t user } else size = user_cacheline_size; - if (conf) + if (conf && conf->conf_fprintf) conf->conf_fprintf->cacheline_size = size; conf_fprintf__defaults.cacheline_size = size; diff --git a/dwarves_reorganize.c b/dwarves_reorganize.c index 79b159b..14f5e82 100644 --- a/dwarves_reorganize.c +++ b/dwarves_reorganize.c @@ -550,8 +550,8 @@ static int class__demote_bitfields(struct class *class, const struct cu *cu, cu__find_base_type_of_size(cu, bytes_needed, &new_type_id); - tag__assert_search_result(old_type_tag); - tag__assert_search_result(new_type_tag); + tag__assert_search_result(old_type_tag, member->tag.tag, class_member__name(member)); + tag__assert_search_result(new_type_tag, member->tag.tag, class_member__name(member)); if (verbose) { char old_bf[64], new_bf[64]; diff --git a/lib/bpf b/lib/bpf deleted file mode 120000 index b7cf7d3..0000000 --- a/lib/bpf +++ /dev/null @@ -1 +0,0 @@ -../../libbpf
\ No newline at end of file diff --git a/man-pages/pahole.1 b/man-pages/pahole.1 index c1ec63e..c1b48de 100644 --- a/man-pages/pahole.1 +++ b/man-pages/pahole.1 @@ -110,6 +110,26 @@ comparable when using using multiple threads to load DWARF data, when the order that the types in the compile units is processed is not deterministic. .TP +.B \-\-compile +Generate compileable code, with all definitions for all types, i.e.: +.PP +.nf +$ pahole --compile > vmlinux.h +.fi + +Produces a header that can be included in a C source file and built. In +the example provided it will use the BTF info if available, otherwise will +look for a DWARF file matching the running kernel build-id. + +.TP +.B \-\-skip_emitting_atomic_typedefs +Do not emit 'typedef _Atomic int atomic_int' & friends when used with options +like --compile. Use it if the compiler provides these already, as of circa +2022 with gcc 12.2.1 those are not encoded in DWARF so to generate compilable +code we need emit those typedefs for the atomic types used in the data structures +being emitted from debugging information. + +.TP .B \-\-count=COUNT Pretty print the first COUNT records from input. @@ -125,8 +145,8 @@ offset from the beginning of a struct is. .TP .B \-F, \-\-format_path Allows specifying a list of debugging formats to try, in order. Right now this -includes "ctf" and "dwarf". The default format path used is equivalent to -"-F dwarf,ctf". +includes "btf", "ctf" and "dwarf". The default format path used is equivalent to +"-F dwarf,btf,ctf". .TP .B \-\-hashbits=BITS @@ -202,10 +222,18 @@ Do not encode VARs in BTF. Do not encode decl tags in BTF. .TP +.B \-\-skip_encoding_btf_enum64 +Do not encode enum64 in BTF. + +.TP .B \-\-skip_encoding_btf_type_tag Do not encode type tags in BTF. .TP +.B \-\-skip_encoding_btf_inconsistent_proto +Do not encode functions with multiple inconsistent prototypes or unexpected register use for their parameters, where the registers used do not match calling conventions. 
+ +.TP .B \-j, \-\-jobs=N Run N jobs in parallel. Defaults to number of online processors + 10% (like the 'ninja' build system) if no argument is specified. @@ -238,6 +266,10 @@ Allow producing BTF_KIND_FLOAT entries in systems where the vmlinux DWARF information has float types. .TP +.B \-\-btf_gen_optimized +Generate BTF for functions with optimization-related suffixes (.isra, .constprop). + +.TP .B \-\-btf_gen_all Allow using all the BTF features supported by pahole. @@ -357,6 +389,26 @@ Exclude PREFIXed classes. Exclude PREFIXed compilation units. .TP +.B \-\-lang=languages +Only process compilation units built from source code written in the specified languages. + +Supported languages: + + ada83, ada95, asm, bliss, c, c89, c99, c11, c++, c++03, c++11, c++14, cobol74, + cobol85, d, dylan, fortran77, fortran90, fortran95, fortran03, fortran08, + go, haskell, java, julia, modula2, modula3, objc, objc++, ocaml, opencl, + pascal83, pli, python, renderscript, rust, swift, upc + +The linux kernel, for instance, is written in 'c89' circa 2022, use that in filters. + +.B \-\-lang_exclude=languages +Don't process compilation units built from source code written in the specified languages. + +To filter out compilation units written in Rust, for instance, use: + + pahole -j --btf_encode --lang_exclude rust + +.TP .B \-y, \-\-prefix_filter=PREFIX Include PREFIXed classes. 
@@ -24,6 +24,7 @@ #include "dwarves_reorganize.h" #include "dwarves.h" +#include "dwarves_emit.h" #include "dutil.h" //#include "ctf_encoder.h" FIXME: disabled, probably its better to move to Oracle's libctf #include "btf_encoder.h" @@ -80,6 +81,9 @@ static const char *class_name; static LIST_HEAD(class_names); static char separator = '\t'; +static bool compilable; +static struct type_emissions emissions; + static struct conf_fprintf conf = { .emit_stats = 1, }; @@ -100,11 +104,10 @@ struct structure { static struct structure *structure__new(struct class *class, struct cu *cu, uint32_t id) { - struct structure *st = malloc(sizeof(*st)); + struct structure *st = zalloc(sizeof(*st)); if (st != NULL) { st->nr_files = 1; - st->nr_methods = 0; st->class = class; st->cu = cu; st->id = id; @@ -125,6 +128,79 @@ static struct rb_root structures__tree = RB_ROOT; static LIST_HEAD(structures__list); static pthread_mutex_t structures_lock = PTHREAD_MUTEX_INITIALIZER; +static struct { + char *str; + int *entries; + int nr_entries; + bool exclude; +} languages; + +static int lang_id_cmp(const void *pa, const void *pb) +{ + int a = *(int *)pa, + b = *(int *)pb; + return a - b; +} + +static int parse_languages(void) +{ + int nr_allocated = 4; + char *lang = languages.str; + + languages.entries = zalloc(sizeof(int) * nr_allocated); + if (languages.entries == NULL) + goto out_enomem; + + while (1) { + char *sep = strchr(lang, ','); + + if (sep) + *sep = '\0'; + + int id = lang__str2int(lang); + + if (sep) + *sep = ','; + + if (id < 0) { + fprintf(stderr, "pahole: unknown language \"%s\"\n", lang); + goto out_free; + } + + if (languages.nr_entries >= nr_allocated) { + nr_allocated *= 2; + int *entries = realloc(languages.entries, nr_allocated); + + if (entries == NULL) + goto out_enomem; + + languages.entries = entries; + } + + languages.entries[languages.nr_entries++] = id; + + if (!sep) + break; + + lang = sep + 1; + } + + qsort(languages.entries, languages.nr_entries, sizeof(int), 
lang_id_cmp); + + return 0; +out_enomem: + fprintf(stderr, "pahole: not enough memory to parse --lang\n"); +out_free: + zfree(&languages.entries); + languages.nr_entries = 0; + return -1; +} + +static bool languages__in(int lang) +{ + return bsearch(&lang, languages.entries, languages.nr_entries, sizeof(int), lang_id_cmp) != NULL; +} + static int type__compare_members_types(struct type *a, struct cu *cu_a, struct type *b, struct cu *cu_b) { int ret = strcmp(type__name(a), type__name(b)); @@ -437,7 +513,14 @@ static void class_formatter(struct class *class, struct cu *cu, uint32_t id) } else conf.prefix = conf.suffix = NULL; - tag__fprintf(tag, cu, &conf, stdout); + if (compilable) { + if (type__emit_definitions(tag, cu, &emissions, stdout)) { + tag__fprintf(tag, cu, &conf, stdout); + putchar(';'); + } + } else { + tag__fprintf(tag, cu, &conf, stdout); + } putchar('\n'); } @@ -590,16 +673,24 @@ static void print_ordered_classes(void) if (!need_resort) { __print_ordered_classes(&structures__tree); } else { - struct rb_root resorted = RB_ROOT; + structures__tree = RB_ROOT; - resort_classes(&resorted, &structures__list); - __print_ordered_classes(&resorted); + resort_classes(&structures__tree, &structures__list); + __print_ordered_classes(&structures__tree); } } static struct cu *cu__filter(struct cu *cu) { + if (languages.nr_entries) { + bool in = languages__in(cu->language); + + if ((!in && !languages.exclude) || + (in && languages.exclude)) + return NULL; + } + if (cu__exclude_prefix != NULL && (cu->name == NULL || strncmp(cu__exclude_prefix, cu->name, @@ -781,19 +872,19 @@ static void class__resize_LP(struct tag *tag, struct cu *cu) continue; type = cu__type(cu, tag_pos->type); - tag__assert_search_result(type); + tag__assert_search_result(type, tag_pos->tag, class_member__name(tag__class_member(tag_pos))); if (type->tag == DW_TAG_array_type) { int i; for (i = 0; i < tag__array_type(type)->dimensions; ++i) array_multiplier *= tag__array_type(type)->nr_entries[i]; 
type = cu__type(cu, type->type); - tag__assert_search_result(type); + tag__assert_search_result(type, tag_pos->tag, class_member__name(tag__class_member(tag_pos))); } if (tag__is_typedef(type)) { type = tag__follow_typedef(type, cu); - tag__assert_search_result(type); + tag__assert_search_result(type, tag_pos->tag, class_member__name(tag__class_member(tag_pos))); } switch (type->tag) { @@ -863,7 +954,7 @@ static void union__find_new_size(struct tag *tag, struct cu *cu) continue; type = cu__type(cu, tag_pos->type); - tag__assert_search_result(type); + tag__assert_search_result(type, tag_pos->tag, class_member__name(tag__class_member(tag_pos))); if (tag__is_typedef(type)) type = tag__follow_typedef(type, cu); @@ -1006,7 +1097,7 @@ static void print_structs_with_pointer_to(struct cu *cu, uint32_t type) type__for_each_member(&pos->type, pos_member) { struct tag *ctype = cu__type(cu, pos_member->tag.type); - tag__assert_search_result(ctype); + tag__assert_search_result(ctype, pos_member->tag.tag, class_member__name(pos_member)); if (!tag__is_pointer_to(ctype, type)) continue; @@ -1090,6 +1181,13 @@ static void print_containers(struct cu *cu, uint32_t type, int ident) } } +static int +libbpf_print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + /* Name and version of program. 
*/ ARGP_PROGRAM_VERSION_HOOK_DEF = dwarves_print_version; @@ -1127,6 +1225,13 @@ ARGP_PROGRAM_VERSION_HOOK_DEF = dwarves_print_version; #define ARGP_skip_encoding_btf_decl_tag 331 #define ARGP_skip_missing 332 #define ARGP_skip_encoding_btf_type_tag 333 +#define ARGP_compile 334 +#define ARGP_languages 335 +#define ARGP_languages_exclude 336 +#define ARGP_skip_encoding_btf_enum64 337 +#define ARGP_skip_emitting_atomic_typedefs 338 +#define ARGP_btf_gen_optimized 339 +#define ARGP_skip_encoding_btf_inconsistent_proto 340 static const struct argp_option pahole__options[] = { { @@ -1456,6 +1561,11 @@ static const struct argp_option pahole__options[] = { .doc = "Allow using all the BTF features supported by pahole." }, { + .name = "compile", + .key = ARGP_compile, + .doc = "Emit compilable types" + }, + { .name = "structs", .key = ARGP_just_structs, .doc = "Show just structs", @@ -1513,6 +1623,38 @@ static const struct argp_option pahole__options[] = { .doc = "Do not encode TAGs in BTF." }, { + .name = "lang", + .key = ARGP_languages, + .arg = "LANGUAGES", + .doc = "Only consider compilation units written in these languages" + }, + { + .name = "lang_exclude", + .key = ARGP_languages_exclude, + .arg = "LANGUAGES", + .doc = "Don't consider compilation units written in these languages" + }, + { + .name = "skip_encoding_btf_enum64", + .key = ARGP_skip_encoding_btf_enum64, + .doc = "Do not encode ENUM64sin BTF." + }, + { + .name = "skip_emitting_atomic_typedefs", + .key = ARGP_skip_emitting_atomic_typedefs, + .doc = "Do not emit 'typedef _Atomic int atomic_int' & friends." + }, + { + .name = "btf_gen_optimized", + .key = ARGP_btf_gen_optimized, + .doc = "Generate BTF for functions with optimization-related suffixes (.isra, .constprop)." 
+ }, + { + .name = "skip_encoding_btf_inconsistent_proto", + .key = ARGP_skip_encoding_btf_inconsistent_proto, + .doc = "Skip functions that have multiple inconsistent function prototypes sharing the same name, or that use unexpected registers for parameter values." + }, + { .name = NULL, } }; @@ -1581,7 +1723,9 @@ static error_t pahole__options_parser(int key, char *arg, formatter = NULL; break; case 't': separator = arg[0]; break; case 'u': defined_in = 1; break; - case 'V': global_verbose = 1; break; + case 'V': global_verbose = 1; + libbpf_set_print(libbpf_print_all_levels); + break; case 'w': word_size = atoi(arg); break; case 'X': cu__exclude_prefix = arg; cu__exclude_prefix_len = strlen(cu__exclude_prefix); @@ -1598,6 +1742,12 @@ static error_t pahole__options_parser(int key, char *arg, formatter = class_name_formatter; break; // case 'Z': ctf_encode = 1; break; // FIXME: Disabled + case ARGP_compile: + compilable = true; + type_emissions__init(&emissions, &conf); + conf.no_semicolon = true; + conf.strip_inline = true; + break; case ARGP_flat_arrays: conf.flat_arrays = 1; break; case ARGP_suppress_aligned_attribute: conf.suppress_aligned_attribute = 1; break; @@ -1666,6 +1816,19 @@ static error_t pahole__options_parser(int key, char *arg, conf_load.skip_missing = true; break; case ARGP_skip_encoding_btf_type_tag: conf_load.skip_encoding_btf_type_tag = true; break; + case ARGP_languages_exclude: + languages.exclude = true; + /* fallthru */ + case ARGP_languages: + languages.str = arg; break; + case ARGP_skip_encoding_btf_enum64: + conf_load.skip_encoding_btf_enum64 = true; break; + case ARGP_skip_emitting_atomic_typedefs: + conf.skip_emitting_atomic_typedefs = true; break; + case ARGP_btf_gen_optimized: + conf_load.btf_gen_optimized = true; break; + case ARGP_skip_encoding_btf_inconsistent_proto: + conf_load.skip_encoding_btf_inconsistent_proto = true; break; default: return ARGP_ERR_UNKNOWN; } @@ -2108,7 +2271,7 @@ static struct type_instance 
*type_instance__new(struct type *type, struct cu *cu if (type == NULL) return NULL; - struct type_instance *instance = malloc(sizeof(*instance) + type->size); + struct type_instance *instance = zalloc(sizeof(*instance) + type->size); if (instance) { instance->type = type; @@ -2574,7 +2737,7 @@ static int class_member_filter__parse(struct class_member_filter *filter, struct static struct class_member_filter *class_member_filter__new(struct type *type, char *sfilter) { - struct class_member_filter *filter = malloc(sizeof(*filter)); + struct class_member_filter *filter = zalloc(sizeof(*filter)); if (filter && class_member_filter__parse(filter, type, sfilter)) { free(filter); @@ -2728,7 +2891,7 @@ static void prototype__delete(struct prototype *prototype) static struct tag_cu_node *tag_cu_node__new(struct tag *tag, struct cu *cu) { - struct tag_cu_node *tc = malloc(sizeof(*tc)); + struct tag_cu_node *tc = zalloc(sizeof(*tc)); if (tc) { tc->tc.tag = tag; @@ -2798,8 +2961,75 @@ out: static struct type_instance *header; +struct thread_data { + struct btf *btf; + struct btf_encoder *encoder; +}; + +static int pahole_threads_prepare(struct conf_load *conf, int nr_threads, void **thr_data) +{ + int i; + struct thread_data *threads = calloc(sizeof(struct thread_data), nr_threads); + + for (i = 0; i < nr_threads; i++) + thr_data[i] = threads + i; + + return 0; +} + +static int pahole_thread_exit(struct conf_load *conf, void *thr_data) +{ + struct thread_data *thread = thr_data; + + if (thread == NULL) + return 0; + + /* + * Here we will call btf__dedup() here once we extend + * btf__dedup(). 
+ */ + + return 0; +} + +static int pahole_threads_collect(struct conf_load *conf, int nr_threads, void **thr_data, + int error) +{ + struct thread_data **threads = (struct thread_data **)thr_data; + int i; + int err = 0; + + if (error) + goto out; + + for (i = 0; i < nr_threads; i++) { + /* + * Merge content of the btf instances of worker threads to the btf + * instance of the primary btf_encoder. + */ + if (!threads[i]->btf) + continue; + err = btf_encoder__add_encoder(btf_encoder, threads[i]->encoder); + if (err < 0) + goto out; + } + err = 0; + +out: + for (i = 0; i < nr_threads; i++) { + if (threads[i]->encoder && threads[i]->encoder != btf_encoder) { + btf_encoder__delete(threads[i]->encoder); + threads[i]->encoder = NULL; + } + } + free(threads[0]); + + return err; +} + static enum load_steal_kind pahole_stealer(struct cu *cu, - struct conf_load *conf_load) + struct conf_load *conf_load, + void *thr_data) { int ret = LSK__DELETE; @@ -2818,6 +3048,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, if (btf_encode) { static pthread_mutex_t btf_lock = PTHREAD_MUTEX_INITIALIZER; + struct btf_encoder *encoder; pthread_mutex_lock(&btf_lock); /* @@ -2827,21 +3058,58 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, * point we'll have cu->elf setup... */ if (!btf_encoder) { + /* + * btf_encoder is the primary encoder. + * And, it is used by the thread + * create it. + */ btf_encoder = btf_encoder__new(cu, detached_btf_filename, conf_load->base_btf, skip_encoding_btf_vars, btf_encode_force, btf_gen_floats, global_verbose); - if (btf_encoder == NULL) { - ret = LSK__STOP_LOADING; - goto out_btf; + if (btf_encoder && thr_data) { + struct thread_data *thread = thr_data; + + thread->encoder = btf_encoder; + thread->btf = btf_encoder__btf(btf_encoder); } } + pthread_mutex_unlock(&btf_lock); + + if (!btf_encoder) { + ret = LSK__STOP_LOADING; + goto out_btf; + } + + /* + * thr_data keeps per-thread data for worker threads. 
Each worker thread + * has an encoder. The main thread will merge the data collected by all + * these encoders to btf_encoder. However, the first thread reaching this + * function creates btf_encoder and reuses it as its local encoder. It + * avoids copying the data collected by the first thread. + */ + if (thr_data) { + struct thread_data *thread = thr_data; + + if (thread->encoder == NULL) { + thread->encoder = + btf_encoder__new(cu, detached_btf_filename, + NULL, + skip_encoding_btf_vars, + btf_encode_force, + btf_gen_floats, + global_verbose); + thread->btf = btf_encoder__btf(thread->encoder); + } + encoder = thread->encoder; + } else { + encoder = btf_encoder; + } - if (btf_encoder__encode_cu(btf_encoder, cu)) { + ret = btf_encoder__encode_cu(encoder, cu, conf_load); + if (ret < 0) { fprintf(stderr, "Encountered error while encoding BTF.\n"); exit(1); } - ret = LSK__DELETE; out_btf: - pthread_mutex_unlock(&btf_lock); return ret; } #if 0 @@ -2978,7 +3246,7 @@ out_btf: * We don't need to print it for every compile unit * but the previous options need */ - tag__fprintf(class, cu, &conf, stdout); + formatter(tag__class(class), cu, class_id); putchar('\n'); } } @@ -3149,6 +3417,9 @@ int main(int argc, char *argv[]) goto out; } + if (languages.str && parse_languages()) + return rc; + if (class_name != NULL && stats_formatter == nr_methods_formatter) { fputs("pahole: -m/nr_methods doesn't work with --class/-C, it shows all classes and the number of its methods\n", stderr); return rc; @@ -3206,6 +3477,9 @@ int main(int argc, char *argv[]) memset(tab, ' ', sizeof(tab) - 1); conf_load.steal = pahole_stealer; + conf_load.thread_exit = pahole_thread_exit; + conf_load.threads_prepare = pahole_threads_prepare; + conf_load.threads_collect = pahole_threads_collect; // Make 'pahole --header type < file' a shorter form of 'pahole -C type --count 1 < file' if (conf.header_type && !class_name && prettify_input) { @@ -3292,7 +3566,7 @@ try_sole_arg_as_class_names: 
type_instance__delete(header); header = NULL; - if (btf_encode) { + if (btf_encode && btf_encoder) { // maybe all CUs were filtered out and thus we don't have an encoder? err = btf_encoder__encode(btf_encoder); if (err) { fputs("Failed to encode BTF\n", stderr); @@ -67,12 +67,18 @@ static int cu__emit_tags(struct cu *cu) printf(" /* size: %zd */\n\n", tag__size(tag, cu)); } + puts("\n\n/* Constants: */\n"); + cu__for_each_constant(cu, i, tag) { + tag__fprintf(tag, cu, NULL, stdout); + printf(" /* size: %zd */\n\n", tag__size(tag, cu)); + } return 0; } static enum load_steal_kind pdwtags_stealer(struct cu *cu, - struct conf_load *conf_load __maybe_unused) + struct conf_load *conf_load __maybe_unused, + void *thr_data __maybe_unused) { cu__emit_tags(cu); return LSK__DELETE; @@ -53,7 +53,7 @@ struct fn_stats { static struct fn_stats *fn_stats__new(struct tag *tag, const struct cu *cu) { - struct fn_stats *stats = malloc(sizeof(*stats)); + struct fn_stats *stats = zalloc(sizeof(*stats)); if (stats != NULL) { const struct function *fn = tag__function(tag); @@ -489,7 +489,9 @@ int elf_symtabs__show(char *filenames[]) return EXIT_SUCCESS; } -static enum load_steal_kind pfunct_stealer(struct cu *cu, struct conf_load *conf_load __maybe_unused) +static enum load_steal_kind pfunct_stealer(struct cu *cu, + struct conf_load *conf_load __maybe_unused, + void *thr_data __maybe_unused) { if (function_name) { @@ -656,7 +658,7 @@ static error_t pfunct__options_parser(int key, char *arg, case 'a': addr = strtoull(arg, NULL, 0); conf_load.get_addr_info = true; break; case 'b': expand_types = true; - type_emissions__init(&emissions); break; + type_emissions__init(&emissions, &conf); break; case 'c': class_name = arg; break; case 'f': function_name = arg; break; case 'F': conf_load.format_path = arg; break; @@ -687,7 +689,7 @@ static error_t pfunct__options_parser(int key, char *arg, case ARGP_no_parm_names: conf.no_parm_names = 1; break; case ARGP_compile: expand_types = true; - 
type_emissions__init(&emissions); + type_emissions__init(&emissions, &conf); compilable_output = true; conf.no_semicolon = true; conf.strip_inline = true; diff --git a/rpm/SPECS/dwarves.spec b/rpm/SPECS/dwarves.spec index 0b4846e..2080b27 100644 --- a/rpm/SPECS/dwarves.spec +++ b/rpm/SPECS/dwarves.spec @@ -2,9 +2,9 @@ %define libver 1 Name: dwarves -Version: 1.23 -Release: 1%{?dist} -License: GPLv2 +Version: 1.25 +Release: 2%{?dist} +License: GPL-2.0-only Summary: Debugging Information Manipulation Tools (pahole & friends) URL: http://acmel.wordpress.com Source: http://fedorapeople.org/~acme/dwarves/%{name}-%{version}.tar.xz @@ -79,7 +79,7 @@ rm -Rf %{buildroot} %files %doc README.ctracer %doc README.btf -%doc changes-v1.23 +%doc changes-v1.25 %doc NEWS %{_bindir}/btfdiff %{_bindir}/codiff @@ -131,6 +131,36 @@ rm -Rf %{buildroot} %{_libdir}/%{libname}_reorganize.so %changelog +* Wed Jun 14 2023 Viktor Malik <vmalik@redhat.com> - 1.25-2 +- Migrate license to SPDX + +* Sat Apr 8 2023 Arnaldo Carvalho de Melo <acme@redhat.com> - 1.25-1 +- New release: v1.25 +- Support for DW_TAG_unspecified_type more generally. +- Make sure struct member offsets are in ascending order. Rust BTF needs this. +- Support C atomic types (DW_TAG_atomic_type). +- Initial support for DW_TAG_LLVM_annotation, used for BTF type tags, for __rcu, __user, etc +- Exclude functions with the same name (static functions in different CUs), inconsistent prototypes or not following calling convention. +- Allow generation of BTF for optimized functions, those that end with a .isra*, .constprop*. +- Support 'pahole --lang=/--lang_exclude=asm' +- Support --compile from DWARF in addition to from BTF. +- Exclude RUST CUs in 'btfdiff', as those are not yet being BTF encoded. + +* Wed Aug 17 2022 Arnaldo Carvalho de Melo <acme@redhat.com> - 1.24-1 +- New release: v1.24 +- Add support to BTF_KIND_ENUM64. +- Support multithreaded BTF encoding. +- Encode char type as signed in BTF. 
+- Introduce --lang and --lang_exclude to pahole. +- Introduce --compile to pahole. +- Don't segfault when processing bogus files. + +* Thu Jul 21 2022 Fedora Release Engineering <releng@fedoraproject.org> - 1.23-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_37_Mass_Rebuild + +* Thu Jan 20 2022 Fedora Release Engineering <releng@fedoraproject.org> - 1.23-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild + * Wed Dec 8 2021 Arnaldo Carvalho de Melo <acme@redhat.com> - 1.23-1 - New release: v1.23 - Process DW_TAG_LLVM_annotation tags. @@ -161,6 +191,12 @@ rm -Rf %{buildroot} - Introduce sorted type output (--sort). - Disable incomplete CTF encoder. +* Wed Jul 21 2021 Fedora Release Engineering <releng@fedoraproject.org> - 1.21-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild + +* Mon May 10 2021 Arnaldo Carvalho de Melo <acme@redhat.com> - 1.21-2 +- Backport 0001-btf-Remove-ftrace-filter.patch from upstream + * Fri Apr 9 2021 Arnaldo Carvalho de Melo <acme@redhat.com> - 1.21-1 - New release: v1.21 - DWARF loader: @@ -64,7 +64,7 @@ static void emit_wrapper(struct function *f, struct cu *cu) const type_id_t type_id = parm->tag.type; struct tag *type = cu__type(cu, type_id); - tag__assert_search_result(type); + tag__assert_search_result(type, parm->tag.tag, parameter__name(parm)); if (type->tag == DW_TAG_base_type) { struct base_type *bt = tag__base_type(type); char bf[64]; |