aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThiébaud Weksteen <tweek@google.com>2020-12-15 19:50:08 +0100
committerThiébaud Weksteen <tweek@google.com>2020-12-15 20:34:10 +0100
commit74a2cbf763df43166ab8d1044900df93db93dcb8 (patch)
tree04b38b529132aa584837d13cec9fe3b11812e4bf
parent62f8ef84602d2e6298231d170e1e4625bde8620c (diff)
parentc41086cfcaa0ba611648edfd972a50735aa1d363 (diff)
downloadbloaty-74a2cbf763df43166ab8d1044900df93db93dcb8.tar.gz
Merge remote-tracking branch 'aosp/upstream-master' into master
Change-Id: Id182af42f82e5d3e3a1e0aa6fb99605fcff1c4cd
-rw-r--r--.gitignore6
-rw-r--r--.travis.yml13
-rw-r--r--CHANGES.md116
-rw-r--r--CMakeLists.txt303
-rw-r--r--CONTRIBUTING.md53
-rw-r--r--LICENSE202
-rw-r--r--README.md983
-rw-r--r--config.bloaty13
-rw-r--r--custom_sources.bloaty113
-rw-r--r--doc/how-bloaty-works.md373
-rwxr-xr-xmake-release-tarball.sh40
-rwxr-xr-xregen-readme.py36
-rw-r--r--src/bloaty.cc2166
-rw-r--r--src/bloaty.h604
-rw-r--r--src/bloaty.proto100
-rw-r--r--src/bloaty_package.bloaty13
-rw-r--r--src/demangle.cc1885
-rw-r--r--src/demangle.h63
-rw-r--r--src/disassemble.cc233
-rw-r--r--src/dwarf.cc2159
-rw-r--r--src/dwarf_constants.h649
-rw-r--r--src/elf.cc1432
-rw-r--r--src/macho.cc666
-rw-r--r--src/main.cc45
-rw-r--r--src/range_map.cc332
-rw-r--r--src/range_map.h391
-rw-r--r--src/re.h92
-rw-r--r--src/webassembly.cc411
-rw-r--r--tests/bloaty_misc_test.cc53
-rw-r--r--tests/bloaty_test.cc291
-rw-r--r--tests/fuzz_driver.cc37
-rw-r--r--tests/fuzz_target.cc69
-rw-r--r--tests/range_map_test.cc404
-rw-r--r--tests/strarr.h52
-rw-r--r--tests/test.h291
-rw-r--r--tests/testdata/fuzz_corpus/0034ecacd5427aafc6b97413da2053b36de5059fbin0 -> 3104 bytes
-rw-r--r--tests/testdata/fuzz_corpus/0153168d08d78a4eb486cdd421b3efd6a4e12844bin0 -> 161 bytes
-rw-r--r--tests/testdata/fuzz_corpus/04deff284542b1271c7ed6da11b4389342793f4dbin0 -> 7217 bytes
-rw-r--r--tests/testdata/fuzz_corpus/0512fc56ee361da71476c098b91d1081e5dbc4adbin0 -> 4910 bytes
-rw-r--r--tests/testdata/fuzz_corpus/0c7d074fcd0d6863b497f6137c6cacffc59c2ae8bin0 -> 10984 bytes
-rw-r--r--tests/testdata/fuzz_corpus/0efb04f81a05b500031405eccae9d7e8ea0721c5bin0 -> 11792 bytes
-rw-r--r--tests/testdata/fuzz_corpus/0f6736109fcd5db53450385486c4586f884feb23bin0 -> 11792 bytes
-rw-r--r--tests/testdata/fuzz_corpus/110a37d69bfc8f6da2f8180e907d7d2f12da1eb2bin0 -> 12149 bytes
-rw-r--r--tests/testdata/fuzz_corpus/14f1751b6ceb6aa262bced1c928c11d565c3d913bin0 -> 544 bytes
-rw-r--r--tests/testdata/fuzz_corpus/15c502b13029920e528a2982fc1559689764aaf8bin0 -> 11792 bytes
-rw-r--r--tests/testdata/fuzz_corpus/1846aea81a4e97327d5e82c8ab9e6d4c43bffee3bin0 -> 22992 bytes
-rw-r--r--tests/testdata/fuzz_corpus/1930321f0302e111b64e38b8456ef8473f3e71d8bin0 -> 22151 bytes
-rw-r--r--tests/testdata/fuzz_corpus/1bfe776624349462cb1d818443af106215021470bin0 -> 36055 bytes
-rw-r--r--tests/testdata/fuzz_corpus/2215ecf59e25dfb7b946e25bf9cbb9b6c862563fbin0 -> 7105 bytes
-rw-r--r--tests/testdata/fuzz_corpus/22a284684c8b3f13e6070870dfb21d6453c7819ebin0 -> 3080 bytes
-rw-r--r--tests/testdata/fuzz_corpus/23350c9dafa9b9a1e2a2524a2b173fdec00037abbin0 -> 440 bytes
-rw-r--r--tests/testdata/fuzz_corpus/270dcbc8975aaff7d869faa520be996460e6f7bebin0 -> 5648 bytes
-rw-r--r--tests/testdata/fuzz_corpus/2877069c49bf5773d158de6911842a58768b74c3bin0 -> 786 bytes
-rw-r--r--tests/testdata/fuzz_corpus/28d7fbe0ff87b53a011656f9e3c9c3aeb2ce2018bin0 -> 13594 bytes
-rw-r--r--tests/testdata/fuzz_corpus/2eb47f37b3b0b4bb84cc55d0125d9bb7d176b1c5bin0 -> 2813 bytes
-rw-r--r--tests/testdata/fuzz_corpus/2f6f7647f2e81f50a3f787dda064cffe03354aa8bin0 -> 268006 bytes
-rw-r--r--tests/testdata/fuzz_corpus/2fd5be6e7a99d71434a756a4d59a8d44db4942bbbin0 -> 9840 bytes
-rw-r--r--tests/testdata/fuzz_corpus/3115b1163086c5904008b9a5d17a761863910214bin0 -> 160 bytes
-rw-r--r--tests/testdata/fuzz_corpus/327c150b2d13636bb3ea5129cb58af30675e5599bin0 -> 12881 bytes
-rw-r--r--tests/testdata/fuzz_corpus/37209ceacf21ce2796c98824dc9be60b876274a2bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/3e96523b6d0025b39ddd0a771fc9f99dd1590877bin0 -> 3104 bytes
-rw-r--r--tests/testdata/fuzz_corpus/3f3c4745b7053aca15608204a7592bac44d690cbbin0 -> 12971 bytes
-rw-r--r--tests/testdata/fuzz_corpus/412f1573ff1a9675377481456d8809a850d03f1bbin0 -> 6084 bytes
-rw-r--r--tests/testdata/fuzz_corpus/42f2cd88cae45b6add339ed2c2a9074ff55d9db0bin0 -> 10984 bytes
-rw-r--r--tests/testdata/fuzz_corpus/459ef92fc33d1d9fc6048f293bab5ddb584f94a4bin0 -> 17438 bytes
-rw-r--r--tests/testdata/fuzz_corpus/48c3f1ae3089b0644c6af799af2ae94ee1c5ad30bin0 -> 10784 bytes
-rw-r--r--tests/testdata/fuzz_corpus/4e6b7729619f66a429dd9ef88d186dd37c42d8cabin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/53a2d35a2dfe33981111fce5c8fb6514dd9570cbbin0 -> 80 bytes
-rw-r--r--tests/testdata/fuzz_corpus/57354041fcdfcc3613a0762adfd5189ca60abc80bin0 -> 13188 bytes
-rw-r--r--tests/testdata/fuzz_corpus/5a90c59187664f79cdf1ded1a6eef6854ddd9a07bin0 -> 10984 bytes
-rw-r--r--tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5bin0 -> 261 bytes
-rw-r--r--tests/testdata/fuzz_corpus/5e8ec9cbd600dcc8f6dc5eafaf34226706378b60bin0 -> 3104 bytes
-rw-r--r--tests/testdata/fuzz_corpus/64779227c42248607f46879f9e4007e66ee68269bin0 -> 13116 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6511ded4f638705a5cdd071d7e21cb4febb7234cbin0 -> 3082 bytes
-rw-r--r--tests/testdata/fuzz_corpus/66845a4bce637e02379f5dbf1b860ceb7725a96dbin0 -> 10984 bytes
-rw-r--r--tests/testdata/fuzz_corpus/67630fad767a6918501667f56f09793d399a104abin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6b45a7d45ef354eb2d549918a804abc1e6d42d05bin0 -> 3072 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6d2975ba5e8ad75fb4e743d96a279001b9ecd0e3bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6d385d65872fa08e194a8b806ccfd87e49f5a554bin0 -> 14452 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6d7db4d97103830cd33688f18b7c6944218b58f8bin0 -> 160 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6ecf640685258c2bc0960ab1a797ba2db10ffd63bin0 -> 884 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6f0a1ec2ebc980c9296486ad1a5b8a564549aa9abin0 -> 9841 bytes
-rw-r--r--tests/testdata/fuzz_corpus/6fa62db4fbfc54538513558d0886ff8ae74e58edbin0 -> 161 bytes
-rw-r--r--tests/testdata/fuzz_corpus/70619c7a97d684b6dd6dddfb62782ae40bc91370bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/7069a01603ddcf775682ee40d6814f4e5559bb5abin0 -> 22881 bytes
-rw-r--r--tests/testdata/fuzz_corpus/739b5ce9def832d301c95cfa6d1a9c6e9d46c73dbin0 -> 2228 bytes
-rw-r--r--tests/testdata/fuzz_corpus/7b06150aa15f8aed1abd7a93f1772b893efc150ebin0 -> 3105 bytes
-rw-r--r--tests/testdata/fuzz_corpus/7c06c448cd75d4af6621f578d6d8cb35d2e5279ebin0 -> 257 bytes
-rw-r--r--tests/testdata/fuzz_corpus/7d09e7259aa0fb3da736b98b94211f71a5e513e6bin0 -> 118 bytes
-rw-r--r--tests/testdata/fuzz_corpus/7e290e80959e9f3b045387f7ec257182cb23721dbin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/827e96b748c33f032574b9f2b7f084920feb76abbin0 -> 13116 bytes
-rw-r--r--tests/testdata/fuzz_corpus/84675e905d3771b59fd51f606bc2a14f549aba43bin0 -> 4192 bytes
-rw-r--r--tests/testdata/fuzz_corpus/8631458a27f52b7e3cdfb06a6bde899901bfd3acbin0 -> 11124 bytes
-rw-r--r--tests/testdata/fuzz_corpus/86a3d4b71ee172cd476d035fb9445bcbb835d92abin0 -> 127 bytes
-rw-r--r--tests/testdata/fuzz_corpus/8be73e77c819315082ac4f40acc964ddfa7be6fabin0 -> 10029 bytes
-rw-r--r--tests/testdata/fuzz_corpus/8fc314d43f2d412e20a822b5f595bf61005342a9bin0 -> 795 bytes
-rw-r--r--tests/testdata/fuzz_corpus/91acbe9b1ef167d88e8a57f16db2aa740865accdbin0 -> 4193 bytes
-rw-r--r--tests/testdata/fuzz_corpus/922103ecc5f70b4235b20e30ce0a2e895ead8251bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/9b5a5fa4a46bcca17df149785daf9cd14f1c0443bin0 -> 76295 bytes
-rw-r--r--tests/testdata/fuzz_corpus/9e079b888e5d223ef0bebf13ce1e26ebdd82752abin0 -> 10986 bytes
-rw-r--r--tests/testdata/fuzz_corpus/a22fdce1317617bf89f3283cbd44ef490a57b5e2bin0 -> 7834 bytes
-rwxr-xr-xtests/testdata/fuzz_corpus/a493f77d0d04aaed2e2dca8256c9a5cc591aa8fabin0 -> 119 bytes
-rw-r--r--tests/testdata/fuzz_corpus/a4d1a2b246e0a1f133774daa28328c0d7ce5c3e5bin0 -> 7092 bytes
-rw-r--r--tests/testdata/fuzz_corpus/a4da3e6bc07539aece56d12b2e15b89edd842fd8bin0 -> 3104 bytes
-rw-r--r--tests/testdata/fuzz_corpus/a64d39a8957a4f4c7012f78b41caa8e5f3f4e484bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/a69662c2423b5a1d1859f7981c9e88c4f821b0b7bin0 -> 64 bytes
-rw-r--r--tests/testdata/fuzz_corpus/a743522a548f6321b69ee5081ce898ec5e4b9c23bin0 -> 12149 bytes
-rw-r--r--tests/testdata/fuzz_corpus/af0df3abd6ff306ca9161b6f6ebc96f21c6dfa98bin0 -> 10985 bytes
-rw-r--r--tests/testdata/fuzz_corpus/b3a904cebb1d3070ca96cf70ec0b9ef5d1612a45bin0 -> 4192 bytes
-rw-r--r--tests/testdata/fuzz_corpus/bf2cb5de1de6ca492f159dc3cce67cf88a6244aabin0 -> 10984 bytes
-rw-r--r--tests/testdata/fuzz_corpus/c121e995dd4575473e468801b301da0f219f5de7bin0 -> 5246 bytes
-rw-r--r--tests/testdata/fuzz_corpus/c561ab1d99f16a04898518914dd1cea4afa7e358bin0 -> 12971 bytes
-rw-r--r--tests/testdata/fuzz_corpus/c98c037db24035a40d40f91084a56f470bb6fbc5bin0 -> 2594 bytes
-rw-r--r--tests/testdata/fuzz_corpus/cd838035892825e361fe0f936f93fa62aaf2fab1bin0 -> 3951 bytes
-rw-r--r--tests/testdata/fuzz_corpus/cdeac0baddcbd150bfec97b7b88bff74f73e99f5bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-4796943898771456bin0 -> 10 bytes
-rw-r--r--tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-5711765729443840bin0 -> 2624 bytes
-rwxr-xr-xtests/testdata/fuzz_corpus/d1128451375207da064d0f332e840af933280610bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/d3cc4e4dddf87cb0d41135b7a22d03fc4ec11bbcbin0 -> 4192 bytes
-rw-r--r--tests/testdata/fuzz_corpus/daebef8f49a59b71cf57d4771b09f9f8545b83d8bin0 -> 173 bytes
-rw-r--r--tests/testdata/fuzz_corpus/dda6875f2313476f402e9a283ecaf2b50cfae316bin0 -> 4192 bytes
-rw-r--r--tests/testdata/fuzz_corpus/df7a639969efbe5943b6a7fa5eff4f732a50a4f6bin0 -> 8 bytes
-rw-r--r--tests/testdata/fuzz_corpus/e08b7c26f946f4761f2cecdc81f4e5e7651db9a7bin0 -> 3104 bytes
-rw-r--r--tests/testdata/fuzz_corpus/e33d3e649729bea900f870b0cd5335f312d9ed42bin0 -> 10985 bytes
-rw-r--r--tests/testdata/fuzz_corpus/e4a3653bac41c8f39cc625286daa813e0ce603b0bin0 -> 6454 bytes
-rw-r--r--tests/testdata/fuzz_corpus/e4c4e1789c10bbfe8ed1c05522483332bf2538f8bin0 -> 12680 bytes
-rw-r--r--tests/testdata/fuzz_corpus/f0ac70f708130bb9cc4aba5ebe1a3c500c4ea11abin0 -> 13116 bytes
-rw-r--r--tests/testdata/fuzz_corpus/f5d4fb20f43ef7131ed49ff29f5c1bee69070ff2bin0 -> 4486 bytes
-rw-r--r--tests/testdata/fuzz_corpus/fc88742708813d5dd57936fad4c6e9bd6ed125acbin0 -> 10984 bytes
-rw-r--r--tests/testdata/linux-x86/01-empty.obin0 -> 786 bytes
-rw-r--r--tests/testdata/linux-x86/02-simple.obin0 -> 3104 bytes
-rw-r--r--tests/testdata/linux-x86/03-simple.abin0 -> 10984 bytes
-rwxr-xr-xtests/testdata/linux-x86/04-simple.sobin0 -> 12149 bytes
-rw-r--r--tests/testdata/linux-x86/05-binary.binbin0 -> 12881 bytes
-rw-r--r--tests/testdata/linux-x86/06-diff.abin0 -> 11124 bytes
-rw-r--r--tests/testdata/linux-x86/07-binary-stripped.binbin0 -> 9576 bytes
-rw-r--r--tests/testdata/linux-x86_64/01-empty.obin0 -> 1134 bytes
-rw-r--r--tests/testdata/linux-x86_64/02-simple.obin0 -> 4192 bytes
-rw-r--r--tests/testdata/linux-x86_64/03-simple.abin0 -> 13116 bytes
-rwxr-xr-xtests/testdata/linux-x86_64/04-simple.sobin0 -> 13523 bytes
-rw-r--r--tests/testdata/linux-x86_64/05-binary.binbin0 -> 14446 bytes
-rw-r--r--tests/testdata/linux-x86_64/06-diff.abin0 -> 13228 bytes
-rw-r--r--tests/testdata/linux-x86_64/07-binary-stripped.binbin0 -> 10280 bytes
-rwxr-xr-xtests/testdata/linux-x86_64/oldbloaty.binbin0 -> 7271869 bytes
-rwxr-xr-xtests/testdata/make_test_files.sh120
-rw-r--r--tests/testdata/misc/01-no-sections.binbin0 -> 160 bytes
-rw-r--r--tests/testdata/misc/02-section-count-overflow.obin0 -> 6314773 bytes
-rw-r--r--tests/testdata/misc/03-small-binary-that-crashed-inlines.binbin0 -> 12680 bytes
-rwxr-xr-xtests/testdata/misc/04-go-binary-with-ref-addr.binbin0 -> 1651325 bytes
m---------third_party/abseil-cpp0
m---------third_party/capstone0
-rw-r--r--third_party/darwin_xnu_macho/LICENSE367
-rw-r--r--third_party/darwin_xnu_macho/METADATA23
-rw-r--r--third_party/darwin_xnu_macho/mach-o/fat.h63
-rw-r--r--third_party/darwin_xnu_macho/mach-o/loader.h1531
-rw-r--r--third_party/darwin_xnu_macho/mach-o/nlist.h318
-rw-r--r--third_party/darwin_xnu_macho/mach-o/reloc.h203
-rw-r--r--third_party/darwin_xnu_macho/mach/machine.h371
-rw-r--r--third_party/darwin_xnu_macho/mach/vm_prot.h160
m---------third_party/demumble0
-rw-r--r--third_party/freebsd_elf/LICENSE126
-rw-r--r--third_party/freebsd_elf/METADATA26
-rw-r--r--third_party/freebsd_elf/elf.h47
-rw-r--r--third_party/freebsd_elf/elf32.h259
-rw-r--r--third_party/freebsd_elf/elf64.h262
-rw-r--r--third_party/freebsd_elf/elf_common.h1336
m---------third_party/googletest0
m---------third_party/protobuf0
m---------third_party/re20
169 files changed, 19901 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2075731
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.a
+*.o
+.*.sw?
+/bloaty
+!tests/testdata/**
+*.dSYM
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..70af010
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,13 @@
+language: cpp
+
+dist: trusty
+
+compiler:
+ - clang
+ - gcc
+
+os:
+ - linux
+ - osx
+
+script: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j4 && make test
diff --git a/CHANGES.md b/CHANGES.md
new file mode 100644
index 0000000..63b889f
--- /dev/null
+++ b/CHANGES.md
@@ -0,0 +1,116 @@
+# Change Log
+
+## [Bloaty McBloatface v1.1](https://github.com/google/bloaty/releases/tag/v1.1) (2020-05-23)
+
+### Added
+
+* **Source Filtering**: Bloaty can now filter the results based on a regex
+ match. See "Source filter" in [README.md](README.md) for details. (#177)
+* **Show only File or VM**: It is possible to restrict the output to show only
+ "VM SIZE" or "FILE SIZE" by passing `--domain=vm` or `--domain=file`. (#165)
+
+### Fixed
+
+* **Use system deps in CMake build**: The CMake build can now use system deps
+ for protobuf, capstone, and re2 if they are present. Hopefully this will make
+ Bloaty easier to package in package managers. (#155)
+* **DWARF 4 Support**: Fixed one place in Bloaty that was not accepting DWARF 4.
+ (#166)
+* **DWARF fix**: Added support for `DW_FORM_ref_udata`. (#197)
+* **Big-endian fix**: Added missing byte swapping when getting the build ID for
+ a big-endian binary. (#182)
+* **WASM demangling**: WASM symbols are now properly demangled. (#149)
+* **New WASM sections**: Added support for new DataCount and Event sections
+ in WASM. (#178)
+* **Scaling fixes**: Fixed integer overflow in 32-bit builds, and other issues
+ that arise when using Bloaty with many files and/or large files. (#193)
+* **Improved coverage**: Bloaty now properly attributes `.eh_frame` in object
+ files, and attributes ELF headers to the corresponding section. (#168)
+* **CSV escaping**: Bloaty now properly escapes CSV output when a field contains
+ a comma or double quote. (#174)
+
+### Changed
+
+* **File size column moved left**: In the default output, the file size now
+ appears on the left. This means that all numbers are now on the left, which
+ leads to more readable output when viewing in a proportional font or in a
+ limited-width window.
+
+ Old:
+ ```
+ VM SIZE FILE SIZE
+ -------------- --------------
+ 0.0% 0 .debug_info 7.97Mi 29.5%
+ 0.0% 0 .debug_loc 6.40Mi 23.7%
+ ```
+
+ New:
+ ```
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 30.0% 8.85Mi 0.0% 0 .debug_info
+ 24.7% 7.29Mi 0.0% 0 .debug_loc
+ ```
+
+ This shouldn't cause breakage, as anything consuming Bloaty's output
+ programmatically should be using `--csv` or `--tsv`. (#165)
+* **ELF Segment labels now contain index**: Previously ELF segment labels looked
+ like `LOAD [RW]` with segment flags only. Now they also contain the segment
+ index, e.g. `LOAD #1 [RW]`, so the output can distinguish between different
+ segments with the same flags. (#159)
+
+### Security
+
+Bloaty should not be considered fully hardened against malicious binaries. This
+is one of the major reasons why Bloaty is not offered as an in-process library,
+and should only be used through its command-line interface in a dedicated
+address space. If you do not trust the input, further process sandboxing is
+advisable.
+
+However we do perform fuzzing of the parsers, and fix any crash bugs that are
+found by fuzzing.
+
+* **Fixed crash bugs found by fuzzing** (#173, #175)
+
+## [Bloaty McBloatface v1.0](https://github.com/google/bloaty/releases/tag/v1.0) (2018-08-07)
+
+This is the first formal release of Bloaty.
+
+Changes since Bloaty was [first announced in Nov
+2016](http://blog.reverberate.org/2016/11/07/introducing-bloaty-mcbloatface.html):
+
+* **Much better coverage / data quality**: Bloaty now properly attributes
+ sections of the binary like the symbol table, debugging information,
+ relocations, and frame unwinding info. We even disassemble the binary looking
+ for references to anonymous data members that do not have symbol table
+ entries. This all means higher quality output, and much less of the binary is
+ attributed to `[None]`.
+* **Native Mach-O support**: Bloaty can now parse Mach-O files directly,
+ instead of shelling out to other programs. The result is much faster and
+ higher-quality output for Mach-O. Also the data sources that require debug
+ info (like `-d compileunits`) now work with Mach-O.
+* **WebAssembly support (EXPERIMENTAL)**: Bloaty can analyze sections and
+ symbols in binary WebAssembly files.
+* **Native C++ Demangling**: Bloaty can now demangle C++ symbols without
+ shelling out to `c++filt`. There are two separate demangling modes, one which
+ strips all template parameters and parameter/return types (`shortsymbols`) and
+ one that returns full demangled names (`fullsymbols`).
+* **Profiling stripped binaries**: Bloaty can read symbols and debug info from
+ separate files. This allows you to profile stripped binaries.
+* **Parallel file parsing**: If you pass multiple files to Bloaty, it will
+ scan them in parallel. If you are parsing lots of files and have lots of CPUs,
+ this can save a lot of time.
+* **Configuration files**: All options you can specify on the command-line can
+ be put in a configuration file instead (and vice versa). This is helpful if
+ the options might otherwise overflow the command-line (for example, if you
+ are parsing thousands of files). It also lets you save bits of configuration
+ to a file for reuse.
+* **Custom data sources**: you can create your own data sources by applying
+ regexes to the built-in sources. This lets you bucket symbols, source files,
+ etc. in ways that make sense for your project.
+* **CSV/TSV output**: this is a robust way to parse Bloaty's output and use it
+ in other programs. (The default, human-readable output is not designed to be
+ parsed and its format may change in backward-incompatible ways).
+* **Lots of bugfixes**: Fixed lots of bugs that people reported in various
+ platforms and configurations. Bloaty is fuzzed regularly now, and this has
+ led to many bugfixes also.
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..ac07c0c
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,303 @@
+cmake_minimum_required(VERSION 3.5)
+cmake_policy(SET CMP0048 NEW)
+project (Bloaty VERSION 1.1)
+include(CTest)
+set(CMAKE_CXX_STANDARD 11)
+
+# Options we define for users.
+option(BLOATY_ENABLE_ASAN "Enable address sanitizer." OFF)
+option(BLOATY_ENABLE_UBSAN "Enable undefined behavior sanitizer." OFF)
+option(BLOATY_ENABLE_CMAKETARGETS "Enable installing cmake target files." ON)
+option(BLOATY_ENABLE_BUILDID "Enable build id." ON)
+option(BLOATY_ENABLE_RE2 "Enable the support for regular expression functions." ON)
+
+if(UNIX)
+find_package(PkgConfig)
+if(${PKG_CONFIG_FOUND})
+if(BLOATY_ENABLE_RE2)
+ pkg_search_module(RE2 re2)
+endif(BLOATY_ENABLE_RE2)
+pkg_search_module(CAPSTONE capstone)
+pkg_search_module(PROTOBUF protobuf)
+if(BLOATY_ENABLE_RE2)
+ if(${RE2_FOUND})
+ MESSAGE(STATUS "System re2 found, using")
+ else(${RE2_FOUND})
+ MESSAGE(STATUS "System re2 not found, using bundled version")
+ endif(${RE2_FOUND})
+endif(BLOATY_ENABLE_RE2)
+if(${CAPSTONE_FOUND})
+ MESSAGE(STATUS "System capstone found, using")
+else(${CAPSTONE_FOUND})
+ MESSAGE(STATUS "System capstone not found, using bundled version")
+endif(${CAPSTONE_FOUND})
+if(${PROTOBUF_FOUND})
+ MESSAGE(STATUS "System protobuf found, using")
+else(${PROTOBUF_FOUND})
+ MESSAGE(STATUS "System protobuf not found, using bundled version")
+endif(${PROTOBUF_FOUND})
+else(${PKG_CONFIG_FOUND})
+ MESSAGE(STATUS "pkg-config not found, using bundled dependencies")
+endif(${PKG_CONFIG_FOUND})
+endif(UNIX)
+
+# Set default build type.
+if(NOT CMAKE_BUILD_TYPE)
+ message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
+ "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+ FORCE)
+endif()
+
+# Check out Git submodules.
+if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.gitmodules")
+ execute_process (COMMAND git submodule update --init --recursive
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+endif()
+
+# Add third_party libraries, disabling as much as we can of their builds.
+
+add_definitions(-D_LIBCXXABI_FUNC_VIS=) # For Demumble.
+if(BLOATY_ENABLE_RE2)
+ add_definitions(-DUSE_RE2)
+endif(BLOATY_ENABLE_RE2)
+
+if(UNIX)
+ if(BLOATY_ENABLE_RE2)
+ if(${RE2_FOUND})
+ include_directories(${RE2_INCLUDE_DIRS})
+ else(${RE2_FOUND})
+ set(RE2_BUILD_TESTING OFF CACHE BOOL "enable testing for RE2" FORCE)
+ add_subdirectory(third_party/re2)
+ include_directories(third_party/re2)
+ endif(${RE2_FOUND})
+ endif(BLOATY_ENABLE_RE2)
+ if(${CAPSTONE_FOUND})
+ include_directories(${CAPSTONE_INCLUDE_DIRS})
+ else(${CAPSTONE_FOUND})
+ set(CAPSTONE_BUILD_SHARED OFF CACHE BOOL "Build shared library" FORCE)
+ set(CAPSTONE_BUILD_TESTS OFF CACHE BOOL "Build tests" FORCE)
+ add_subdirectory(third_party/capstone)
+ include_directories(third_party/capstone/include)
+ endif(${CAPSTONE_FOUND})
+ if(${PROTOBUF_FOUND})
+ include_directories(${PROTOBUF_INCLUDE_DIRS})
+ else(${PROTOBUF_FOUND})
+ set(protobuf_BUILD_TESTS OFF CACHE BOOL "enable tests for proto2" FORCE)
+ set(protobuf_BUILD_SHARED_LIBS OFF CACHE BOOL "enable shared libs for proto2" FORCE)
+ add_subdirectory(third_party/protobuf/cmake)
+ include_directories(SYSTEM third_party/protobuf/src)
+ endif(${PROTOBUF_FOUND})
+else(UNIX)
+ if(BLOATY_ENABLE_RE2)
+ add_subdirectory(third_party/re2)
+ include_directories(third_party/re2)
+ endif(BLOATY_ENABLE_RE2)
+ add_subdirectory(third_party/capstone)
+ include_directories(third_party/capstone/include)
+ add_subdirectory(third_party/protobuf/cmake)
+ include_directories(SYSTEM third_party/protobuf/src)
+endif(UNIX)
+
+include_directories(.)
+include_directories(src)
+include_directories(third_party/abseil-cpp)
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/src")
+
+# Baseline build flags.
+set(CMAKE_CXX_FLAGS "-std=c++11 -W -Wall -Wno-sign-compare")
+set(CMAKE_CXX_FLAGS_DEBUG "-g1")
+set(CMAKE_CXX_FLAGS_RELEASE "-O2")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g1")
+set_source_files_properties(third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp PROPERTIES COMPILE_FLAGS -Wno-implicit-fallthrough)
+
+if(APPLE)
+elseif(UNIX)
+ if(BLOATY_ENABLE_BUILDID)
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id")
+ endif(BLOATY_ENABLE_BUILDID)
+endif()
+
+# When using Ninja, compiler output won't be colorized without this.
+include(CheckCXXCompilerFlag)
+CHECK_CXX_COMPILER_FLAG(-fdiagnostics-color=always SUPPORTS_COLOR_ALWAYS)
+if(SUPPORTS_COLOR_ALWAYS)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
+endif()
+
+# Implement ASAN/UBSAN options
+if(BLOATY_ENABLE_ASAN)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+ set(CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG} -fsanitize=address")
+endif()
+
+if(BLOATY_ENABLE_UBSAN)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+ set(CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG} -fsanitize=undefined")
+endif()
+
+if(DEFINED ENV{CXXFLAGS})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} $ENV{CXXFLAGS}")
+endif()
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/src)
+if(${PROTOC_FOUND})
+add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
+ DEPENDS protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
+ COMMAND protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
+ --cpp_out=${CMAKE_CURRENT_BINARY_DIR}/src
+ -I${CMAKE_CURRENT_SOURCE_DIR}/src
+)
+else(${PROTOC_FOUND})
+add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
+ COMMAND protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
+ --cpp_out=${CMAKE_CURRENT_BINARY_DIR}/src
+ -I${CMAKE_CURRENT_SOURCE_DIR}/src
+)
+endif(${PROTOC_FOUND})
+
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty_package.bloaty
+ DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+
+add_library(libbloaty STATIC
+ src/bloaty.cc
+ src/demangle.cc
+ src/disassemble.cc
+ ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
+ src/dwarf.cc
+ src/elf.cc
+ src/macho.cc
+ src/range_map.cc
+ src/webassembly.cc
+ # Until Abseil has a proper CMake build system
+ third_party/abseil-cpp/absl/base/internal/raw_logging.cc # Grrrr...
+ third_party/abseil-cpp/absl/base/internal/throw_delegate.cc
+ third_party/abseil-cpp/absl/numeric/int128.cc
+ third_party/abseil-cpp/absl/strings/ascii.cc
+ third_party/abseil-cpp/absl/strings/charconv.cc
+ third_party/abseil-cpp/absl/strings/escaping.cc
+ third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc
+ third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc
+ third_party/abseil-cpp/absl/strings/internal/memutil.cc
+ third_party/abseil-cpp/absl/strings/internal/utf8.cc
+ third_party/abseil-cpp/absl/strings/match.cc
+ third_party/abseil-cpp/absl/strings/numbers.cc
+ third_party/abseil-cpp/absl/strings/str_cat.cc
+ third_party/abseil-cpp/absl/strings/string_view.cc
+ third_party/abseil-cpp/absl/strings/str_split.cc
+ third_party/abseil-cpp/absl/strings/substitute.cc
+ third_party/abseil-cpp/absl/types/bad_optional_access.cc
+ # One source file, no special build system needed.
+ third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp
+ )
+
+if(UNIX)
+ set(LIBBLOATY_LIBS libbloaty)
+ if(${PROTOBUF_FOUND})
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} ${PROTOBUF_LIBRARIES})
+ else(${PROTOBUF_FOUND})
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} libprotoc)
+ endif(${PROTOBUF_FOUND})
+ if(BLOATY_ENABLE_RE2)
+ if(${RE2_FOUND})
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} ${RE2_LIBRARIES})
+ else(${RE2_FOUND})
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} re2)
+ endif(${RE2_FOUND})
+ endif(BLOATY_ENABLE_RE2)
+ if(${CAPSTONE_FOUND})
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} ${CAPSTONE_LIBRARIES})
+ else(${CAPSTONE_FOUND})
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} capstone-static)
+ endif(${CAPSTONE_FOUND})
+else(UNIX)
+ set(LIBBLOATY_LIBS libbloaty libprotoc capstone-static)
+ if(BLOATY_ENABLE_RE2)
+ set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} re2)
+ endif(BLOATY_ENABLE_RE2)
+endif(UNIX)
+
+if(UNIX)
+ if(BLOATY_ENABLE_RE2)
+ if(${RE2_FOUND})
+ link_directories(${RE2_LIBRARY_DIRS})
+ endif(${RE2_FOUND})
+ endif(BLOATY_ENABLE_RE2)
+ if(${CAPSTONE_FOUND})
+ link_directories(${CAPSTONE_LIBRARY_DIRS})
+ endif(${CAPSTONE_FOUND})
+ if(${PROTOBUF_FOUND})
+ link_directories(${PROTOBUF_LIBRARY_DIRS})
+ endif(${PROTOBUF_FOUND})
+endif(UNIX)
+
+if(DEFINED ENV{LIB_FUZZING_ENGINE})
+ message("LIB_FUZZING_ENGINE set, building fuzz_target instead of Bloaty")
+ add_executable(fuzz_target tests/fuzz_target.cc)
+ target_link_libraries(fuzz_target "${LIBBLOATY_LIBS}" "${CMAKE_THREAD_LIBS_INIT}" $ENV{LIB_FUZZING_ENGINE})
+else()
+ add_executable(bloaty src/main.cc)
+ target_link_libraries(bloaty "${LIBBLOATY_LIBS}")
+
+ # All of this is to add -pthread, which is required by re2 (not us).
+ find_package(Threads REQUIRED)
+ if(THREADS_HAVE_PTHREAD_ARG)
+ set_property(TARGET bloaty PROPERTY COMPILE_OPTIONS "-pthread")
+ set_property(TARGET bloaty PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
+ endif()
+ if(CMAKE_THREAD_LIBS_INIT)
+ target_link_libraries(bloaty "${CMAKE_THREAD_LIBS_INIT}")
+ endif()
+
+ if(BLOATY_ENABLE_CMAKETARGETS)
+ install(
+ TARGETS bloaty
+ EXPORT ${PROJECT_NAME}Targets
+ RUNTIME DESTINATION bin
+ )
+ else(BLOATY_ENABLE_CMAKETARGETS)
+ install(
+ TARGETS bloaty
+ RUNTIME DESTINATION bin
+ )
+ endif(BLOATY_ENABLE_CMAKETARGETS)
+
+ if (IS_DIRECTORY "${PROJECT_SOURCE_DIR}/tests")
+ enable_testing()
+
+ if(BUILD_TESTING)
+ option(INSTALL_GTEST "" OFF)
+ add_subdirectory(third_party/googletest)
+ include_directories(third_party/googletest/googletest/include)
+ include_directories(third_party/googletest/googlemock/include)
+
+ set(TEST_TARGETS
+ bloaty_test
+ bloaty_misc_test
+ range_map_test
+ )
+
+ foreach(target ${TEST_TARGETS})
+ add_executable(${target} tests/${target}.cc)
+ target_link_libraries(${target} "${LIBBLOATY_LIBS}" gtest_main gmock "${CMAKE_THREAD_LIBS_INIT}")
+ endforeach(target)
+
+ add_executable(fuzz_test tests/fuzz_target.cc tests/fuzz_driver.cc)
+ target_link_libraries(fuzz_test "${LIBBLOATY_LIBS}" "${CMAKE_THREAD_LIBS_INIT}")
+
+ file(GLOB fuzz_corpus tests/testdata/fuzz_corpus/*)
+
+ add_test(NAME range_map_test COMMAND range_map_test)
+ add_test(NAME bloaty_test_x86-64 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86_64)
+ add_test(NAME bloaty_test_x86 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86)
+ add_test(NAME bloaty_misc_test COMMAND bloaty_misc_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/misc)
+ add_test(NAME fuzz_test COMMAND fuzz_test ${fuzz_corpus} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/fuzz_corpus)
+ endif()
+ endif()
+
+ if(BLOATY_ENABLE_CMAKETARGETS)
+ install(EXPORT ${PROJECT_NAME}Targets NAMESPACE ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME})
+ endif(BLOATY_ENABLE_CMAKETARGETS)
+endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..0fa0a45
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,53 @@
+Want to contribute? Great! First, read this page (including
+the small print at the end).
+
+### Get in touch
+
+If your idea will take you more than, say, 30 minutes to
+implement, please get in touch first via the issue tracker
+to touch base about your plan. That will give an
+opportunity for early feedback and help avoid wasting your
+time.
+
+### Add tests
+
+Please add tests for any new features or bugfixes. Bloaty
+has a lot of functionality (different data sources, file
+formats, diff mode, hierarchical reports, etc). Having
+tests helps ensure that we don't regress any of these
+features.
+
+### Coding style
+
+Please follow the [Google C++ Style
+Guide](https://google.github.io/styleguide/cppguide.html). I
+recommend using `clang-format`, [which has a preset for the
+Google Style
+Guide](http://clang.llvm.org/docs/ClangFormatStyleOptions.html)
+
+### Code reviews
+
+All submissions, including submissions by project members,
+require review. We use GitHub pull requests for this
+purpose.
+
+### Legal Requirements
+Before we can use your code, you must sign the [Google
+Individual Contributor License
+Agreement](https://cla.developers.google.com/about/google-individual)
+(CLA), which you can do online. The CLA is necessary mainly
+because you own the copyright to your changes, even after
+your contribution becomes part of our codebase, so we need
+your permission to use and distribute your code. We also
+need to be sure of various other things—for instance that
+you'll tell us if you know that your code infringes on other
+people's patents. You don't have to sign the CLA until after
+you've submitted your code for review and a member has
+approved it, but you must do it before we can put your code
+into our codebase.
+
+### The small print
+Contributions made by corporations are covered by a
+different agreement than the one above, the [Software Grant
+and Corporate Contributor License
+Agreement](https://cla.developers.google.com/about/google-corporate).
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..096d7a8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,983 @@
+
+# Bloaty McBloatface: a size profiler for binaries
+
+[![Build Status](https://travis-ci.org/google/bloaty.svg?branch=master)](https://travis-ci.org/google/bloaty)
+
+Ever wondered what's making your binary big? Bloaty
+McBloatface will show you a size profile of the binary so
+you can understand what's taking up space inside.
+
+Bloaty performs a deep analysis of the binary. Using custom
+ELF, DWARF, and Mach-O parsers, Bloaty aims to accurately
+attribute every byte of the binary to the symbol or
+compile unit that produced it. It will even disassemble the
+binary looking for references to anonymous data. For more
+information about the analysis performed by Bloaty, please
+see [doc/how-bloaty-works.md](doc/how-bloaty-works.md).
+
+Bloaty works on binaries, shared objects, object files, and
+static libraries (`.a` files). The following file formats
+are supported:
+
+* ELF
+* Mach-O
+* WebAssembly (experimental)
+
+These formats are NOT supported, but I am very interested
+in adding support for them (I may implement these myself but
+would also be happy to get contributions!)
+
+* PE/COFF (not supported)
+* Android APK (not supported, might be tricky due to compression)
+
+This is not an official Google product.
+
+## Building Bloaty
+
+Building Bloaty requires CMake. On Ubuntu, install this with:
+
+```
+$ sudo apt install cmake
+```
+
+and on macOS, this can be installed using homebrew with:
+```
+$ brew install cmake
+```
+
+
+Bloaty bundles ``libprotobuf``, ``re2``, ``capstone``, and ``pkg-config`` as Git submodules, and uses ``protoc`` built from libprotobuf, but it will prefer the system's versions of those dependencies if available. All other dependencies are included as Git submodules.
+
+If the Git repository hasn't been cloned with the `--recursive` flag, the submodules can be checked out with:
+
+```
+$ git submodule update --init --recursive
+```
+
+To build, run:
+
+```
+$ cmake .
+$ make -j6
+```
+
+To run tests (Git only, these are not included in the release tarball), type:
+
+```
+$ make test
+```
+
+All the normal CMake features are available, like out-of-source builds:
+
+```
+$ mkdir build
+$ cd build
+$ cmake ..
+$ make -j6
+```
+
+## Running Bloaty
+
+Run it directly on a binary target. For example, run it on itself.
+
+```
+$ ./bloaty bloaty
+```
+
+On Linux you'll see output something like:
+
+```cmdoutput
+$ ./bloaty bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 30.0% 8.85Mi 0.0% 0 .debug_info
+ 24.7% 7.29Mi 0.0% 0 .debug_loc
+ 12.8% 3.79Mi 0.0% 0 .debug_str
+ 9.7% 2.86Mi 42.8% 2.86Mi .rodata
+ 6.9% 2.03Mi 30.3% 2.03Mi .text
+ 6.3% 1.85Mi 0.0% 0 .debug_line
+ 4.0% 1.19Mi 0.0% 0 .debug_ranges
+ 0.0% 0 15.0% 1.01Mi .bss
+ 1.6% 473Ki 0.0% 0 .strtab
+ 1.4% 435Ki 6.3% 435Ki .data
+ 0.8% 254Ki 3.7% 254Ki .eh_frame
+ 0.8% 231Ki 0.0% 0 .symtab
+ 0.5% 142Ki 0.0% 0 .debug_abbrev
+ 0.2% 56.8Ki 0.8% 56.8Ki .gcc_except_table
+ 0.1% 41.4Ki 0.6% 41.4Ki .eh_frame_hdr
+ 0.0% 11.4Ki 0.1% 9.45Ki [26 Others]
+ 0.0% 7.20Ki 0.1% 7.14Ki .dynstr
+ 0.0% 6.09Ki 0.1% 6.02Ki .dynsym
+ 0.0% 4.89Ki 0.1% 4.83Ki .rela.plt
+ 0.0% 4.59Ki 0.0% 0 [Unmapped]
+ 0.0% 3.30Ki 0.0% 3.23Ki .plt
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+The "VM SIZE" column tells you how much space the binary
+will take when it is loaded into memory. The "FILE SIZE"
+column tells you about how much space the binary is taking
+on disk. These two can be very different from each other:
+
+- Some data lives in the file but isn't loaded into memory,
+ like debug information.
+- Some data is mapped into memory but doesn't exist in the
+ file. This mainly applies to the `.bss` section
+ (zero-initialized data).
+
+The default breakdown in Bloaty is by sections, but many
+other ways of slicing the binary are supported such as
+symbols and segments. If you compiled with debug info, you
+can even break down by compile units and inlines!
+
+```cmdoutput
+$ ./bloaty bloaty -d compileunits
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 34.8% 10.2Mi 43.4% 2.91Mi [163 Others]
+ 17.2% 5.08Mi 4.3% 295Ki third_party/protobuf/src/google/protobuf/descriptor.cc
+ 7.3% 2.14Mi 2.6% 179Ki third_party/protobuf/src/google/protobuf/descriptor.pb.cc
+ 4.6% 1.36Mi 1.1% 78.4Ki third_party/protobuf/src/google/protobuf/text_format.cc
+ 3.7% 1.10Mi 4.5% 311Ki third_party/capstone/arch/ARM/ARMDisassembler.c
+ 1.3% 399Ki 15.9% 1.07Mi third_party/capstone/arch/M68K/M68KDisassembler.c
+ 3.2% 980Ki 1.1% 75.3Ki third_party/protobuf/src/google/protobuf/generated_message_reflection.cc
+ 3.2% 965Ki 0.6% 40.7Ki third_party/protobuf/src/google/protobuf/descriptor_database.cc
+ 2.8% 854Ki 12.0% 819Ki third_party/capstone/arch/X86/X86Mapping.c
+ 2.8% 846Ki 1.0% 66.4Ki third_party/protobuf/src/google/protobuf/extension_set.cc
+ 2.7% 800Ki 0.6% 41.2Ki third_party/protobuf/src/google/protobuf/generated_message_util.cc
+ 2.3% 709Ki 0.7% 50.7Ki third_party/protobuf/src/google/protobuf/wire_format.cc
+ 2.1% 637Ki 1.7% 117Ki third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp
+ 1.8% 549Ki 1.7% 114Ki src/bloaty.cc
+ 1.7% 503Ki 0.7% 48.1Ki third_party/protobuf/src/google/protobuf/repeated_field.cc
+ 1.6% 469Ki 6.2% 427Ki third_party/capstone/arch/X86/X86DisassemblerDecoder.c
+ 1.4% 434Ki 0.2% 15.9Ki third_party/protobuf/src/google/protobuf/message.cc
+ 1.4% 422Ki 0.3% 23.4Ki third_party/re2/re2/dfa.cc
+ 1.3% 407Ki 0.4% 24.9Ki third_party/re2/re2/regexp.cc
+ 1.3% 407Ki 0.4% 29.9Ki third_party/protobuf/src/google/protobuf/map_field.cc
+ 1.3% 397Ki 0.4% 24.8Ki third_party/re2/re2/re2.cc
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+
+Run Bloaty with `--help` to see a list of available options:
+
+```cmdoutput
+$ ./bloaty --help
+Bloaty McBloatface: a size profiler for binaries.
+
+USAGE: bloaty [OPTION]... FILE... [-- BASE_FILE...]
+
+Options:
+
+ --csv Output in CSV format instead of human-readable.
+ --tsv Output in TSV format instead of human-readable.
+ -c FILE Load configuration from <file>.
+ -d SOURCE,SOURCE Comma-separated list of sources to scan.
+ --debug-file=FILE Use this file for debug symbols and/or symbol table.
+ -C MODE How to demangle symbols. Possible values are:
+ --demangle=MODE --demangle=none no demangling, print raw symbols
+ --demangle=short demangle, but omit arg/return types
+ --demangle=full print full demangled type
+ The default is --demangle=short.
+ --disassemble=FUNCTION
+ Disassemble this function (EXPERIMENTAL)
+ --domain=DOMAIN Which domains to show. Possible values are:
+ --domain=vm
+ --domain=file
+ --domain=both (the default)
+ -n NUM How many rows to show per level before collapsing
+ other keys into '[Other]'. Set to '0' for unlimited.
+ Defaults to 20.
+ -s SORTBY Whether to sort by VM or File size. Possible values
+ are:
+ -s vm
+ -s file
+ -s both (the default: sorts by max(vm, file)).
+ -w Wide output; don't truncate long labels.
+ --help Display this message and exit.
+ --list-sources Show a list of available sources and exit.
+ --source-filter=PATTERN
+ Only show keys with names matching this pattern.
+
+Options for debugging Bloaty:
+
+ --debug-vmaddr=ADDR
+ --debug-fileoff=OFF
+ Print extended debugging information for the given
+ VM address and/or file offset.
+ -v Verbose output. Dumps warnings encountered during
+ processing and full VM/file maps at the end.
+ Add more v's (-vv, -vvv) for even more.
+
+```
+
+# Size Diffs
+
+You can use Bloaty to see how the size of a binary changed.
+On the command-line, pass `--` followed by the files you
+want to use as the diff base.
+
+For example, here is a size diff between a couple different versions
+of Bloaty, showing how it grew when I added some features.
+
+```
+$ ./bloaty bloaty -- oldbloaty
+ VM SIZE FILE SIZE
+ -------------- --------------
+ [ = ] 0 .debug_loc +688Ki +9.9%
+ +19% +349Ki .text +349Ki +19%
+ [ = ] 0 .debug_ranges +180Ki +11%
+ [ = ] 0 .debug_info +120Ki +0.9%
+ +23% +73.5Ki .rela.dyn +73.5Ki +23%
+ +3.5% +57.1Ki .rodata +57.1Ki +3.5%
+ +28e3% +53.9Ki .data +53.9Ki +28e3%
+ [ = ] 0 .debug_line +40.2Ki +4.8%
+ +2.3% +5.35Ki .eh_frame +5.35Ki +2.3%
+ -6.0% -5 [Unmapped] +2.65Ki +215%
+ +0.5% +1.70Ki .dynstr +1.70Ki +0.5%
+ [ = ] 0 .symtab +1.59Ki +0.9%
+ [ = ] 0 .debug_abbrev +1.29Ki +0.5%
+ [ = ] 0 .strtab +1.26Ki +0.3%
+ +16% +992 .bss 0 [ = ]
+ +0.2% +642 [13 Others] +849 +0.2%
+ +0.6% +792 .dynsym +792 +0.6%
+ +16% +696 .rela.plt +696 +16%
+ +16% +464 .plt +464 +16%
+ +0.8% +312 .eh_frame_hdr +312 +0.8%
+ [ = ] 0 .debug_str -19.6Ki -0.4%
+ +11% +544Ki TOTAL +1.52Mi +4.6%
+```
+
+Each line shows how much each part changed compared to
+its previous size. Most sections grew, but one section at
+the bottom (`.debug_str`) shrank. The "TOTAL" line shows
+how much the size changed overall.
+
+# Hierarchical Profiles
+
+Bloaty supports breaking the binary down in lots of
+different ways. You can combine multiple data sources into
+a single hierarchical profile. For example, we can use the
+`segments` and `sections` data sources in a single report:
+
+```cmdoutput
+$ ./bloaty -d segments,sections bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 80.7% 23.8Mi 0.0% 0 [Unmapped]
+ 37.2% 8.85Mi NAN% 0 .debug_info
+ 30.6% 7.29Mi NAN% 0 .debug_loc
+ 15.9% 3.79Mi NAN% 0 .debug_str
+ 7.8% 1.85Mi NAN% 0 .debug_line
+ 5.0% 1.19Mi NAN% 0 .debug_ranges
+ 1.9% 473Ki NAN% 0 .strtab
+ 1.0% 231Ki NAN% 0 .symtab
+ 0.6% 142Ki NAN% 0 .debug_abbrev
+ 0.0% 4.59Ki NAN% 0 [Unmapped]
+ 0.0% 392 NAN% 0 .shstrtab
+ 0.0% 139 NAN% 0 .debug_macinfo
+ 0.0% 68 NAN% 0 .comment
+ 10.9% 3.21Mi 47.9% 3.21Mi LOAD #4 [R]
+ 89.3% 2.86Mi 89.3% 2.86Mi .rodata
+ 7.7% 254Ki 7.7% 254Ki .eh_frame
+ 1.7% 56.8Ki 1.7% 56.8Ki .gcc_except_table
+ 1.3% 41.4Ki 1.3% 41.4Ki .eh_frame_hdr
+ 0.0% 1 0.0% 1 [LOAD #4 [R]]
+ 6.9% 2.03Mi 30.3% 2.03Mi LOAD #3 [RX]
+ 99.8% 2.03Mi 99.8% 2.03Mi .text
+ 0.2% 3.23Ki 0.2% 3.23Ki .plt
+ 0.0% 28 0.0% 28 [LOAD #3 [RX]]
+ 0.0% 23 0.0% 23 .init
+ 0.0% 9 0.0% 9 .fini
+ 1.5% 439Ki 21.4% 1.44Mi LOAD #5 [RW]
+ 0.0% 0 70.1% 1.01Mi .bss
+ 99.1% 435Ki 29.6% 435Ki .data
+ 0.4% 1.63Ki 0.1% 1.63Ki .got.plt
+ 0.3% 1.46Ki 0.1% 1.46Ki .data.rel.ro
+ 0.1% 560 0.0% 560 .dynamic
+ 0.1% 384 0.0% 376 .init_array
+ 0.0% 32 0.0% 56 [LOAD #5 [RW]]
+ 0.0% 32 0.0% 32 .got
+ 0.0% 16 0.0% 16 .tdata
+ 0.0% 8 0.0% 8 .fini_array
+ 0.0% 0 0.0% 8 .tbss
+ 0.1% 23.3Ki 0.3% 23.3Ki LOAD #2 [R]
+ 30.7% 7.14Ki 30.7% 7.14Ki .dynstr
+ 25.9% 6.02Ki 25.9% 6.02Ki .dynsym
+ 20.8% 4.83Ki 20.8% 4.83Ki .rela.plt
+ 7.7% 1.78Ki 7.7% 1.78Ki .hash
+ 5.0% 1.17Ki 5.0% 1.17Ki .rela.dyn
+ 3.1% 741 3.1% 741 [LOAD #2 [R]]
+ 2.7% 632 2.7% 632 .gnu.hash
+ 2.2% 514 2.2% 514 .gnu.version
+ 1.6% 384 1.6% 384 .gnu.version_r
+ 0.2% 36 0.2% 36 .note.gnu.build-id
+ 0.1% 32 0.1% 32 .note.ABI-tag
+ 0.1% 28 0.1% 28 .interp
+ 0.0% 2.56Ki 0.0% 0 [ELF Headers]
+ 46.3% 1.19Ki NAN% 0 [19 Others]
+ 7.3% 192 NAN% 0 [ELF Headers]
+ 2.4% 64 NAN% 0 .comment
+ 2.4% 64 NAN% 0 .data
+ 2.4% 64 NAN% 0 .data.rel.ro
+ 2.4% 64 NAN% 0 .debug_abbrev
+ 2.4% 64 NAN% 0 .debug_info
+ 2.4% 64 NAN% 0 .debug_line
+ 2.4% 64 NAN% 0 .debug_loc
+ 2.4% 64 NAN% 0 .debug_macinfo
+ 2.4% 64 NAN% 0 .debug_ranges
+ 2.4% 64 NAN% 0 .debug_str
+ 2.4% 64 NAN% 0 .dynamic
+ 2.4% 64 NAN% 0 .dynstr
+ 2.4% 64 NAN% 0 .dynsym
+ 2.4% 64 NAN% 0 .eh_frame
+ 2.4% 64 NAN% 0 .eh_frame_hdr
+ 2.4% 64 NAN% 0 .fini
+ 2.4% 64 NAN% 0 .fini_array
+ 2.4% 64 NAN% 0 .gcc_except_table
+ 2.4% 64 NAN% 0 .gnu.hash
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+Bloaty displays a maximum of 20 lines for each level; other
+values are grouped into an `[Other]` bin. Use `-n <num>`
+to override this setting. If you pass `-n 0`, all data
+will be output without collapsing anything into `[Other]`.
+
+# Debugging Stripped Binaries
+
+Bloaty supports reading debuginfo/symbols from separate
+binaries. This lets you profile a stripped binary, even for
+data sources like "compileunits" or "symbols" that require
+this extra information.
+
+Bloaty uses build IDs to verify that the binary and the
+debug file match. Otherwise the results would be nonsense
+(this kind of mismatch might sound unlikely but it's a very
+easy mistake to make, and one that I made several times even
+as Bloaty's author!).
+
+If your binary has a build ID, then using separate debug
+files is as simple as:
+
+```
+$ cp bloaty bloaty.stripped
+$ strip bloaty.stripped
+$ ./bloaty -d symbols --debug-file=bloaty bloaty.stripped
+```
+
+Some format-specific notes follow.
+
+## ELF
+
+For ELF, make sure you are compiling with build IDs enabled.
+With gcc this happens automatically, but [Clang decided not
+to make this the default, since it makes the link
+slower](http://releases.llvm.org/3.9.0/tools/clang/docs/ReleaseNotes.html#major-new-features).
+For Clang add `-Wl,--build-id` to your link line. (If you
+want a slightly faster link and don't care about
+reproducibility, you can use `-Wl,--build-id=uuid` instead).
+
+Bloaty does not currently support the GNU debuglink or
+looking up debug files by build ID, [which are the methods
+GDB uses to find debug
+files](https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html).
+If there are use cases where Bloaty's `--debug-file` option
+won't work, we can reconsider implementing these.
+
+## Mach-O
+
+Mach-O files always have build IDs (as far as I can tell),
+so no special configuration is needed to make sure you get
+them.
+
+Mach-O puts debug information in separate files which you
+can create with `dsymutil`:
+
+```
+$ dsymutil bloaty
+$ strip bloaty (optional)
+$ ./bloaty -d symbols --debug-file=bloaty.dSYM/Contents/Resources/DWARF/bloaty bloaty
+```
+
+# Configuration Files
+
+Any options that you can specify on the command-line, you
+can put into a configuration file instead. Then you can use
+`-c FILE` to load those options from the config file. Also,
+a few features are only available with configuration files
+and cannot be specified on the command-line.
+
+The configuration file is in Protocol Buffers text format.
+The schema is the `Options` message in
+[src/bloaty.proto](src/bloaty.proto).
+
+The two most useful cases for configuration files are:
+
+1. You have too many input files to put on the command-line.
+ At Google we sometimes run Bloaty over thousands of input
+ files. This can cause the overall command-line to exceed
+ OS limits. With a config file, we can avoid this:
+
+ ```
+ filename: "path/to/long_filename_a.o"
+ filename: "path/to/long_filename_b.o"
+ filename: "path/to/long_filename_c.o"
+ # ...repeat for thousands of files.
+ ```
+2. For custom data sources, it can be very useful to put
+ them in a config file, for greater reusability. For
+ example, see the custom data sources defined in
+ [custom_sources.bloaty](custom_sources.bloaty).
+ Also read more about custom data sources below.
+
+# Data Sources
+
+Bloaty has many data sources built in. These all provide
+different ways of looking at the binary. You can also
+create your own data sources by applying regexes to the
+built-in data sources (see "Custom Data Sources" below).
+
+While Bloaty works on binaries, shared objects, object
+files, and static libraries (`.a` files), some of the data
+sources don't work on object files. This applies especially
+to data sources that read debug info.
+
+## Segments
+
+Segments are what the run-time loader uses to determine what
+parts of the binary need to be loaded/mapped into memory.
+There are usually just a few segments: one for each set of
+`mmap()` permissions required:
+
+```cmdoutput
+$ ./bloaty -d segments bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 80.7% 23.8Mi 0.0% 0 [Unmapped]
+ 10.9% 3.21Mi 47.9% 3.21Mi LOAD #4 [R]
+ 6.9% 2.03Mi 30.3% 2.03Mi LOAD #3 [RX]
+ 1.5% 439Ki 21.4% 1.44Mi LOAD #5 [RW]
+ 0.1% 23.3Ki 0.3% 23.3Ki LOAD #2 [R]
+ 0.0% 2.56Ki 0.0% 0 [ELF Headers]
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+Here we see one segment mapped `[RX]` (read/execute) and
+one segment mapped `[RW]` (read/write). A large part of
+the binary is not loaded into memory, which we see as
+`[Unmapped]`.
+
+Object files and static libraries don't have segments.
+However we fake it by grouping sections by their flags.
+This gives us a break-down sort of like real segments.
+
+```cmdoutput
+$ ./bloaty -d segments CMakeFiles/libbloaty.dir/src/bloaty.cc.o
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 87.5% 972Ki 0.0% 0 Section []
+ 8.2% 90.9Ki 78.3% 90.9Ki Section [AX]
+ 2.3% 25.2Ki 21.7% 25.2Ki Section [A]
+ 2.0% 22.6Ki 0.0% 0 [ELF Headers]
+ 0.1% 844 0.0% 0 [Unmapped]
+ 0.0% 24 0.1% 72 Section [AW]
+ 100.0% 1.09Mi 100.0% 116Ki TOTAL
+```
+
+## Sections
+
+Sections give us a bit more granular look into the binary.
+If we want to find the symbol table, the unwind information,
+or the debug information, each kind of information lives in
+its own section. Bloaty's default output is sections.
+
+```cmdoutput
+$ ./bloaty -d sections bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 30.0% 8.85Mi 0.0% 0 .debug_info
+ 24.7% 7.29Mi 0.0% 0 .debug_loc
+ 12.8% 3.79Mi 0.0% 0 .debug_str
+ 9.7% 2.86Mi 42.8% 2.86Mi .rodata
+ 6.9% 2.03Mi 30.3% 2.03Mi .text
+ 6.3% 1.85Mi 0.0% 0 .debug_line
+ 4.0% 1.19Mi 0.0% 0 .debug_ranges
+ 0.0% 0 15.0% 1.01Mi .bss
+ 1.6% 473Ki 0.0% 0 .strtab
+ 1.4% 435Ki 6.3% 435Ki .data
+ 0.8% 254Ki 3.7% 254Ki .eh_frame
+ 0.8% 231Ki 0.0% 0 .symtab
+ 0.5% 142Ki 0.0% 0 .debug_abbrev
+ 0.2% 56.8Ki 0.8% 56.8Ki .gcc_except_table
+ 0.1% 41.4Ki 0.6% 41.4Ki .eh_frame_hdr
+ 0.0% 11.4Ki 0.1% 9.45Ki [26 Others]
+ 0.0% 7.20Ki 0.1% 7.14Ki .dynstr
+ 0.0% 6.09Ki 0.1% 6.02Ki .dynsym
+ 0.0% 4.89Ki 0.1% 4.83Ki .rela.plt
+ 0.0% 4.59Ki 0.0% 0 [Unmapped]
+ 0.0% 3.30Ki 0.0% 3.23Ki .plt
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+Sections are regions of the binary that the linker
+treats as atomic when linking. The linker will never break
+apart or rearrange the data within a section. This is why it
+is necessary to compile with `-ffunction-sections` and
+`-fdata-sections` if you want the linker to strip out
+individual functions or variables that have no references.
+However the linker will often combine many input sections
+into a single output section.
+
+## Symbols
+
+Symbols come from the symbol table, and represent individual
+functions or variables.
+
+```cmdoutput
+$ ./bloaty -d symbols bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 30.0% 8.85Mi 0.0% 0 [section .debug_info]
+ 24.7% 7.29Mi 0.0% 0 [section .debug_loc]
+ 12.8% 3.79Mi 0.0% 0 [section .debug_str]
+ 11.7% 3.44Mi 41.2% 2.76Mi [5895 Others]
+ 6.3% 1.85Mi 0.0% 0 [section .debug_line]
+ 4.9% 1.43Mi 21.4% 1.43Mi insns
+ 4.0% 1.19Mi 0.0% 0 [section .debug_ranges]
+ 0.0% 44 14.9% 1024Ki g_instruction_table
+ 0.8% 255Ki 3.7% 255Ki [section .rodata]
+ 0.8% 240Ki 3.5% 240Ki printAliasInstr
+ 0.6% 175Ki 2.6% 175Ki insn_ops
+ 0.5% 153Ki 2.2% 153Ki ARMInsts
+ 0.5% 142Ki 0.0% 0 [section .debug_abbrev]
+ 0.5% 140Ki 2.0% 140Ki x86DisassemblerTwoByteOpcodes
+ 0.4% 113Ki 1.6% 113Ki insn_name_maps
+ 0.4% 106Ki 1.6% 106Ki printInstruction.OpInfo
+ 0.3% 97.1Ki 1.4% 96.9Ki printInstruction.OpInfo2
+ 0.2% 74.0Ki 1.1% 74.0Ki x86DisassemblerThreeByte38Opcodes
+ 0.2% 71.1Ki 1.0% 70.8Ki printInstruction.AsmStrs
+ 0.2% 61.1Ki 0.9% 60.9Ki DecoderTable32
+ 0.2% 56.8Ki 0.8% 56.8Ki [section .gcc_except_table]
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+You can control how symbols are demangled with the `-C MODE`
+or `--demangle=MODE` flag. You can also specify the
+demangling mode explicitly in the `-d` switch. We have
+three different demangling modes:
+
+* `-C none` or `-d rawsymbols`: no demangling.
+* `-C short` or `-d shortsymbols`: short demangling: return
+ types, template parameters, and function parameter types
+ are omitted. For example:
+ `bloaty::dwarf::FormReader<>::GetFunctionForForm<>()`.
+ This is the default.
+* `-C full` or `-d fullsymbols`: full demangling.
+
+One very handy thing about `-C short` (the default) is that
+it groups all template instantiations together, regardless
+of their parameters. You can use this to determine how much
+code size you are paying by doing multiple instantiations of
+templates. Try `bloaty -d shortsymbols,fullsymbols`.
+
+## Input Files
+
+When you pass multiple files to Bloaty, the `inputfiles`
+source will let you break it down by input file:
+
+```cmdoutput
+$ ./bloaty -d inputfiles CMakeFiles/libbloaty.dir/src/*.o
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 42.8% 1.09Mi 37.9% 116Ki CMakeFiles/libbloaty.dir/src/bloaty.cc.o
+ 15.7% 407Ki 15.5% 47.6Ki CMakeFiles/libbloaty.dir/src/dwarf.cc.o
+ 10.3% 266Ki 10.4% 31.8Ki CMakeFiles/libbloaty.dir/src/bloaty.pb.cc.o
+ 9.0% 232Ki 9.7% 29.8Ki CMakeFiles/libbloaty.dir/src/elf.cc.o
+ 8.0% 207Ki 8.7% 26.6Ki CMakeFiles/libbloaty.dir/src/macho.cc.o
+ 4.4% 114Ki 4.3% 13.1Ki CMakeFiles/libbloaty.dir/src/webassembly.cc.o
+ 4.0% 103Ki 7.5% 22.9Ki CMakeFiles/libbloaty.dir/src/demangle.cc.o
+ 3.4% 87.0Ki 3.3% 10.2Ki CMakeFiles/libbloaty.dir/src/range_map.cc.o
+ 2.5% 64.3Ki 2.6% 7.94Ki CMakeFiles/libbloaty.dir/src/disassemble.cc.o
+ 100.0% 2.53Mi 100.0% 306Ki TOTAL
+```
+
+## Archive Members
+
+When you are running Bloaty on a `.a` file, the `armembers`
+source will let you break it down by `.o` file inside the
+archive.
+
+```cmdoutput
+$ ./bloaty -d armembers liblibbloaty.a
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 28.5% 1.21Mi 23.5% 120Ki cxa_demangle.cpp.o
+ 25.6% 1.09Mi 22.6% 116Ki bloaty.cc.o
+ 9.4% 407Ki 9.3% 47.6Ki dwarf.cc.o
+ 6.2% 266Ki 6.2% 31.8Ki bloaty.pb.cc.o
+ 5.4% 232Ki 5.8% 29.8Ki elf.cc.o
+ 4.8% 207Ki 5.2% 26.6Ki macho.cc.o
+ 2.6% 114Ki 2.6% 13.1Ki webassembly.cc.o
+ 2.4% 103Ki 4.5% 22.9Ki demangle.cc.o
+ 2.0% 87.0Ki 2.0% 10.2Ki range_map.cc.o
+ 1.9% 80.4Ki 3.2% 16.7Ki charconv_bigint.cc.o
+ 1.8% 79.3Ki 2.7% 14.0Ki escaping.cc.o
+ 1.5% 65.0Ki 2.1% 10.9Ki [9 Others]
+ 1.5% 64.3Ki 1.5% 7.94Ki disassemble.cc.o
+ 1.4% 59.9Ki 0.0% 0 [AR Symbol Table]
+ 1.0% 45.2Ki 2.4% 12.4Ki numbers.cc.o
+ 0.9% 40.9Ki 2.2% 11.4Ki charconv.cc.o
+ 0.9% 38.8Ki 1.2% 6.10Ki int128.cc.o
+ 0.7% 30.1Ki 1.1% 5.58Ki str_cat.cc.o
+ 0.6% 24.1Ki 0.8% 3.92Ki string_view.cc.o
+ 0.5% 21.2Ki 0.6% 3.21Ki throw_delegate.cc.o
+ 0.4% 19.2Ki 0.4% 2.26Ki ascii.cc.o
+ 100.0% 4.23Mi 100.0% 512Ki TOTAL
+```
+
+You are free to use this data source even for non-`.a`
+files, but it won't be very useful since it will always just
+resolve to the input file (the `.a` file).
+
+## Compile Units
+
+Using debug information, we can tell what compile unit (and
+corresponding source file) each bit of the binary came from.
+
+```cmdoutput
+$ ./bloaty -d compileunits bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 34.8% 10.2Mi 43.4% 2.91Mi [163 Others]
+ 17.2% 5.08Mi 4.3% 295Ki third_party/protobuf/src/google/protobuf/descriptor.cc
+ 7.3% 2.14Mi 2.6% 179Ki third_party/protobuf/src/google/protobuf/descriptor.pb.cc
+ 4.6% 1.36Mi 1.1% 78.4Ki third_party/protobuf/src/google/protobuf/text_format.cc
+ 3.7% 1.10Mi 4.5% 311Ki third_party/capstone/arch/ARM/ARMDisassembler.c
+ 1.3% 399Ki 15.9% 1.07Mi third_party/capstone/arch/M68K/M68KDisassembler.c
+ 3.2% 980Ki 1.1% 75.3Ki third_party/protobuf/src/google/protobuf/generated_message_reflection.cc
+ 3.2% 965Ki 0.6% 40.7Ki third_party/protobuf/src/google/protobuf/descriptor_database.cc
+ 2.8% 854Ki 12.0% 819Ki third_party/capstone/arch/X86/X86Mapping.c
+ 2.8% 846Ki 1.0% 66.4Ki third_party/protobuf/src/google/protobuf/extension_set.cc
+ 2.7% 800Ki 0.6% 41.2Ki third_party/protobuf/src/google/protobuf/generated_message_util.cc
+ 2.3% 709Ki 0.7% 50.7Ki third_party/protobuf/src/google/protobuf/wire_format.cc
+ 2.1% 637Ki 1.7% 117Ki third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp
+ 1.8% 549Ki 1.7% 114Ki src/bloaty.cc
+ 1.7% 503Ki 0.7% 48.1Ki third_party/protobuf/src/google/protobuf/repeated_field.cc
+ 1.6% 469Ki 6.2% 427Ki third_party/capstone/arch/X86/X86DisassemblerDecoder.c
+ 1.4% 434Ki 0.2% 15.9Ki third_party/protobuf/src/google/protobuf/message.cc
+ 1.4% 422Ki 0.3% 23.4Ki third_party/re2/re2/dfa.cc
+ 1.3% 407Ki 0.4% 24.9Ki third_party/re2/re2/regexp.cc
+ 1.3% 407Ki 0.4% 29.9Ki third_party/protobuf/src/google/protobuf/map_field.cc
+ 1.3% 397Ki 0.4% 24.8Ki third_party/re2/re2/re2.cc
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+## Inlines
+
+The DWARF debugging information also contains "line info"
+that understands inlining. So within a
+function, it will know which instructions came from an
+inlined function from a header file. This is the
+information the debugger uses to point at a specific source
+line as you're tracing through a program.
+
+```cmdoutput
+$ ./bloaty -d inlines bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 30.0% 8.85Mi 0.0% 0 [section .debug_info]
+ 24.7% 7.29Mi 0.0% 0 [section .debug_loc]
+ 12.8% 3.79Mi 0.0% 0 [section .debug_str]
+ 9.7% 2.86Mi 42.8% 2.86Mi [section .rodata]
+ 6.6% 1.96Mi 29.1% 1.95Mi [44060 Others]
+ 6.3% 1.85Mi 0.0% 0 [section .debug_line]
+ 4.0% 1.19Mi 0.0% 0 [section .debug_ranges]
+ 0.0% 0 15.0% 1.01Mi [section .bss]
+ 1.6% 473Ki 0.0% 0 [section .strtab]
+ 1.4% 435Ki 6.3% 435Ki [section .data]
+ 0.8% 254Ki 3.7% 254Ki [section .eh_frame]
+ 0.8% 231Ki 0.0% 0 [section .symtab]
+ 0.5% 142Ki 0.0% 0 [section .debug_abbrev]
+ 0.2% 56.8Ki 0.8% 56.8Ki [section .gcc_except_table]
+ 0.1% 41.4Ki 0.6% 41.4Ki [section .eh_frame_hdr]
+ 0.1% 27.4Ki 0.4% 27.4Ki /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/basic_string.h:187
+ 0.1% 19.1Ki 0.3% 19.1Ki /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/basic_string.h:183
+ 0.1% 16.8Ki 0.2% 16.8Ki /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/ext/new_allocator.h:128
+ 0.1% 16.0Ki 0.2% 16.0Ki /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/char_traits.h:300
+ 0.1% 15.8Ki 0.2% 15.8Ki /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/basic_string.h:222
+ 0.0% 14.7Ki 0.2% 14.7Ki [section .text]
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+# Custom Data Sources
+
+Sometimes you want to munge the labels from an existing data
+source. For example, when we use "compileunits" on Bloaty
+itself, we see files from all our dependencies mixed
+together:
+
+```cmdoutput
+$ ./bloaty -d compileunits bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 34.8% 10.2Mi 43.4% 2.91Mi [163 Others]
+ 17.2% 5.08Mi 4.3% 295Ki third_party/protobuf/src/google/protobuf/descriptor.cc
+ 7.3% 2.14Mi 2.6% 179Ki third_party/protobuf/src/google/protobuf/descriptor.pb.cc
+ 4.6% 1.36Mi 1.1% 78.4Ki third_party/protobuf/src/google/protobuf/text_format.cc
+ 3.7% 1.10Mi 4.5% 311Ki third_party/capstone/arch/ARM/ARMDisassembler.c
+ 1.3% 399Ki 15.9% 1.07Mi third_party/capstone/arch/M68K/M68KDisassembler.c
+ 3.2% 980Ki 1.1% 75.3Ki third_party/protobuf/src/google/protobuf/generated_message_reflection.cc
+ 3.2% 965Ki 0.6% 40.7Ki third_party/protobuf/src/google/protobuf/descriptor_database.cc
+ 2.8% 854Ki 12.0% 819Ki third_party/capstone/arch/X86/X86Mapping.c
+ 2.8% 846Ki 1.0% 66.4Ki third_party/protobuf/src/google/protobuf/extension_set.cc
+ 2.7% 800Ki 0.6% 41.2Ki third_party/protobuf/src/google/protobuf/generated_message_util.cc
+ 2.3% 709Ki 0.7% 50.7Ki third_party/protobuf/src/google/protobuf/wire_format.cc
+ 2.1% 637Ki 1.7% 117Ki third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp
+ 1.8% 549Ki 1.7% 114Ki src/bloaty.cc
+ 1.7% 503Ki 0.7% 48.1Ki third_party/protobuf/src/google/protobuf/repeated_field.cc
+ 1.6% 469Ki 6.2% 427Ki third_party/capstone/arch/X86/X86DisassemblerDecoder.c
+ 1.4% 434Ki 0.2% 15.9Ki third_party/protobuf/src/google/protobuf/message.cc
+ 1.4% 422Ki 0.3% 23.4Ki third_party/re2/re2/dfa.cc
+ 1.3% 407Ki 0.4% 24.9Ki third_party/re2/re2/regexp.cc
+ 1.3% 407Ki 0.4% 29.9Ki third_party/protobuf/src/google/protobuf/map_field.cc
+ 1.3% 397Ki 0.4% 24.8Ki third_party/re2/re2/re2.cc
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+If we want to bucket all of these by which library they came
+from, we can write a custom data source. It specifies the
+base data source and a set of regexes to apply to it. The
+regexes are tried in order, and the first matching regex
+will cause the entire label to be rewritten to the
+replacement text. Regexes follow [RE2
+syntax](https://github.com/google/re2/wiki/Syntax) and the
+replacement can refer to capture groups.
+
+```cmdoutput
+$ cat bloaty_package.bloaty
+custom_data_source: {
+ name: "bloaty_package"
+ base_data_source: "compileunits"
+
+ rewrite: {
+ pattern: "^(\\.\\./)?src"
+ replacement: "src"
+ }
+ rewrite: {
+ pattern: "^(\\.\\./)?(third_party/\\w+)"
+ replacement: "\\2"
+ }
+}
+```
+
+Then use the data source like so:
+
+```cmdoutput
+$ ./bloaty -c bloaty_package.bloaty -d bloaty_package bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 56.6% 16.7Mi 16.6% 1.11Mi third_party/protobuf
+ 24.9% 7.35Mi 68.5% 4.58Mi third_party/capstone
+ 9.4% 2.77Mi 3.2% 221Ki third_party/re2
+ 4.6% 1.36Mi 4.1% 280Ki src
+ 2.1% 637Ki 1.7% 117Ki third_party/demumble
+ 0.7% 209Ki 1.1% 73.8Ki third_party/abseil
+ 0.7% 204Ki 3.0% 204Ki [section .rodata]
+ 0.2% 56.8Ki 0.8% 56.8Ki [section .gcc_except_table]
+ 0.2% 47.7Ki 0.0% 0 [section .debug_str]
+ 0.2% 46.3Ki 0.0% 0 [section .symtab]
+ 0.1% 42.0Ki 0.6% 42.0Ki [section .text]
+ 0.1% 41.4Ki 0.0% 0 [section .debug_loc]
+ 0.1% 29.3Ki 0.0% 0 [section .strtab]
+ 0.0% 12.0Ki 0.2% 11.5Ki [30 Others]
+ 0.0% 7.36Ki 0.0% 0 [section .debug_ranges]
+ 0.0% 6.10Ki 0.1% 6.10Ki [section .dynstr]
+ 0.0% 4.99Ki 0.1% 4.99Ki [section .dynsym]
+ 0.0% 4.77Ki 0.1% 4.77Ki [section .eh_frame]
+ 0.0% 4.59Ki 0.0% 0 [Unmapped]
+ 0.0% 3.23Ki 0.0% 3.23Ki [section .plt]
+ 0.0% 2.50Ki 0.0% 0 [ELF Headers]
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+We can get an even richer report by combining the
+`bloaty_package` source with the original `compileunits`
+source:
+
+```cmdoutput
+$ ./bloaty -c config.bloaty -d bloaty_package,compileunits bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 56.6% 16.7Mi 16.6% 1.11Mi third_party/protobuf
+ 30.5% 5.08Mi 26.0% 295Ki third_party/protobuf/src/google/protobuf/descriptor.cc
+ 12.8% 2.14Mi 15.8% 179Ki third_party/protobuf/src/google/protobuf/descriptor.pb.cc
+ 8.2% 1.36Mi 6.9% 78.4Ki third_party/protobuf/src/google/protobuf/text_format.cc
+ 5.7% 980Ki 6.6% 75.3Ki third_party/protobuf/src/google/protobuf/generated_message_reflection.cc
+ 5.7% 965Ki 3.6% 40.7Ki third_party/protobuf/src/google/protobuf/descriptor_database.cc
+ 5.0% 846Ki 5.8% 66.4Ki third_party/protobuf/src/google/protobuf/extension_set.cc
+ 4.7% 800Ki 3.6% 41.2Ki third_party/protobuf/src/google/protobuf/generated_message_util.cc
+ 4.7% 798Ki 6.1% 69.3Ki [16 Others]
+ 4.2% 709Ki 4.5% 50.7Ki third_party/protobuf/src/google/protobuf/wire_format.cc
+ 2.9% 503Ki 4.2% 48.1Ki third_party/protobuf/src/google/protobuf/repeated_field.cc
+ 2.5% 434Ki 1.4% 15.9Ki third_party/protobuf/src/google/protobuf/message.cc
+ 2.4% 407Ki 2.6% 29.9Ki third_party/protobuf/src/google/protobuf/map_field.cc
+ 1.8% 309Ki 2.4% 27.5Ki third_party/protobuf/src/google/protobuf/stubs/strutil.cc
+ 1.5% 256Ki 0.8% 9.19Ki third_party/protobuf/src/google/protobuf/dynamic_message.cc
+ 1.2% 208Ki 1.2% 13.2Ki third_party/protobuf/src/google/protobuf/extension_set_heavy.cc
+ 1.2% 206Ki 2.4% 27.4Ki third_party/protobuf/src/google/protobuf/wire_format_lite.cc
+ 1.1% 192Ki 1.6% 17.7Ki third_party/protobuf/src/google/protobuf/parse_context.cc
+ 1.1% 187Ki 0.8% 9.33Ki third_party/protobuf/src/google/protobuf/reflection_ops.cc
+ 1.0% 167Ki 1.2% 13.9Ki third_party/protobuf/src/google/protobuf/message_lite.cc
+ 1.0% 165Ki 1.6% 18.7Ki third_party/protobuf/src/google/protobuf/io/tokenizer.cc
+ 0.9% 152Ki 0.7% 7.57Ki third_party/protobuf/src/google/protobuf/unknown_field_set.cc
+ 24.9% 7.35Mi 68.5% 4.58Mi third_party/capstone
+ 17.4% 1.28Mi 6.5% 303Ki [38 Others]
+ 14.9% 1.10Mi 6.6% 311Ki third_party/capstone/arch/ARM/ARMDisassembler.c
+ 5.3% 399Ki 23.3% 1.07Mi third_party/capstone/arch/M68K/M68KDisassembler.c
+ 11.4% 854Ki 17.5% 819Ki third_party/capstone/arch/X86/X86Mapping.c
+ 6.2% 469Ki 9.1% 427Ki third_party/capstone/arch/X86/X86DisassemblerDecoder.c
+ 4.8% 363Ki 1.3% 59.1Ki third_party/capstone/arch/SystemZ/SystemZDisassembler.c
+ 4.4% 329Ki 1.2% 54.2Ki third_party/capstone/arch/Mips/MipsDisassembler.c
+ 4.2% 314Ki 1.6% 73.0Ki third_party/capstone/arch/AArch64/AArch64Disassembler.c
+ 3.4% 256Ki 3.1% 145Ki third_party/capstone/arch/AArch64/AArch64InstPrinter.c
+ 3.2% 243Ki 4.7% 219Ki third_party/capstone/arch/AArch64/AArch64Mapping.c
+ 3.2% 241Ki 4.7% 220Ki third_party/capstone/arch/SystemZ/SystemZMapping.c
+ 2.9% 219Ki 4.2% 196Ki third_party/capstone/arch/ARM/ARMMapping.c
+ 2.7% 205Ki 1.8% 83.3Ki third_party/capstone/arch/ARM/ARMInstPrinter.c
+ 2.2% 166Ki 2.0% 95.4Ki third_party/capstone/arch/PowerPC/PPCInstPrinter.c
+ 2.0% 153Ki 2.8% 132Ki third_party/capstone/arch/Mips/MipsMapping.c
+ 2.0% 153Ki 0.4% 17.7Ki third_party/capstone/arch/TMS320C64x/TMS320C64xDisassembler.c
+ 2.0% 151Ki 2.1% 99.0Ki third_party/capstone/arch/X86/X86ATTInstPrinter.c
+ 2.0% 149Ki 1.9% 90.6Ki third_party/capstone/arch/Sparc/SparcInstPrinter.c
+ 2.0% 148Ki 2.7% 126Ki third_party/capstone/arch/PowerPC/PPCMapping.c
+ 1.9% 146Ki 2.1% 96.2Ki third_party/capstone/arch/X86/X86IntelInstPrinter.c
+ 1.7% 124Ki 0.6% 28.7Ki third_party/capstone/arch/PowerPC/PPCDisassembler.c
+ 9.4% 2.77Mi 3.2% 221Ki third_party/re2
+ 14.9% 422Ki 10.6% 23.4Ki third_party/re2/re2/dfa.cc
+ 14.4% 407Ki 11.3% 24.9Ki third_party/re2/re2/regexp.cc
+ 14.0% 397Ki 11.2% 24.8Ki third_party/re2/re2/re2.cc
+ 12.2% 345Ki 10.0% 22.1Ki third_party/re2/re2/prog.cc
+ 11.4% 322Ki 33.1% 73.2Ki third_party/re2/re2/parse.cc
+ 10.3% 292Ki 8.9% 19.6Ki third_party/re2/re2/compile.cc
+ 5.6% 159Ki 3.7% 8.08Ki third_party/re2/re2/nfa.cc
+ 4.6% 130Ki 4.7% 10.5Ki third_party/re2/re2/simplify.cc
+ 3.7% 106Ki 1.9% 4.19Ki third_party/re2/re2/onepass.cc
+ 3.1% 88.7Ki 1.4% 3.08Ki third_party/re2/re2/bitstate.cc
+ 2.9% 83.6Ki 1.6% 3.50Ki third_party/re2/re2/tostring.cc
+ 1.1% 31.3Ki 0.6% 1.41Ki third_party/re2/re2/stringpiece.cc
+ 0.9% 24.3Ki 0.6% 1.22Ki third_party/re2/util/strutil.cc
+ 0.6% 16.2Ki 0.0% 0 third_party/re2/re2/unicode_groups.cc
+ 0.2% 5.36Ki 0.5% 1.09Ki third_party/re2/util/rune.cc
+ 0.1% 1.50Ki 0.0% 0 third_party/re2/re2/perl_groups.cc
+ 0.0% 661 0.0% 0 third_party/re2/re2/unicode_casefold.cc
+ 4.6% 1.36Mi 4.1% 280Ki src
+ 39.4% 549Ki 40.7% 114Ki src/bloaty.cc
+ 13.9% 193Ki 15.0% 42.1Ki src/dwarf.cc
+ 10.8% 150Ki 0.5% 1.28Ki src/main.cc
+ 8.1% 113Ki 8.9% 25.0Ki src/bloaty.pb.cc
+ 7.7% 108Ki 9.0% 25.2Ki src/elf.cc
+ 7.2% 99.9Ki 10.3% 29.0Ki src/macho.cc
+ 4.7% 66.2Ki 7.0% 19.5Ki src/demangle.cc
+ 3.5% 49.5Ki 3.8% 10.5Ki src/webassembly.cc
+ 2.8% 38.8Ki 2.7% 7.50Ki src/range_map.cc
+ 1.9% 26.2Ki 2.1% 5.98Ki src/disassemble.cc
+ 2.1% 637Ki 1.7% 117Ki third_party/demumble
+ 100.0% 637Ki 100.0% 117Ki third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp
+ 0.7% 209Ki 1.1% 73.8Ki third_party/abseil
+ 19.0% 39.8Ki 19.0% 14.0Ki third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc
+ 15.6% 32.6Ki 13.6% 10.1Ki third_party/abseil-cpp/absl/strings/escaping.cc
+ 14.9% 31.1Ki 25.0% 18.5Ki third_party/abseil-cpp/absl/strings/charconv.cc
+ 12.3% 25.7Ki 10.6% 7.79Ki third_party/abseil-cpp/absl/numeric/int128.cc
+ 8.5% 17.9Ki 9.0% 6.65Ki third_party/abseil-cpp/absl/strings/numbers.cc
+ 6.8% 14.3Ki 5.2% 3.87Ki third_party/abseil-cpp/absl/strings/str_cat.cc
+ 6.1% 12.7Ki 5.1% 3.75Ki third_party/abseil-cpp/absl/strings/string_view.cc
+ 3.6% 7.49Ki 1.7% 1.23Ki third_party/abseil-cpp/absl/strings/ascii.cc
+ 2.9% 6.10Ki 3.5% 2.56Ki third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc
+ 2.7% 5.75Ki 1.5% 1.13Ki third_party/abseil-cpp/absl/strings/str_split.cc
+ 2.3% 4.84Ki 1.9% 1.40Ki third_party/abseil-cpp/absl/strings/substitute.cc
+ 1.4% 3.03Ki 1.0% 754 third_party/abseil-cpp/absl/base/internal/raw_logging.cc
+ 1.1% 2.28Ki 0.4% 302 third_party/abseil-cpp/absl/base/internal/throw_delegate.cc
+ 0.9% 1.97Ki 1.0% 788 third_party/abseil-cpp/absl/strings/internal/memutil.cc
+ 0.9% 1.93Ki 0.9% 701 third_party/abseil-cpp/absl/strings/internal/escaping.cc
+ 0.7% 1.41Ki 0.4% 293 third_party/abseil-cpp/absl/strings/match.cc
+ 0.3% 556 0.2% 161 third_party/abseil-cpp/absl/strings/internal/utf8.cc
+ 0.7% 204Ki 3.0% 204Ki [section .rodata]
+ 0.2% 56.8Ki 0.8% 56.8Ki [section .gcc_except_table]
+ 0.2% 47.7Ki 0.0% 0 [section .debug_str]
+ 0.2% 46.3Ki 0.0% 0 [section .symtab]
+ 0.1% 42.0Ki 0.6% 42.0Ki [section .text]
+ 0.1% 41.4Ki 0.0% 0 [section .debug_loc]
+ 0.1% 29.3Ki 0.0% 0 [section .strtab]
+ 0.0% 12.0Ki 0.2% 11.5Ki [30 Others]
+ 0.0% 7.36Ki 0.0% 0 [section .debug_ranges]
+ 0.0% 6.10Ki 0.1% 6.10Ki [section .dynstr]
+ 0.0% 4.99Ki 0.1% 4.99Ki [section .dynsym]
+ 0.0% 4.77Ki 0.1% 4.77Ki [section .eh_frame]
+ 0.0% 4.59Ki 0.0% 0 [Unmapped]
+ 0.0% 3.23Ki 0.0% 3.23Ki [section .plt]
+ 0.0% 2.50Ki 0.0% 0 [ELF Headers]
+ 100.0% 29.5Mi 100.0% 6.69Mi TOTAL
+```
+
+# Source filter
+
+Sometimes, you are only interested in parts of the binary
+instead of the whole package. This is common in embedded
+programming, where ELF files are used only as a container
+format, and only a few sections are actually loaded onto
+the device.
+
+For this, Bloaty provides a `--source-filter` option which
+allows filtering out irrelevant data. It takes a regex
+which is applied to each of the symbol names in a data
+source. Only symbols which match the regex are displayed
+in the output. This is especially powerful when combined
+with custom data sources, as the rewriting occurs before
+the filtering.
+
+In the case of hierarchical data source profiles, the regex
+is applied to all symbol names in the hierarchy. If any
+name matches, all of its parents will be displayed as well.
+
+For example, given the above scenario, maybe we are only
+interested in how large the first-party Bloaty code is.
+This can be displayed using a source filter on the `src`
+directory.
+
+```cmdoutput
+$ ./bloaty -c config.bloaty -d bloaty_package,compileunits --source-filter ^src bloaty
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 100.0% 1.36Mi 100.0% 280Ki src
+ 39.4% 549Ki 40.7% 114Ki src/bloaty.cc
+ 13.9% 193Ki 15.0% 42.1Ki src/dwarf.cc
+ 10.8% 150Ki 0.5% 1.28Ki src/main.cc
+ 8.1% 113Ki 8.9% 25.0Ki src/bloaty.pb.cc
+ 7.7% 108Ki 9.0% 25.2Ki src/elf.cc
+ 7.2% 99.9Ki 10.3% 29.0Ki src/macho.cc
+ 4.7% 66.2Ki 7.0% 19.5Ki src/demangle.cc
+ 3.5% 49.5Ki 3.8% 10.5Ki src/webassembly.cc
+ 2.8% 38.8Ki 2.7% 7.50Ki src/range_map.cc
+ 1.9% 26.2Ki 2.1% 5.98Ki src/disassemble.cc
+ 100.0% 1.36Mi 100.0% 280Ki TOTAL
+Filtering enabled (source_filter); omitted file = 28.1Mi, vm = 6.42Mi of entries
+```
+
+# Future Work
+
+Here are some tentative plans for future features.
+
+## Understanding Symbol References
+
+If we can analyze references between symbols, this would
+enable a lot of features:
+
+- Detect garbage symbols (i.e. how much would the binary
+ shrink if we compiled with `-ffunction-sections
+ -fdata-sections -Wl,-gc-sections`).
+- Understand why a particular symbol can't be
+ garbage-collected (like `ld -why_live` on OS X).
+- Visualize the dependency tree of symbols (probably as a
+ dominator tree) so users can see the weight of their
+ binary in this way.
diff --git a/config.bloaty b/config.bloaty
new file mode 100644
index 0000000..df290fc
--- /dev/null
+++ b/config.bloaty
@@ -0,0 +1,13 @@
+custom_data_source: {
+ name: "bloaty_package"
+ base_data_source: "compileunits"
+
+ rewrite: {
+ pattern: "^(\\.\\./)?src"
+ replacement: "src"
+ }
+ rewrite: {
+ pattern: "^(\\.\\./)?(third_party/\\w+)"
+ replacement: "\\2"
+ }
+}
diff --git a/custom_sources.bloaty b/custom_sources.bloaty
new file mode 100644
index 0000000..9265572
--- /dev/null
+++ b/custom_sources.bloaty
@@ -0,0 +1,113 @@
+
+# Custom data sources that are generally useful, so we ship them
+# with Bloaty.
+
+##
+# class_or_namespace: grabs everything up until the last "::".
+#
+# This is especially useful when paired with shortsymbols, eg.
+#
+# VM SIZE FILE SIZE
+# -------------- --------------
+# 37.6% 4.18Mi [None] 4.17Mi 37.6%
+# 30.9% 3.44Mi [Other] 3.44Mi 30.9%
+# 5.7% 648Ki insns 648Ki 5.7%
+# 2.9% 330Ki printAliasInstr 330Ki 2.9%
+# 2.9% 326Ki __cxxabiv1::(anonymous namespace) 326Ki 2.9%
+# 23.6% 77.1Ki [Other] 77.1Ki 23.6%
+# 12.7% 41.5Ki __cxxabiv1::(anonymous namespace)::parse_type<>() 41.5Ki 12.7%
+# 10.1% 32.9Ki __cxxabiv1::(anonymous namespace)::parse_expression<>() 32.9Ki 10.1%
+# <...>
+# 2.7% 306Ki std::__1::__tree<> 306Ki 2.7%
+# 27.2% 83.3Ki std::__1::__tree<>::__find_equal<>() 83.3Ki 27.2%
+# 20.9% 64.0Ki std::__1::__tree<>::__construct_node() 64.0Ki 20.9%
+# 12.6% 38.8Ki std::__1::__tree<>::find<>() 38.8Ki 12.6%
+# <...>
+# 2.2% 247Ki std::__1::__hash_table<> 247Ki 2.2%
+# 23.6% 58.5Ki std::__1::__hash_table<>::__rehash() 58.5Ki 23.6%
+# 20.3% 50.2Ki std::__1::__hash_table<>::__construct_node_hash() 50.2Ki 20.3%
+# 15.4% 38.2Ki std::__1::__hash_table<>::find<>() 38.2Ki 15.4%
+# <...>
+# 1.8% 208Ki google::protobuf 207Ki 1.8%
+# 35.2% 73.2Ki google::protobuf::InsertIfNotPresent<>() 73.2Ki 35.2%
+# 25.8% 53.7Ki [Other] 53.4Ki 25.7%
+# 6.1% 12.7Ki google::protobuf::StrCat() 12.7Ki 6.1%
+# <...>
+#
+# etc.
+
+custom_data_source: {
+ name: "class_or_namespace"
+ base_data_source: "shortsymbols"
+
+ rewrite: {
+ pattern: "^(.*)::[^:]+$"
+ replacement: "\\1"
+ }
+}
+
+##
+# stripped_sections: the base section name
+#
+# This is useful when you are profiling object files that you have compiled
+# with -ffunction-sections/-fdata-sections. Normally this makes "-d sections"
+# very noisy, because every function has a distinct section name. This
+# removes the noise and gives you a base section name.
+custom_data_source: {
+ name: "stripped_sections"
+ base_data_source: "sections"
+
+ rewrite: {
+ pattern: "^\\.text"
+ replacement: ".text"
+ }
+ rewrite: {
+ pattern: "^\\.data.rel.ro"
+ replacement: ".data.rel.ro"
+ }
+ rewrite: {
+ pattern: "^\\.data"
+ replacement: ".data"
+ }
+ rewrite: {
+ pattern: "^\\.rela\\.text"
+ replacement: ".rela.text"
+ }
+ rewrite: {
+ pattern: "^.rela.data.rel.ro"
+ replacement: ".rela.data.rel.ro"
+ }
+ rewrite: {
+ pattern: "^.rela.rodata"
+ replacement: ".rela.rodata"
+ }
+ rewrite: {
+ pattern: "^.rela.data"
+ replacement: ".rela.data"
+ }
+ rewrite: {
+ pattern: "^.tdata"
+ replacement: ".tdata"
+ }
+ rewrite: {
+ pattern: "^.rodata"
+ replacement: ".rodata"
+ }
+ rewrite: {
+ pattern: "^.bss"
+ replacement: ".bss"
+ }
+ rewrite: {
+ pattern: "^.tbss"
+ replacement: ".tbss"
+ }
+}
+
+custom_data_source: {
+ name: "section_prefix"
+ base_data_source: "sections"
+ rewrite: {
+ pattern: "^(\\.[^.]+)"
+ replacement: "\\1"
+ }
+}
diff --git a/doc/how-bloaty-works.md b/doc/how-bloaty-works.md
new file mode 100644
index 0000000..099f540
--- /dev/null
+++ b/doc/how-bloaty-works.md
@@ -0,0 +1,373 @@
+
+# How Bloaty Works
+
+At a high level, Bloaty's goal is to create a map of the binary where every
+byte has a label attached to it. Every byte starts out as unknown
+(unattributed). As we scan the binary we assign labels to different ranges of
+the file. For example, if the user selected the "sections" data source we scan
+the section table and use the section name as the label for each range.
+
+Ideally these labeled ranges will cover the entire file by the time we are
+done. In practice we usually can't achieve perfect 100% coverage. To
+compensate for this, we have various kinds of "fallback" labels we attach to
+mystery regions of the file. This is how we guarantee an important invariant
+of Bloaty: the totals given in Bloaty's output will always match the total size
+of the file. This ensures that we always account for the entire file, even if
+we don't have detailed information for every byte.
+
+The ELF/Mach-O/etc. data structures we are traversing were not designed to
+enable size profiling. They were designed to assist linkers, loaders,
+debuggers, stack unwinders, etc. to run and debug the binary. This means that
+Bloaty's size analysis is inherently an unconventional use of ELF/Mach-O
+metadata. Bloaty has to be clever about how to use the available information to
+achieve its goal. This can pose a challenge, but also makes Bloaty fun to work
+on. Getting the coverage close to 100% requires a lot of ingenuity (and some
+heuristics).
+
+## Range Map
+
+RangeMap (as defined in [range_map.h](https://github.com/google/bloaty/blob/master/src/range_map.h))
+is the core data structure of Bloaty. It is a sparse map of
+`[start, end) -> std::string` that associates regions of VM or file space to
+a label.
+
+By the time Bloaty is finished, it has built a complete map of both VM and file
+space for the binary. You can view these maps by running Bloaty with '-v':
+
+```
+$ ./bloaty bloaty -v -d sections
+FILE MAP:
+0000000-00002e0 736 [LOAD #2 [R]]
+00002e0-00002fc 28 .interp
+00002fc-0000320 36 .note.gnu.build-id
+0000320-0000340 32 .note.ABI-tag
+0000340-0000510 464 .gnu.hash
+0000510-0001db8 6312 .dynsym
+0001db8-0003c8d 7893 .dynstr
+0003c8d-0003c8e 1 [LOAD #2 [R]]
+0003c8e-0003e9c 526 .gnu.version
+0003e9c-0003ea0 4 [LOAD #2 [R]]
+0003ea0-0004020 384 .gnu.version_r
+0004020-0066f30 405264 .rela.dyn
+0066f30-00680b8 4488 .rela.plt
+00680b8-0069000 3912 [Unmapped]
+0069000-0069017 23 .init
+0069017-0069020 9 [LOAD #3 [RX]]
+0069020-0069be0 3008 .plt
+0069be0-0069c40 96 .plt.got
+0069c40-02874d1 2218129 .text
+[...]
+
+VM MAP:
+000000-0002e0 736 [LOAD #2 [R]]
+0002e0-0002fc 28 .interp
+0002fc-000320 36 .note.gnu.build-id
+000320-000340 32 .note.ABI-tag
+000340-000510 464 .gnu.hash
+000510-001db8 6312 .dynsym
+001db8-003c8d 7893 .dynstr
+003c8d-003c8e 1 [LOAD #2 [R]]
+003c8e-003e9c 526 .gnu.version
+003e9c-003ea0 4 [LOAD #2 [R]]
+003ea0-004020 384 .gnu.version_r
+004020-066f30 405264 .rela.dyn
+066f30-0680b8 4488 .rela.plt
+0680b8-069000 3912 [-- Nothing mapped --]
+069000-069017 23 .init
+069017-069020 9 [LOAD #3 [RX]]
+[...]
+```
+
+The file map refers to file offsets, and these always run from 0 to the size of
+the file. The VM map refers to VM addresses; these start at 0 for shared
+libraries and position-independent binaries, but these will start at some
+non-zero address if the binary was linked to be loaded at a fixed address.
+
+Note that some of the regions in the map have labels like `[LOAD #2 [R]]`
+instead of a true section name. This is because the section table does not
+always cover every byte of the file. Bloaty gives these regions a fallback label
+that contains the segment name instead. We must attach some kind of label to
+every byte of the file, otherwise Bloaty's totals would not match the file size.
+
+Also notice that there is an entry in the VM map that says `[-- Nothing mapped
+--]`. This is calling attention to the fact that there is a gap in the address
+space here. Since nothing is mapped, these regions of the VM space don't
+actually need to be accessible in the target process image. However, unless this
+unused space aligns with page boundaries, it will probably end up getting
+mapped anyway.
+
+Sometimes we know a region's start but not its end. For example, Mach-O
+symbols have an address but not a size (whereas ELF symbols have both).
+To support this case, `RangeMap` supports adding an address with `kUnknownSize`.
+A range with unknown size will automatically extend to the beginning of the
+next region, even if the next region is added later.
+
+If we try to add a label to a range of the binary that has already been assigned
+a label, the first label assigned takes precedence. This means that the order
+in which we scan data structures is significant. So our general strategy is to
+scan our most granular and detailed information first. We scan generic
+information as a last resort, to give at least some information for parts of the
+binary that we couldn't find any more specific information about.
+
+## VM Space and File Space
+
+Loadable binaries have two fundamental domains of space we are trying to map:
+*VM space* and *file space*. File space is the bytes of the input file. VM
+space is the bytes of memory when the executable is loaded at runtime. Some
+regions of the binary exist only in file space (like debug info) and some
+regions exist only in VM space (like `.bss`, zero-initialized data). Even
+entities that exist in both spaces can have different sizes in each.
+
+We create two separate `RangeMap` structures for these two domains. For
+convenience, we put them together into a single structure called `DualMap`:
+
+```cpp
+struct DualMap {
+ RangeMap vm_map;
+ RangeMap file_map;
+};
+```
+
+We populate these two maps simultaneously as we scan the file. We must populate
+both maps even if we only care about one of them, because most of the metadata we're
+scanning gives us VM addresses *or* file offsets, not both. For example,
+debug info always refers to VM addresses, because it's intended for debugging at
+runtime. Even if we only care about file size, we still have to scan VM addresses
+and translate them to file offsets.
+
+Bloaty's overall analysis algorithm (in pseudo-code) is:
+
+```c++
+for (auto f : files) {
+ // Always start by creating the base map.
+ DualMap base_map = ScanBaseMap(f);
+
+ // Scan once for every data source the user selected with '-d'.
+ std::vector<DualMap> maps;
+ for (auto s : data_sources) {
+ maps.push_back(ScanDataSource(f, s));
+ }
+}
+```
+
+## Base Map
+
+To translate between VM and file space, we always begin by creating a "base
+map." The base map is just a `DualMap` like any other, but we give it special
+meaning:
+
+* It defines what ranges of file and VM space constitute "the entire binary"
+ (i.e. the "TOTAL" row of the final report).
+* We use it to translate between VM space and File space.
+
+This means that the base map must be exhaustive, and must also provide
+translation for any entity that exists in both VM and file space. For example,
+suppose we are scanning the "symbols" data source and we see in the symbol
+table that address `0x12345` corresponds to symbol `foo`. We will add that to
+the VM map immediately, but we will also use the base map to translate address
+`0x12345` to a file offset so we can add that range to the file map.
+
+How does the base map store translation info? I left one thing out about
+`RangeMap` above. In addition to storing a label for every region, it can also
+(optionally) store a member called `other_start`. This stores the
+corresponding offset in the other space, and lets you translate addresses from
+one to the other. The `other_start` member is only used in the base map.
+
+We build the base map by scanning either the segments (program headers) or
+sections of the binary. These give both VM address and file offset for regions
+of the binary that are loaded into memory. To make sure we cover the entire
+file space, we use `[Unmapped]` as a last ditch fallback for any regions of the
+on-disk binary that didn't have any segment/section data associated with them.
+This ensures that Bloaty always accounts for the entire physical binary, even if
+we can't find any information about it.
+
+## Scanning Data Sources
+
+Once we have built the base map, we can get on to the meat of Bloaty's work.
+We can now scan the binary according to whatever data source(s) the user has
+selected.
+
+### Segments and Sections
+
+The `segments` and `sections` data sources are relatively straightforward. For
+the most part we can simply scan the segments/sections table and call it a day.
+
+For ELF, segments and sections have distinct tables in the binary that can be
+scanned independently. This means that technically a section could span multiple
+segments, but in practice segments/sections form a 1:many relationship, where
+each section is contained entirely within a single segment.
+
+Currently Bloaty only reports `PT_LOAD` and `PT_TLS` segments. We scan `PT_LOAD`
+segments first, so if there is overlap with `PT_TLS` the `PT_LOAD` label will
+win. In the future it may make sense to scan `PT_TLS` first, as this is more
+granular data that can give insight into the per-thread runtime overhead of TLS
+variables. It may also make sense to scan other segment types, to give more
+granular info.
+
+ELF segments do not have names. To distinguish between different `PT_LOAD`
+segments, we include both a segment offset and the segment flags in the label,
+eg. `LOAD #2 [R]`.
+
+For Mach-O, segments are contained within a file-level table of "load
+commands." Each load command has a type, and technically speaking, segments are
+a subset of all load commands. However Bloaty's `segments` data source reports
+many non-segment load commands such as the symbol table (`LC_SYMTAB`,
+`LC_DYSYMTAB`), code signature (`LC_CODE_SIGNATURE`), and more. Segments can
+have zero or more sections, so in Mach-O files the 1:many nature of segments
+and sections is enforced by the file format.
+
+For `segments` and `sections` we have to decide how to attribute the regions of
+the file that correspond to the segment/section headers themselves. Bloaty's
+general philosophy is to include the metadata with the data, so each label
+shows the true weight of everything associated with that label. This would
+suggest that the `.text` label should include the `.text` section as well as
+the section header entry for the `.text` section. However this would hide the
+overhead of the ELF headers, which can be significant if there are many
+sections. Bloaty currently has no higher-level data source that could show the
+ELF headers separately from the ELF data, and even if there were such a data
+source it would have narrow usefulness so people would probably not think to
+use it very often.
+
+There is not an easy answer to this question. At the moment Bloaty will
+include section headers with the corresponding section, but will *not* include
+segment headers with the corresponding segment. This may or may not be the
+best solution to this problem, and this may change if another solution proves
+to work better.
+
+### Symbols
+
+The `symbols` data source is where Bloaty's deep parsing of the binary delivers
+the most benefit, as it provides detailed information that you cannot get
+from a linker map or symbol table.
+
+For example, take the following data from running Bloaty on itself:
+
+```
+$ ./bloaty bloaty -d symbols,sections
+ FILE SIZE VM SIZE
+ -------------- --------------
+[...]
+ 0.2% 116Ki 1.6% 116Ki AArch64_printInst
+ 84.9% 98.8Ki 84.9% 98.8Ki .text
+ 14.9% 17.4Ki 14.9% 17.4Ki .rodata
+ 0.1% 156 0.1% 156 .eh_frame
+ 0.0% 24 0.0% 0 .symtab
+ 0.0% 18 0.0% 0 .strtab
+ 0.0% 8 0.0% 8 .eh_frame_hdr
+[...]
+ 0.1% 50.1Ki 0.7% 49.8Ki reg_name_maps
+ 59.6% 29.8Ki 59.8% 29.8Ki .rela.dyn
+ 40.0% 20.0Ki 40.2% 20.0Ki .data.rel.ro
+ 0.4% 216 0.0% 0 .symtab
+ 0.0% 14 0.0% 0 .strtab
+```
+
+I excerpted two symbols from the report. Between these two symbols, Bloaty has
+found seven distinct kinds of data that contributed to these two symbols. If
+you wrote a tool that naively just parsed the symbol table, you would only find
+the first of these seven:
+
+1. `.text`/`.data.rel.ro`: this is the data we obtain by simply following the
+ symbol table entry. This is the primary code or data emitted by the function
+ or variable.
+2. `.eh_frame`: this is the "unwind information" for a function. [It is used for
+ many things](https://stackoverflow.com/a/26302715/77070), including C++
+ exceptions and stack traces when no frame pointer is available.
+3. `.eh_frame_hdr`: this is metadata about the `.eh_frame` section.
+4. `.symtab`: this is the function/variable's symbol table entry itself. It is a
+ fixed size for every entry. The fact that `reg_name_maps` above has a
+ `.symtab` size of 216 indicates that there must actually be 9 different
+ symbols being represented by this entry. Bloaty has combined them because
+ they all have the same name. We can break them apart if we want using:
+
+ ```
+ $ ./bloaty bloaty -d compileunits,symbols --source-filter=reg_name_maps$
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 20.3% 10.2Ki 20.3% 10.1Ki ../third_party/capstone/arch/AArch64/AArch64Mapping.c
+ 100.0% 10.2Ki 100.0% 10.1Ki reg_name_maps
+ 18.9% 9.45Ki 18.9% 9.43Ki ../third_party/capstone/arch/X86/X86Mapping.c
+ 100.0% 9.45Ki 100.0% 9.43Ki reg_name_maps
+ 16.4% 8.20Ki 16.4% 8.18Ki ../third_party/capstone/arch/PowerPC/PPCMapping.c
+ 100.0% 8.20Ki 100.0% 8.18Ki reg_name_maps
+ 10.7% 5.35Ki 10.7% 5.33Ki ../third_party/capstone/arch/Mips/MipsMapping.c
+ 100.0% 5.35Ki 100.0% 5.33Ki reg_name_maps
+ 9.1% 4.57Ki 9.1% 4.55Ki ../third_party/capstone/arch/SystemZ/SystemZMapping.c
+ 100.0% 4.57Ki 100.0% 4.55Ki reg_name_maps
+ 8.7% 4.35Ki 8.7% 4.31Ki ../third_party/capstone/arch/ARM/ARMMapping.c
+ 100.0% 4.35Ki 100.0% 4.31Ki reg_name_maps
+ 7.0% 3.52Ki 7.0% 3.49Ki ../third_party/capstone/arch/TMS320C64x/TMS320C64xMapping.c
+ 100.0% 3.52Ki 100.0% 3.49Ki reg_name_maps
+ 6.9% 3.44Ki 6.9% 3.41Ki ../third_party/capstone/arch/Sparc/SparcMapping.c
+ 100.0% 3.44Ki 100.0% 3.41Ki reg_name_maps
+ 2.0% 1.02Ki 2.0% 1016 ../third_party/capstone/arch/XCore/XCoreMapping.c
+ 100.0% 1.02Ki 100.0% 1016 reg_name_maps
+ 100.0% 50.1Ki 100.0% 49.8Ki TOTAL
+ Filtering enabled (source_filter); omitted file = 46.7Mi, vm = 7.08Mi of entries
+ ```
+5. `.strtab`: this is the text of the function/variable's name itself in the
+ string table. Longer names take up more space in the binary, and Bloaty's
+ analysis here reflects that (though the symbol table is not loaded at
+ runtime, so it's not costing RAM).
+6. `.rela.dyn`: these are relocations embedded into the executable. Normally we
+ would associate relocations with `.o` files and not the final linked binary.
+ However shared objects and position-independent executables must also emit
+ relocations for any global variable that is initialized to an address of some
+ other data. These relocations can take up a significant amount of space,
+ indeed more space than the data itself in this case! Without this deep
+ analysis of the binary, this cost would be invisible. Bloaty scans all
+ relocation tables and "charges" each relocation entry to the function/data
+ that requires the relocation (*not* the function being pointed to).
+7. `.rodata`: Bloaty has found some data associated with this function.
+ Sometimes data doesn't get its own symbol table entry, for whatever reason.
+ Bloaty can attribute anonymous data to the function that uses it by
+ disassembling the binary looking for instructions that reference a different
+ part of the binary. If the same anonymous data is used by more than one
+ function, then the first one scanned will "win" and assume the whole cost,
+ as Bloaty has no concept of sharing the cost. Every byte of the file must
+ have exactly one label associated with it.
+
+Note that this is more granular information than you can get from a linker map
+file. A linker map file will break down some of these sections by compile unit,
+but the symbol-level granularity is limited to the primary code/data for each
+symbol (#1 in the list above).
+
+### Compile Units
+
+Like symbols, we can see that Bloaty is capable of breaking down lots of
+sections by compile unit:
+
+```
+$ ./bloaty bloaty -d compileunits,sections
+ FILE SIZE VM SIZE
+ -------------- --------------
+ 37.9% 17.7Mi 49.4% 3.52Mi [160 Others]
+ 15.0% 7.04Mi 3.4% 246Ki ../third_party/protobuf/src/google/protobuf/descriptor.cc
+ 33.9% 2.38Mi 0.0% 0 .debug_info
+ 32.6% 2.29Mi 0.0% 0 .debug_loc
+ 17.2% 1.21Mi 0.0% 0 .debug_str
+ 6.5% 468Ki 0.0% 0 .debug_ranges
+ 5.3% 381Ki 0.0% 0 .debug_line
+ 2.8% 204Ki 83.1% 204Ki .text
+ 1.0% 70.9Ki 0.0% 0 .strtab
+ 0.4% 25.7Ki 10.4% 25.7Ki .eh_frame
+ 0.2% 13.3Ki 0.0% 0 .symtab
+ 0.1% 10.6Ki 4.3% 10.6Ki .rodata
+ 0.1% 3.97Ki 1.6% 3.97Ki .eh_frame_hdr
+ 0.0% 1.03Ki 0.4% 1.03Ki .rela.dyn
+ 0.0% 368 0.1% 368 .data.rel.ro
+ 0.0% 0 0.0% 81 .bss
+[...]
+```
+
+To attribute all of the different `.debug_*` sections, Bloaty includes parsers
+for all of the different DWARF formats that live in these sections. We also use
+the DWARF data to find which symbols belong to which compile units.
+
+The `compileunits` data source contains much of the same data that you could get
+from a linker map. Since each compile unit generally comes from a separate `.o`
+file, a linker map can often give good data about which parts of the binary came
+from which translation units. However Bloaty is able to derive this data without
+needing a linker map file, which may be tricky to obtain. The `compileunits`
+data source is also useful when combined with other data sources in hierarchical
+profiles.
diff --git a/make-release-tarball.sh b/make-release-tarball.sh
new file mode 100755
index 0000000..45873be
--- /dev/null
+++ b/make-release-tarball.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Makes a release tarball. We include our dependencies/submodules,
+# but we heavily prune their file lists to avoid including lots of
+# extraneous baggage. We also leave out Bloaty's tests, especially
+# because some of the test data is large.
+
+set -e
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: make-release.tarball.sh VERSION"
+ exit 1
+fi
+
+VERSION=$1
+
+FILES=$(git ls-files --exclude-standard --recurse-submodules |
+ grep -v googletest |
+ grep -v ^tests |
+ grep -v third_party/protobuf |
+ grep -v 'third_party/capstone/\(suite\|bindings\|xcode\|msvc\|contrib\)' |
+ grep -v third_party/abseil-cpp/absl/time/internal/cctz/testdata |
+ grep -v ^.git)
+FILES="$FILES $(git ls-files --exclude-standard --recurse-submodules |
+ grep 'third_party/protobuf/\(src\|cmake\|configure.ac\)')"
+
+# Unfortunately tar on Mac doesn't support --transform, so we have to
+# actually move our files to a different directory to get the prefix.
+DIR=/tmp/bloaty-$VERSION
+rm -rf $DIR
+mkdir $DIR
+rsync -R $FILES $DIR
+
+BASE=$PWD
+cd /tmp
+OUT=bloaty-$VERSION.tar.bz2
+tar cjf $BASE/$OUT bloaty-$VERSION
+
+echo "Created $OUT"
+
diff --git a/regen-readme.py b/regen-readme.py
new file mode 100755
index 0000000..15a8f5c
--- /dev/null
+++ b/regen-readme.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+import subprocess
+import os
+import sys
+
+basedir = os.path.dirname(sys.argv[0])
+readme = os.path.join(basedir, "README.md")
+
+with open(readme) as f:
+ inp = f.read()
+
+out = ""
+
+it = iter(inp.splitlines(True))
+
+for line in it:
+ out += line
+ if line.startswith("```cmdoutput"):
+ # Get command.
+ cmd = next(it)
+ assert cmd.startswith("$ "), cmd
+ real_cmd = cmd[2:].strip()
+ out += cmd
+
+ print("Running: " + real_cmd)
+ out += subprocess.check_output(real_cmd, shell=True)
+
+ # Skip pre-existing command output.
+ line = next(it)
+ while not line.startswith("```"):
+ line = next(it)
+ out += line
+
+with open(readme, "w") as f:
+ f.write(out)
diff --git a/src/bloaty.cc b/src/bloaty.cc
new file mode 100644
index 0000000..30607a2
--- /dev/null
+++ b/src/bloaty.cc
@@ -0,0 +1,2166 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <atomic>
+#include <cmath>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <math.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/substitute.h"
+#include "google/protobuf/io/zero_copy_stream_impl.h"
+#include "google/protobuf/text_format.h"
+
+#include "bloaty.h"
+#include "bloaty.pb.h"
+#include "demangle.h"
+#include "re.h"
+
+using absl::string_view;
+
+// Two-step stringification: TOSTRING(x) expands x first (e.g. __LINE__) and
+// only then turns the result into a string literal.
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+// Evaluates `call`; if the result is negative, reports it via perror() with
+// the call text and file:line as the prefix, then exits with status 1.
+// The argument is parenthesized so that expressions such as an assignment
+// are compared as a whole. The bare-`if` shape is kept so existing call
+// sites continue to compile unchanged.
+#define CHECK_SYSCALL(call)                            \
+  if ((call) < 0) {                                    \
+    perror(#call " " __FILE__ ":" TOSTRING(__LINE__)); \
+    exit(1);                                           \
+  }
+
+// Throws a bloaty::Error built from `str`, this file's name (__FILE__) and
+// `line`. Declared noreturn: control never comes back to the caller.
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+// THROW(msg): throws a bloaty::Error tagged with the line of the call site.
+#define THROW(msg) Throw(msg, __LINE__)
+// THROWF(fmt, ...): like THROW, with the message built by absl::Substitute.
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+// WARN(fmt, ...): prints "WARNING: <message>" to stdout, but only when
+// verbose_level is greater than zero. Expands to a bare `if` statement.
+#define WARN(...) \
+ if (verbose_level > 0) { \
+ printf("WARNING: %s\n", absl::Substitute(__VA_ARGS__).c_str()); \
+ }
+
+namespace bloaty {
+
+// Use a global since we would have to plumb it through so many call-stacks
+// otherwise. We would make this thread_local but that's not supported on OS X
+// right now.
+int verbose_level = 0;
+
+// One entry of the data_sources table: ties a DataSource enumerator to its
+// textual name and a human-readable description.
+struct DataSourceDefinition {
+ DataSource number;
+ const char* name;
+ const char* description;
+};
+
+// Table of the built-in data sources. GetDataSourceLabel() searches it
+// linearly to map a DataSource value back to its name.
+constexpr DataSourceDefinition data_sources[] = {
+ {DataSource::kArchiveMembers, "armembers", "the .o files in a .a file"},
+ {DataSource::kCompileUnits, "compileunits",
+ "source file for the .o file (translation unit). requires debug info."},
+ {DataSource::kInputFiles, "inputfiles",
+ "the filename specified on the Bloaty command-line"},
+ {DataSource::kInlines, "inlines",
+ "source line/file where inlined code came from. requires debug info."},
+ {DataSource::kRawRanges, "rawranges",
+ "raw ranges of previous data source."},
+ {DataSource::kSections, "sections", "object file section"},
+ {DataSource::kSegments, "segments", "load commands in the binary"},
+ // We require that all symbols sources are >= kSymbols.
+ // NOTE(review): that ordering refers to the DataSource enumerator values,
+ // which are declared outside this file -- confirm when adding entries.
+ {DataSource::kSymbols, "symbols",
+ "symbols from symbol table (configure demangling with --demangle)"},
+ {DataSource::kRawSymbols, "rawsymbols", "unmangled symbols"},
+ {DataSource::kFullSymbols, "fullsymbols", "full demangled symbols"},
+ {DataSource::kShortSymbols, "shortsymbols", "short demangled symbols"},
+};
+
+// Element count of a true array. Not valid for pointers: an array that has
+// decayed to a pointer yields a meaningless result.
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+// Returns the name that the data_sources table associates with `source`.
+// If no entry matches, prints a diagnostic to stderr and exits with status 1.
+const char* GetDataSourceLabel(DataSource source) {
+  for (const DataSourceDefinition& def : data_sources) {
+    if (def.number == source) {
+      return def.name;
+    }
+  }
+
+  fprintf(stderr, "Unknown data source label: %d\n", static_cast<int>(source));
+  exit(1);
+  return nullptr;  // Not reached; keeps every path returning a value.
+}
+
+// Returns -1, 0 or 1 according to whether `val` is negative, zero or positive.
+int SignOf(long val) {
+  // Each comparison yields 0 or 1, so the difference is exactly the sign.
+  return (val > 0) - (val < 0);
+}
+
+// Adds `val` to `*accum`, throwing (via THROW) "integer overflow" when the
+// sum is not representable in A. A must be a signed integer type; B must be
+// an integer type of the same width and may be signed or unsigned.
+template <typename A, typename B>
+void CheckedAdd(A* accum, B val) {
+  // We've only implemented the portable version for a subset of possible types.
+  static_assert(std::is_signed<A>::value, "requires signed A");
+  static_assert(sizeof(A) == sizeof(B), "requires integers of the same type");
+#if ABSL_HAVE_BUILTIN(__builtin_add_overflow)
+  if (__builtin_add_overflow(*accum, val, accum)) {
+    THROW("integer overflow");
+  }
+#else
+  // Portable fallback. The bounds that matter are those of A (the type that
+  // holds the result), not of B, and the checks themselves must not overflow.
+  typedef typename std::make_unsigned<A>::type U;
+  const A kMin = std::numeric_limits<A>::min();
+  const A kMax = std::numeric_limits<A>::max();
+
+  bool safe;
+  if (val < 0) {
+    // Only reachable when B is signed. For val in [kMin, -1] the difference
+    // kMin - val lies in [kMin + 1, 0], so it cannot overflow.
+    safe = *accum >= kMin - static_cast<A>(val);
+  } else {
+    // kMax - *accum lies in [0, 2^N - 1] for every *accum, so computing it
+    // in the unsigned type is exact; val is non-negative and also fits in U.
+    safe = static_cast<U>(val) <=
+           static_cast<U>(kMax) - static_cast<U>(*accum);
+  }
+
+  if (!safe) {
+    THROW("integer overflow");
+  }
+  // The mathematical sum is known to fit in A; add modulo 2^N and convert.
+  *accum = static_cast<A>(static_cast<U>(*accum) + static_cast<U>(val));
+#endif
+}
+
+// Returns `str` as a single CSV field. Following RFC 4180, a field that
+// contains a double quote, a comma or a line break is wrapped in double
+// quotes, with each embedded double quote doubled. Any other field is
+// returned unchanged.
+static std::string CSVEscape(string_view str) {
+  if (str.find_first_of("\",\r\n") == string_view::npos) {
+    return std::string(str);
+  }
+
+  std::string ret;
+  ret.reserve(str.size() + 2);
+  ret += '"';
+  for (char ch : str) {
+    if (ch == '"') {
+      ret += '"';  // A quote inside a quoted field is written as "".
+    }
+    ret += ch;
+  }
+  ret += '"';
+  return ret;
+}
+
+
+// LineReader / LineIterator ///////////////////////////////////////////////////
+
+// Convenience code for iterating over lines of a pipe.
+
+// Move constructor: takes over the stream, the current line and the EOF flag
+// from `other`. `other` is left without a stream, so its Close() is a no-op.
+LineReader::LineReader(LineReader&& other) {
+  // Nothing to close here: this object is only now being constructed, and
+  // calling Close() first could inspect a not-yet-initialized file_.
+  file_ = other.file_;
+  pclose_ = other.pclose_;
+  eof_ = other.eof_;
+  line_ = std::move(other.line_);
+
+  other.file_ = nullptr;
+}
+
+// Closes the underlying stream, using pclose() when pclose_ is set and
+// fclose() otherwise. Safe to call more than once and on a reader that has
+// no stream.
+void LineReader::Close() {
+  if (!file_) return;
+
+  if (pclose_) {
+    pclose(file_);
+  } else {
+    fclose(file_);
+  }
+  // Forget the handle so that a later Close() cannot close it a second time.
+  file_ = nullptr;
+}
+
+// Reads the next line from file_ into line_, without its trailing newline.
+// Sets eof_ when end of file is hit; in that case line_ holds whatever was
+// read before EOF. A read error prints a message to stderr and exits.
+void LineReader::Next() {
+  char buf[256];
+  line_.clear();
+  do {
+    if (!fgets(buf, sizeof(buf), file_)) {
+      if (feof(file_)) {
+        eof_ = true;
+        break;
+      } else {
+        std::cerr << "Error reading from file.\n";
+        exit(1);
+      }
+    }
+    line_.append(buf);
+    // Lines longer than buf arrive in several chunks; keep reading until a
+    // newline shows up. line_ can still be empty here if the chunk started
+    // with a NUL byte, so check before looking at the last character.
+  } while (!eof_ && (line_.empty() || line_.back() != '\n'));
+
+  if (!eof_) {
+    // The loop only ends without EOF once line_ ends in '\n'; strip it.
+    line_.resize(line_.size() - 1);
+  }
+}
+
+// Iterator pair for range-based for loops: begin() wraps this reader, end()
+// wraps a null reader.
+// NOTE(review): LineIterator is declared elsewhere; presumably it compares
+// equal to end() once the reader reports EOF -- confirm in the header.
+LineIterator LineReader::begin() { return LineIterator(this); }
+LineIterator LineReader::end() { return LineIterator(nullptr); }
+
+// Starts `cmd` with popen() in read mode and returns a LineReader over its
+// output; the reader is told to close the stream with pclose(). If the
+// command cannot be started, prints a message to stderr and exits.
+LineReader ReadLinesFromPipe(const std::string& cmd) {
+  if (FILE* stream = popen(cmd.c_str(), "r")) {
+    return LineReader(stream, true);
+  }
+
+  std::cerr << "Failed to run command: " << cmd << "\n";
+  exit(1);
+}
+
+extern "C" char* __cxa_demangle(const char* mangled_name, char* buf, size_t* n,
+                                int* status);
+
+// Demangles `symbol` in the way selected by `source`:
+//   kRawSymbols   - returned unchanged.
+//   kShortSymbols - demangled with ::Demangle().
+//   kFullSymbols  - demangled with __cxa_demangle().
+// If demangling fails, the original symbol text is returned. Any other
+// `source` value is treated as unreachable.
+std::string ItaniumDemangle(string_view symbol, DataSource source) {
+  if (source == DataSource::kRawSymbols) {
+    // No demangling.
+    return std::string(symbol);
+  }
+
+  // A name starting with "__Z" loses its first underscore so that it starts
+  // with "_Z" before being handed to the demanglers.
+  string_view demangle_from = symbol;
+  if (absl::StartsWith(demangle_from, "__Z")) {
+    demangle_from.remove_prefix(1);
+  }
+
+  // Both demanglers take a NUL-terminated C string, which string_view::data()
+  // does not guarantee; make an owned, terminated copy.
+  const std::string mangled(demangle_from);
+
+  if (source == DataSource::kShortSymbols) {
+    char demangled[1024];
+    if (::Demangle(mangled.c_str(), demangled, sizeof(demangled))) {
+      return std::string(demangled);
+    } else {
+      return std::string(symbol);
+    }
+  } else if (source == DataSource::kFullSymbols) {
+    // With a NULL output buffer __cxa_demangle() returns a malloc()ed string
+    // that we own and must free().
+    char* demangled = __cxa_demangle(mangled.c_str(), NULL, NULL, NULL);
+    if (demangled) {
+      std::string ret(demangled);
+      free(demangled);
+      return ret;
+    } else {
+      return std::string(symbol);
+    }
+  } else {
+    printf("Unexpected source: %d\n", (int)source);
+    BLOATY_UNREACHABLE();
+  }
+}
+
+
+// NameMunger //////////////////////////////////////////////////////////////////
+
+// Compiles `regex` and appends it, paired with `replacement`, to the list of
+// rewrite rules that Munge() consults in insertion order.
+void NameMunger::AddRegex(const std::string& regex,
+                          const std::string& replacement) {
+  regexes_.push_back(
+      std::make_pair(absl::make_unique<ReImpl>(regex), replacement));
+}
+
+// Applies the first registered rule for which ReImpl::Extract() succeeds on
+// `name` and returns the rewritten text. If no rule succeeds, returns `name`
+// unchanged.
+std::string NameMunger::Munge(string_view name) const {
+  std::string original(name);
+  std::string rewritten(name);
+
+  for (auto rule = regexes_.begin(); rule != regexes_.end(); ++rule) {
+    const bool matched =
+        ReImpl::Extract(original, *rule->first, rule->second, &rewritten);
+    if (matched) {
+      return rewritten;
+    }
+  }
+
+  return original;
+}
+
+
+// Rollup //////////////////////////////////////////////////////////////////////
+
+// A Rollup is a hierarchical tally of sizes. Its graphical representation is
+// something like this:
+//
+// 93.3% 93.3% 3.02M Unmapped
+// 38.2% 38.2% 1.16M .debug_info
+// 23.9% 62.1% 740k .debug_str
+// 12.1% 74.2% 374k .debug_pubnames
+// 11.7% 86.0% 363k .debug_loc
+// 8.9% 94.9% 275k [Other]
+// 5.1% 100.0% 158k .debug_ranges
+// 6.7% 100.0% 222k LOAD [R E]
+// 61.0% 61.0% 135k .text
+// 21.4% 82.3% 47.5k .rodata
+// 6.2% 88.5% 13.8k .gcc_except_table
+// 5.9% 94.4% 13.2k .eh_frame
+// 5.6% 100.0% 12.4k [Other]
+// 0.0% 100.0% 1.40k [Other]
+// 100.0% 3.24M TOTAL
+//
+// Rollup is the generic data structure, before we apply output massaging like
+// collapsing excess elements into "[Other]" or sorting.
+
+std::string others_label = "[Other]";
+
+// Hierarchical tally of VM and file sizes. Each node keeps its own totals
+// plus a map from label to child node; see the diagram above for how the
+// tree is eventually rendered.
+class Rollup {
+ public:
+ Rollup() {}
+
+ Rollup(Rollup&& other) = default;
+ Rollup& operator=(Rollup&& other) = default;
+
+ // Adds `size` bytes (to the VM tally if is_vmsize, else to the file tally)
+ // under the label path names[1], names[2], ...
+ void AddSizes(const std::vector<std::string>& names,
+ uint64_t size, bool is_vmsize) {
+ // We start at 1 to exclude the base map (see base_map_).
+ AddInternal(names, 1, size, is_vmsize);
+ }
+
+ // Fills `output` with the sorted/aggregated rows for this rollup in
+ // non-diff mode (percentages are relative to the parent row).
+ void CreateRollupOutput(const Options& options, RollupOutput* output) const {
+ CreateDiffModeRollupOutput(nullptr, options, output);
+ output->diff_mode_ = false;
+ }
+
+ // Fills `output` from this rollup and marks it as diff-mode output. `base`
+ // is forwarded to CreateRows(); when it is non-null, percentages are
+ // computed against the matching node of `base`.
+ void CreateDiffModeRollupOutput(Rollup* base, const Options& options,
+ RollupOutput* output) const {
+ RollupRow* row = &output->toplevel_row_;
+ row->vmsize = vm_total_;
+ row->filesize = file_total_;
+ row->filtered_vmsize = filtered_vm_total_;
+ row->filtered_filesize = filtered_file_total_;
+ row->vmpercent = 100;
+ row->filepercent = 100;
+ output->diff_mode_ = true;
+ CreateRows(row, base, options, true);
+ }
+
+ // Sets the regex used by AddInternal() to decide which regions are counted.
+ // Only the pointer is stored.
+ void SetFilterRegex(const ReImpl* regex) {
+ filter_regex_ = regex;
+ }
+
+ // Subtract the values in "other" from this.
+ // Children present only in "other" are created here first. The filtered_*
+ // totals are not touched.
+ void Subtract(const Rollup& other) {
+ vm_total_ -= other.vm_total_;
+ file_total_ -= other.file_total_;
+
+ for (const auto& other_child : other.children_) {
+ auto& child = children_[other_child.first];
+ if (child.get() == NULL) {
+ child.reset(new Rollup());
+ }
+ child->Subtract(*other_child.second);
+ }
+ }
+
+ // Add the values in "other" to this.
+ // Children present only in "other" are created here first. The filtered_*
+ // totals are not touched.
+ void Add(const Rollup& other) {
+ vm_total_ += other.vm_total_;
+ file_total_ += other.file_total_;
+
+ for (const auto& other_child : other.children_) {
+ auto& child = children_[other_child.first];
+ if (child.get() == NULL) {
+ child.reset(new Rollup());
+ }
+ child->Add(*other_child.second);
+ }
+ }
+
+ int64_t file_total() const { return file_total_; }
+ int64_t filtered_file_total() const { return filtered_file_total_; }
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(Rollup);
+
+ // Bytes counted at this node (and below), per domain.
+ int64_t vm_total_ = 0;
+ int64_t file_total_ = 0;
+ // Bytes rejected by filter_regex_, per domain.
+ int64_t filtered_vm_total_ = 0;
+ int64_t filtered_file_total_ = 0;
+
+ const ReImpl* filter_regex_ = nullptr;
+
+ // Putting Rollup by value seems to work on some compilers/libs but not
+ // others.
+ typedef std::unordered_map<std::string, std::unique_ptr<Rollup>> ChildMap;
+ ChildMap children_;
+ static Rollup* empty_;
+
+ // Returns a shared, lazily allocated empty Rollup. Nothing in this class
+ // deletes it.
+ static Rollup* GetEmpty() {
+ if (!empty_) {
+ empty_ = new Rollup();
+ }
+ return empty_;
+ }
+
+ // Adds "size" bytes to the rollup under the label names[i].
+ // If there are more entries names[i+1, i+2, etc] add them to sub-rollups.
+ void AddInternal(const std::vector<std::string>& names, size_t i,
+ uint64_t size, bool is_vmsize) {
+ if (filter_regex_ != nullptr) {
+ // filter_regex_ is only set in the root rollup, which checks the full
+ // label hierarchy for a match to determine whether a region should be
+ // considered.
+ bool any_matched = false;
+
+ for (const auto& name : names) {
+ if (ReImpl::PartialMatch(name, *filter_regex_)) {
+ any_matched = true;
+ break;
+ }
+ }
+
+ if (!any_matched) {
+ // Ignore this region in the rollup and don't visit sub-rollups.
+ if (is_vmsize) {
+ CheckedAdd(&filtered_vm_total_, size);
+ } else {
+ CheckedAdd(&filtered_file_total_, size);
+ }
+ return;
+ }
+ }
+
+ if (is_vmsize) {
+ CheckedAdd(&vm_total_, size);
+ } else {
+ CheckedAdd(&file_total_, size);
+ }
+
+ // Descend one level per remaining label, creating children on demand.
+ if (i < names.size()) {
+ auto& child = children_[names[i]];
+ if (child.get() == nullptr) {
+ child.reset(new Rollup());
+ }
+ child->AddInternal(names, i + 1, size, is_vmsize);
+ }
+ }
+
+ // Returns part/whole as a percentage. When whole is zero the result is NAN
+ // for part == 0, +INFINITY for part > 0 and -INFINITY for part < 0.
+ static double Percent(int64_t part, int64_t whole) {
+ if (whole == 0) {
+ if (part == 0) {
+ return NAN;
+ } else if (part > 0) {
+ return INFINITY;
+ } else {
+ return -INFINITY;
+ }
+ } else {
+ return static_cast<double>(part) / static_cast<double>(whole) * 100;
+ }
+ }
+
+ void CreateRows(RollupRow* row, const Rollup* base, const Options& options,
+ bool is_toplevel) const;
+ void SortAndAggregateRows(RollupRow* row, const Rollup* base,
+ const Options& options, bool is_toplevel) const;
+};
+
+// Populates row->sorted_children with one row per child whose VM or file
+// total is non-zero, then hands over to SortAndAggregateRows(). With a
+// non-null `base`, the row's own percentages are first set to this node's
+// totals relative to the base node's totals.
+void Rollup::CreateRows(RollupRow* row, const Rollup* base,
+                        const Options& options, bool is_toplevel) const {
+  if (base != nullptr) {
+    // For a diff, the percentage is a comparison against the previous size of
+    // the same label at the same level.
+    row->vmpercent = Percent(vm_total_, base->vm_total_);
+    row->filepercent = Percent(file_total_, base->file_total_);
+  }
+
+  for (const auto& entry : children_) {
+    const Rollup& child = *entry.second;
+    if (child.vm_total_ == 0 && child.file_total_ == 0) {
+      continue;  // Nothing to show for this label.
+    }
+
+    row->sorted_children.emplace_back(entry.first);
+    RollupRow& added = row->sorted_children.back();
+    added.vmsize = child.vm_total_;
+    added.filesize = child.file_total_;
+  }
+
+  SortAndAggregateRows(row, base, options, is_toplevel);
+}
+
+Rollup* Rollup::empty_;
+
+// Post-processes row->sorted_children:
+//   1. drops a redundant single child ("[None]"/"[Unmapped]" below the top
+//      level, or a child named like its parent);
+//   2. keeps the options.max_rows_per_level() largest rows by magnitude and
+//      folds the rest into one "[N Others]" row;
+//   3. sorts the result by signed value;
+//   4. in non-diff mode (base == nullptr) computes each child's percentage
+//      of the parent;
+//   5. recurses into every child through CreateRows().
+void Rollup::SortAndAggregateRows(RollupRow* row, const Rollup* base,
+                                  const Options& options,
+                                  bool is_toplevel) const {
+  std::vector<RollupRow>& child_rows = row->sorted_children;
+
+  // We don't want to output a solitary "[None]" or "[Unmapped]" row except at
+  // the top level.
+  if (!is_toplevel && child_rows.size() == 1 &&
+      (child_rows[0].name == "[None]" || child_rows[0].name == "[Unmapped]")) {
+    child_rows.clear();
+  }
+
+  // We don't want to output a single row that has exactly the same size and
+  // label as the parent.
+  if (child_rows.size() == 1 && child_rows[0].name == row->name) {
+    child_rows.clear();
+  }
+
+  if (child_rows.empty()) {
+    return;
+  }
+
+  // First sort by magnitude.
+  for (auto& child : child_rows) {
+    switch (options.sort_by()) {
+      case Options::SORTBY_VMSIZE:
+        child.sortkey = std::abs(child.vmsize);
+        break;
+      case Options::SORTBY_FILESIZE:
+        child.sortkey = std::abs(child.filesize);
+        break;
+      case Options::SORTBY_BOTH:
+        child.sortkey =
+            std::max(std::abs(child.vmsize), std::abs(child.filesize));
+        break;
+      default:
+        BLOATY_UNREACHABLE();
+    }
+  }
+
+  std::sort(child_rows.begin(), child_rows.end(), &RollupRow::Compare);
+
+  const size_t row_limit = static_cast<size_t>(options.max_rows_per_level());
+
+  RollupRow others_row(others_label);
+  // Only count "others" when there really are rows beyond the limit, so the
+  // unsigned subtraction cannot wrap around.
+  others_row.other_count =
+      child_rows.size() > row_limit ? child_rows.size() - row_limit : 0;
+  others_row.name = absl::Substitute("[$0 Others]", others_row.other_count);
+  // Stand-ins that let the "[N Others]" row go through CreateRows() like a
+  // real child further down.
+  Rollup others_rollup;
+  Rollup others_base;
+
+  // Filter out everything but the top 'row_limit'. Add rows that were filtered
+  // out to "others_row". Rows are taken from the back, i.e. smallest first.
+  // Looping on the vector's size (rather than on a decrementing index) stays
+  // well-defined even when row_limit is 0.
+  while (child_rows.size() > row_limit) {
+    const RollupRow& dropped = child_rows.back();
+    CheckedAdd(&others_row.vmsize, dropped.vmsize);
+    CheckedAdd(&others_row.filesize, dropped.filesize);
+    if (base) {
+      auto it = base->children_.find(dropped.name);
+      if (it != base->children_.end()) {
+        CheckedAdd(&others_base.vm_total_, it->second->vm_total_);
+        CheckedAdd(&others_base.file_total_, it->second->file_total_);
+      }
+    }
+
+    child_rows.pop_back();
+  }
+
+  if (std::abs(others_row.vmsize) > 0 || std::abs(others_row.filesize) > 0) {
+    child_rows.push_back(others_row);
+    CheckedAdd(&others_rollup.vm_total_, others_row.vmsize);
+    CheckedAdd(&others_rollup.file_total_, others_row.filesize);
+  }
+
+  // Now sort by actual value (positive or negative).
+  for (auto& child : child_rows) {
+    switch (options.sort_by()) {
+      case Options::SORTBY_VMSIZE:
+        child.sortkey = child.vmsize;
+        break;
+      case Options::SORTBY_FILESIZE:
+        child.sortkey = child.filesize;
+        break;
+      case Options::SORTBY_BOTH:
+        if (std::abs(child.vmsize) > std::abs(child.filesize)) {
+          child.sortkey = child.vmsize;
+        } else {
+          child.sortkey = child.filesize;
+        }
+        break;
+      default:
+        BLOATY_UNREACHABLE();
+    }
+  }
+
+  std::sort(child_rows.begin(), child_rows.end(), &RollupRow::Compare);
+
+  // For a non-diff, the percentage is compared to the total size of the parent.
+  if (!base) {
+    for (auto& child_row : child_rows) {
+      child_row.vmpercent = Percent(child_row.vmsize, row->vmsize);
+      child_row.filepercent = Percent(child_row.filesize, row->filesize);
+    }
+  }
+
+  // Recurse into sub-rows, (except "Other", which isn't a real row).
+  for (auto& child_row : child_rows) {
+    const Rollup* child_rollup;
+    const Rollup* child_base = nullptr;
+
+    if (child_row.other_count > 0) {
+      child_rollup = &others_rollup;
+      if (base) {
+        child_base = &others_base;
+      }
+    } else {
+      auto it = children_.find(child_row.name);
+      if (it == children_.end()) {
+        THROWF("internal error, couldn't find name $0", child_row.name);
+      }
+      child_rollup = it->second.get();
+      assert(child_rollup);
+
+      if (base) {
+        // A label missing from the base is compared against an empty rollup.
+        auto base_it = base->children_.find(child_row.name);
+        if (base_it == base->children_.end()) {
+          child_base = GetEmpty();
+        } else {
+          child_base = base_it->second.get();
+        }
+      }
+    }
+
+    child_rollup->CreateRows(&child_row, child_base, options, false);
+  }
+}
+
+
+// RollupOutput ////////////////////////////////////////////////////////////////
+
+// RollupOutput represents rollup data after we have applied output massaging
+// like collapsing excess rows into "[Other]" and sorted the output. Once the
+// data is in this format, we can print it to the screen (or verify the output
+// in unit tests).
+
+namespace {
+
+// Returns `input` forced to exactly `size` characters: longer input is
+// truncated, shorter input is padded on the right with spaces.
+std::string FixedWidthString(const std::string& input, size_t size) {
+  std::string ret(input, 0, size);  // At most the first `size` characters.
+  ret.resize(size, ' ');            // Pad with spaces if that was too short.
+  return ret;
+}
+
+// True unless the output is restricted to the VM domain.
+bool ShowFile(const OutputOptions& options) {
+ return options.show != ShowDomain::kShowVM;
+}
+
+// True unless the output is restricted to the file domain.
+bool ShowVM(const OutputOptions& options) {
+ return options.show != ShowDomain::kShowFile;
+}
+
+// Returns `input` right-aligned in a field of `size` characters by adding
+// spaces on the left. Input that is already at least `size` long is
+// returned unchanged (never truncated).
+std::string LeftPad(const std::string& input, size_t size) {
+  if (input.size() >= size) {
+    return input;
+  }
+
+  return std::string(size - input.size(), ' ') + input;
+}
+
+// Formats the single double `d` with the printf-style format `fmt` and
+// returns the text. The result is limited to the 1024-byte scratch buffer.
+std::string DoubleStringPrintf(const char *fmt, double d) {
+  char formatted[1024];
+  snprintf(formatted, sizeof formatted, fmt, d);
+  return formatted;
+}
+
+// Renders `size` with a binary prefix (none, Ki, Mi, Gi, Ti), left-padded to
+// at least 7 characters. The value is divided by 1024 while its magnitude
+// exceeds 1024 and a larger prefix is available. Magnitudes above 100 (and
+// all unscaled values) print as integers, above 10 with one decimal, and
+// otherwise with two. With force_sign, positive values get a leading '+'.
+std::string SiPrint(int64_t size, bool force_sign) {
+  static const char* const kPrefixes[] = {"", "Ki", "Mi", "Gi", "Ti"};
+  // Index of the largest prefix. Deriving the loop bound from the table
+  // keeps "Ti" reachable.
+  const size_t kLastPrefix = sizeof(kPrefixes) / sizeof(kPrefixes[0]) - 1;
+
+  size_t n = 0;
+  double size_d = size;
+  while (fabs(size_d) > 1024 && n < kLastPrefix) {
+    size_d /= 1024;
+    n++;
+  }
+
+  std::string ret;
+
+  if (fabs(size_d) > 100 || n == 0) {
+    // to_string() already emits '-' for negatives; only '+' must be added.
+    ret = std::to_string(static_cast<int64_t>(size_d)) + kPrefixes[n];
+    if (force_sign && size > 0) {
+      ret = "+" + ret;
+    }
+  } else if (fabs(size_d) > 10) {
+    if (force_sign) {
+      ret = DoubleStringPrintf("%+0.1f", size_d) + kPrefixes[n];
+    } else {
+      ret = DoubleStringPrintf("%0.1f", size_d) + kPrefixes[n];
+    }
+  } else {
+    if (force_sign) {
+      ret = DoubleStringPrintf("%+0.2f", size_d) + kPrefixes[n];
+    } else {
+      ret = DoubleStringPrintf("%0.2f", size_d) + kPrefixes[n];
+    }
+  }
+
+  return LeftPad(ret, 7);
+}
+
+// Formats a percentage column.
+// Non-diff mode: plain "%5.1F%%".
+// Diff mode: 0 or NaN is shown as unchanged, exactly -100 as deleted, any
+// infinity as new; everything else is printed with an explicit sign,
+// switching to a compact exponent form above 1000, and left-padded to 6.
+std::string PercentString(double percent, bool diff_mode) {
+  if (!diff_mode) {
+    return DoubleStringPrintf("%5.1F%%", percent);
+  }
+
+  if (percent == 0 || std::isnan(percent)) {
+    return " [ = ]";
+  }
+  if (percent == -100) {
+    return " [DEL]";
+  }
+  if (std::isinf(percent)) {
+    return " [NEW]";
+  }
+
+  // We want to keep this fixed-width even if the percent is very large.
+  std::string str;
+  if (percent > 1000) {
+    int digits = log10(percent) - 1;
+    str = DoubleStringPrintf("%+2.0f", percent / pow(10, digits)) + "e" +
+          std::to_string(digits) + "%";
+  } else if (percent > 10) {
+    str = DoubleStringPrintf("%+4.0f%%", percent);
+  } else {
+    str = DoubleStringPrintf("%+5.1F%%", percent);
+  }
+
+  return LeftPad(str, 6);
+}
+
+} // namespace
+
+// Writes one table line for `row` to `out`: `indent` spaces, then the
+// percent/size pair of each enabled domain, then the row name. Rows other
+// than the top-level row are skipped when only one domain is shown and the
+// row's size in that domain is zero.
+void RollupOutput::PrettyPrintRow(const RollupRow& row, size_t indent,
+                                  const OutputOptions& options,
+                                  std::ostream* out) const {
+  const bool show_file = ShowFile(options);
+  const bool show_vm = ShowVM(options);
+
+  if (&row != &toplevel_row_) {
+    // Avoid printing this row if it is only zero.
+    // This can happen when using --domain if the row is zero for this domain.
+    const bool vm_only_and_zero = !show_file && row.vmsize == 0;
+    const bool file_only_and_zero = !show_vm && row.filesize == 0;
+    if (vm_only_and_zero || file_only_and_zero) {
+      return;
+    }
+  }
+
+  *out << FixedWidthString("", indent) << " ";
+
+  if (show_file) {
+    *out << PercentString(row.filepercent, diff_mode_) << " "
+         << SiPrint(row.filesize, diff_mode_) << " ";
+  }
+
+  if (show_vm) {
+    *out << PercentString(row.vmpercent, diff_mode_) << " "
+         << SiPrint(row.vmsize, diff_mode_) << " ";
+  }
+
+  *out << " " << row.name << "\n";
+}
+
+// Two labels count as the same when they are equal, or when one of them
+// ends with the other followed by "]".
+bool RollupOutput::IsSame(const std::string& a, const std::string& b) {
+  return a == b || absl::EndsWith(b, a + "]") || absl::EndsWith(a, b + "]");
+}
+
+// Prints `row` and then, two columns further in, its children recursively.
+// Children are not printed when the row is zero in both domains, or when its
+// only child is a leaf whose name IsSame() as the row's.
+void RollupOutput::PrettyPrintTree(const RollupRow& row, size_t indent,
+                                   const OutputOptions& options,
+                                   std::ostream* out) const {
+  // Rows are printed before their sub-rows.
+  PrettyPrintRow(row, indent, options, out);
+
+  const bool all_zero = row.vmsize == 0 && row.filesize == 0;
+  if (all_zero) {
+    return;
+  }
+
+  const auto& kids = row.sorted_children;
+  const bool redundant_only_child = kids.size() == 1 &&
+                                    kids[0].sorted_children.size() == 0 &&
+                                    IsSame(row.name, kids[0].name);
+  if (redundant_only_child) {
+    return;
+  }
+
+  for (const auto& kid : kids) {
+    PrettyPrintTree(kid, indent + 2, options, out);
+  }
+}
+
+// Writes the whole report to `out`: a two-line header for the enabled
+// domains, every top-level child as a tree, the TOTAL row, and -- when any
+// bytes were filtered out in an enabled domain -- a closing note with the
+// omitted amounts.
+void RollupOutput::PrettyPrint(const OutputOptions& options,
+                               std::ostream* out) const {
+  const bool show_file = ShowFile(options);
+  const bool show_vm = ShowVM(options);
+
+  // Header line with the column titles.
+  if (show_file) {
+    *out << " FILE SIZE ";
+  }
+  if (show_vm) {
+    *out << " VM SIZE ";
+  }
+  *out << "\n";
+
+  // Header underline.
+  if (show_file) {
+    *out << " -------------- ";
+  }
+  if (show_vm) {
+    *out << " -------------- ";
+  }
+  *out << "\n";
+
+  for (const auto& child : toplevel_row_.sorted_children) {
+    PrettyPrintTree(child, 0, options, out);
+  }
+
+  // The "TOTAL" row comes after all other rows.
+  PrettyPrintRow(toplevel_row_, 0, options, out);
+
+  // Only the domains being shown contribute to the filtering note.
+  uint64_t file_filtered = 0;
+  uint64_t vm_filtered = 0;
+  if (show_file) {
+    file_filtered = toplevel_row_.filtered_filesize;
+  }
+  if (show_vm) {
+    vm_filtered = toplevel_row_.filtered_vmsize;
+  }
+
+  const bool nothing_filtered = vm_filtered == 0 && file_filtered == 0;
+  if (nothing_filtered) {
+    return;
+  }
+
+  *out << "Filtering enabled (source_filter); omitted";
+
+  if (file_filtered > 0 && vm_filtered > 0) {
+    *out << " file =" << SiPrint(file_filtered, /*force_sign=*/false)
+         << ", vm =" << SiPrint(vm_filtered, /*force_sign=*/false);
+  } else if (file_filtered > 0) {
+    *out << SiPrint(file_filtered, /*force_sign=*/false);
+  } else {
+    *out << SiPrint(vm_filtered, /*force_sign=*/false);
+  }
+
+  *out << " of entries\n";
+}
+
+// Emits one CSV/TSV record for a leaf row: the labels collected on the way
+// down (padded with empty fields up to the number of data sources), followed
+// by the row's vmsize and filesize.
+void RollupOutput::PrintRowToCSV(const RollupRow& row,
+                                 std::vector<std::string> parent_labels,
+                                 std::ostream* out, bool tabs) const {
+  // Levels for which this row had no data become empty columns.
+  if (parent_labels.size() < source_names_.size()) {
+    parent_labels.resize(source_names_.size());
+  }
+
+  parent_labels.push_back(std::to_string(row.vmsize));
+  parent_labels.push_back(std::to_string(row.filesize));
+
+  const std::string delimiter = tabs ? "\t" : ",";
+  *out << absl::StrJoin(parent_labels, delimiter) << "\n";
+}
+
+// Walks the row tree depth-first, accumulating one label per level, and
+// prints a record for every leaf. Labels are CSV-escaped unless TSV output
+// was requested.
+void RollupOutput::PrintTreeToCSV(const RollupRow& row,
+                                  std::vector<std::string> parent_labels,
+                                  std::ostream* out, bool tabs) const {
+  parent_labels.push_back(tabs ? row.name : CSVEscape(row.name));
+
+  if (row.sorted_children.empty()) {
+    PrintRowToCSV(row, parent_labels, out, tabs);
+    return;
+  }
+
+  for (const auto& sub_row : row.sorted_children) {
+    PrintTreeToCSV(sub_row, parent_labels, out, tabs);
+  }
+}
+
+// Prints the header record (data source names, then "vmsize" and "filesize")
+// followed by one record per leaf row, comma- or tab-separated.
+void RollupOutput::PrintToCSV(std::ostream* out, bool tabs) const {
+  std::vector<std::string> header = source_names_;
+  header.emplace_back("vmsize");
+  header.emplace_back("filesize");
+
+  const std::string delimiter = tabs ? "\t" : ",";
+  *out << absl::StrJoin(header, delimiter) << "\n";
+
+  // Every top-level row starts with an empty label path.
+  for (const auto& top : toplevel_row_.sorted_children) {
+    PrintTreeToCSV(top, std::vector<std::string>(), out, tabs);
+  }
+}
+
+// RangeMap ////////////////////////////////////////////////////////////////////
+
+// Namespace-scope definition of a static constexpr class member.
+// NOTE(review): the in-class declaration is not visible in this part of the
+// file; before C++17 a definition like this is required when the member is
+// odr-used.
+constexpr uint64_t RangeSink::kUnknownSize;
+
+
+// MmapInputFile ///////////////////////////////////////////////////////////////
+
+// InputFile whose constructor maps the named file into memory (read-only) and
+// whose destructor unmaps it again.
+class MmapInputFile : public InputFile {
+ public:
+  // explicit: a std::string must never silently convert into a mapped file.
+  // Throws if the file cannot be opened, stat'ed or mapped.
+  explicit MmapInputFile(const std::string& filename);
+  ~MmapInputFile() override;
+
+ private:
+  BLOATY_DISALLOW_COPY_AND_ASSIGN(MmapInputFile);
+};
+
+// Owns a POSIX file descriptor and closes it on destruction. A negative
+// value (e.g. a failed open()) is held but never closed.
+class FileDescriptor {
+ public:
+  // explicit: an int should not silently turn into an owning handle.
+  explicit FileDescriptor(int fd) : fd_(fd) {}
+
+  ~FileDescriptor() {
+    // Only report a failing close(); there is nothing else to do about it
+    // from a destructor.
+    if (fd_ >= 0 && close(fd_) < 0) {
+      fprintf(stderr, "bloaty: error calling close(): %s\n", strerror(errno));
+    }
+  }
+
+  // Returns the raw descriptor; ownership stays with this object.
+  int fd() const { return fd_; }
+
+ private:
+  // A copy would make two objects close the same descriptor.
+  BLOATY_DISALLOW_COPY_AND_ASSIGN(FileDescriptor);
+
+  int fd_;
+};
+
+// Opens `filename` read-only and maps its whole contents; data_ then views
+// the mapping. Throws when open(), fstat() or mmap() fails.
+MmapInputFile::MmapInputFile(const std::string& filename)
+    : InputFile(filename) {
+  // The descriptor is closed when `handle` leaves scope, on both the success
+  // path and the throwing paths.
+  FileDescriptor handle(open(filename.c_str(), O_RDONLY));
+  if (handle.fd() < 0) {
+    THROWF("couldn't open file '$0': $1", filename, strerror(errno));
+  }
+
+  struct stat info;
+  if (fstat(handle.fd(), &info) < 0) {
+    THROWF("couldn't stat file '$0': $1", filename, strerror(errno));
+  }
+
+  void* mapping =
+      mmap(nullptr, info.st_size, PROT_READ, MAP_SHARED, handle.fd(), 0);
+  if (mapping == MAP_FAILED) {
+    THROWF("couldn't mmap file '$0': $1", filename, strerror(errno));
+  }
+
+  data_ = string_view(static_cast<const char*>(mapping), info.st_size);
+}
+
+// Unmaps the region viewed by data_, if any; a failing munmap() is reported
+// on stderr.
+MmapInputFile::~MmapInputFile() {
+  char* base = const_cast<char*>(data_.data());
+  if (base == nullptr) {
+    return;
+  }
+
+  if (munmap(base, data_.size()) != 0) {
+    fprintf(stderr, "bloaty: error calling munmap(): %s\n", strerror(errno));
+  }
+}
+
+// Returns a newly constructed MmapInputFile for `filename`. The
+// MmapInputFile constructor throws if the file cannot be opened, stat'ed or
+// mapped, and that exception propagates to the caller.
+std::unique_ptr<InputFile> MmapInputFileFactory::OpenFile(
+ const std::string& filename) const {
+ return absl::make_unique<MmapInputFile>(filename);
+}
+
+
+// RangeSink ///////////////////////////////////////////////////////////////////
+
+// Stores the arguments without taking ownership of `file` or `translator`.
+// `translator` may be null: the base-map sink is created with nullptr, and
+// the Add*/IsVerbose* methods test translator_ before using it (or assert
+// that it is set).
+RangeSink::RangeSink(const InputFile* file, const Options& options,
+ DataSource data_source, const DualMap* translator)
+ : file_(file),
+ options_(options),
+ data_source_(data_source),
+ translator_(translator) {}
+
+// Nothing to release explicitly.
+RangeSink::~RangeSink() {}
+
+// Assigning -1 to a uint64_t yields UINT64_MAX.
+// NOTE(review): nothing in the visible part of this file reads these two
+// globals; the RangeSink code consults options_.debug_vmaddr() and
+// options_.debug_fileoff() instead. Confirm whether they are still needed.
+uint64_t debug_vmaddr = -1;
+uint64_t debug_fileoff = -1;
+
+// True when verbosity is above 2, or when a debug VM address was requested
+// and it lies inside [vmaddr, vmaddr + vmsize).
+bool RangeSink::ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize) {
+  if (options_.verbose_level() > 2) {
+    return true;
+  }
+  if (!options_.has_debug_vmaddr()) {
+    return false;
+  }
+  return options_.debug_vmaddr() >= vmaddr &&
+         options_.debug_vmaddr() < (vmaddr + vmsize);
+}
+
+// True when verbosity is above 2, or when a debug file offset was requested
+// and it lies inside [fileoff, fileoff + filesize).
+bool RangeSink::ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize) {
+  if (options_.verbose_level() > 2) {
+    return true;
+  }
+  if (!options_.has_debug_fileoff()) {
+    return false;
+  }
+  return options_.debug_fileoff() >= fileoff &&
+         options_.debug_fileoff() < (fileoff + filesize);
+}
+
+// Decides whether operations on the VM range should be logged verbosely:
+// either the range itself matches (see ContainsVerboseVMAddr), or a debug
+// file offset was requested and the range translates, via the base map, to a
+// file range that matches. Throws if vmaddr + vmsize overflows.
+bool RangeSink::IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize) {
+  // An unknown size is treated as "everything up to the top of the space".
+  if (vmsize == RangeMap::kUnknownSize) {
+    vmsize = UINT64_MAX - vmaddr;
+  }
+
+  const bool wraps = vmaddr + vmsize < vmaddr;
+  if (wraps) {
+    THROWF("Overflow in vm range, vmaddr=$0, vmsize=$1", vmaddr, vmsize);
+  }
+
+  if (ContainsVerboseVMAddr(vmaddr, vmsize)) {
+    return true;
+  }
+
+  if (!translator_ || !options_.has_debug_fileoff()) {
+    return false;
+  }
+
+  // Translate into scratch maps and test each resulting file range.
+  RangeMap scratch_vm;
+  RangeMap scratch_file;
+  scratch_vm.AddRangeWithTranslation(vmaddr, vmsize, "", translator_->vm_map,
+                                     false, &scratch_file);
+
+  bool matched = false;
+  scratch_file.ForEachRange(
+      [this, &matched](uint64_t off, uint64_t len) {
+        matched |= ContainsVerboseFileOffset(off, len);
+      });
+  return matched;
+}
+
+// Decides whether operations on the file range should be logged verbosely:
+// either the range itself matches (see ContainsVerboseFileOffset), or a debug
+// VM address was requested and the range translates, via the base map, to a
+// VM range that matches. Throws if fileoff + filesize overflows.
+bool RangeSink::IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize) {
+  // An unknown size is treated as "everything up to the top of the space".
+  if (filesize == RangeMap::kUnknownSize) {
+    filesize = UINT64_MAX - fileoff;
+  }
+
+  const bool wraps = fileoff + filesize < fileoff;
+  if (wraps) {
+    THROWF("Overflow in file range, fileoff=$0, filesize=$1", fileoff,
+           filesize);
+  }
+
+  if (ContainsVerboseFileOffset(fileoff, filesize)) {
+    return true;
+  }
+
+  if (!translator_ || !options_.has_debug_vmaddr()) {
+    return false;
+  }
+
+  // Translate into scratch maps and test each resulting VM range.
+  RangeMap scratch_vm;
+  RangeMap scratch_file;
+  scratch_file.AddRangeWithTranslation(fileoff, filesize, "",
+                                       translator_->file_map, false,
+                                       &scratch_vm);
+
+  bool matched = false;
+  scratch_vm.ForEachRange([this, &matched](uint64_t addr, uint64_t len) {
+    matched |= ContainsVerboseVMAddr(addr, len);
+  });
+  return matched;
+}
+
+// Registers `map` as a destination for every range added to this sink;
+// `munger` is applied to names on the way in (see the Munge() calls in
+// AddFileRange/AddVMRange/AddRange). Neither pointer is owned here.
+void RangeSink::AddOutput(DualMap* map, const NameMunger* munger) {
+ outputs_.push_back(std::make_pair(map, munger));
+}
+
+// Labels the file range [fileoff, fileoff + filesize) with the munged `name`
+// in every output. With a translator the range is also translated into the
+// output's VM map; without one it is added to the file map only.
+void RangeSink::AddFileRange(const char* analyzer, string_view name,
+                             uint64_t fileoff, uint64_t filesize) {
+  const bool verbose = IsVerboseForFileRange(fileoff, filesize);
+  if (verbose) {
+    printf("[%s, %s] AddFileRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
+           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
+           name.data(), fileoff, filesize);
+  }
+
+  for (auto& output : outputs_) {
+    const std::string label = output.second->Munge(name);
+
+    if (!translator_) {
+      output.first->file_map.AddRange(fileoff, filesize, label);
+      continue;
+    }
+
+    const bool in_base = output.first->file_map.AddRangeWithTranslation(
+        fileoff, filesize, label, translator_->file_map, verbose,
+        &output.first->vm_map);
+    if (!in_base) {
+      WARN("File range ($0, $1) for label $2 extends beyond base map",
+           fileoff, filesize, name);
+    }
+  }
+}
+
+// Gives `file_range` (a view into the input file's data) the label already
+// assigned to VM address `label_from_vmaddr` in each output. Outputs with no
+// label at that address are left untouched. Requires a translator.
+void RangeSink::AddFileRangeForVMAddr(const char* analyzer,
+                                      uint64_t label_from_vmaddr,
+                                      string_view file_range) {
+  // The view's position relative to the start of the file is its offset.
+  const uint64_t file_offset = file_range.data() - file_->data().data();
+  const bool verbose = IsVerboseForFileRange(file_offset, file_range.size());
+  if (verbose) {
+    printf("[%s, %s] AddFileRangeForVMAddr(%" PRIx64 ", [%" PRIx64 ", %zx])\n",
+           GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr,
+           file_offset, file_range.size());
+  }
+  assert(translator_);
+
+  for (auto& output : outputs_) {
+    std::string label;
+    if (!output.first->vm_map.TryGetLabel(label_from_vmaddr, &label)) {
+      if (verbose_level > 2) {
+        printf("No label found for vmaddr %" PRIx64 "\n", label_from_vmaddr);
+      }
+      continue;
+    }
+
+    const bool in_base = output.first->file_map.AddRangeWithTranslation(
+        file_offset, file_range.size(), label, translator_->file_map, verbose,
+        &output.first->vm_map);
+    if (!in_base) {
+      WARN("File range ($0, $1) for label $2 extends beyond base map",
+           file_offset, file_range.size(), label);
+    }
+  }
+}
+
+// Gives `file_range` the label that each output already holds for
+// `from_file_range`; both are views into the input file's data. Outputs with
+// no label for the source range are left untouched. Requires a translator.
+void RangeSink::AddFileRangeForFileRange(const char* analyzer,
+                                         absl::string_view from_file_range,
+                                         absl::string_view file_range) {
+  const char* file_start = file_->data().data();
+  const uint64_t file_offset = file_range.data() - file_start;
+  const uint64_t from_file_offset = from_file_range.data() - file_start;
+  const bool verbose = IsVerboseForFileRange(file_offset, file_range.size());
+  if (verbose) {
+    printf("[%s, %s] AddFileRangeForFileRange([%" PRIx64 ", %zx], [%" PRIx64
+           ", %zx])\n",
+           GetDataSourceLabel(data_source_), analyzer, from_file_offset,
+           from_file_range.size(), file_offset, file_range.size());
+  }
+  assert(translator_);
+
+  for (auto& output : outputs_) {
+    std::string label;
+    const bool have_label = output.first->file_map.TryGetLabelForRange(
+        from_file_offset, from_file_range.size(), &label);
+    if (!have_label) {
+      if (verbose_level > 2) {
+        printf("No label found for file range [%" PRIx64 ", %zx]\n",
+               from_file_offset, from_file_range.size());
+      }
+      continue;
+    }
+
+    const bool in_base = output.first->file_map.AddRangeWithTranslation(
+        file_offset, file_range.size(), label, translator_->file_map, verbose,
+        &output.first->vm_map);
+    if (!in_base) {
+      WARN("File range ($0, $1) for label $2 extends beyond base map",
+           file_offset, file_range.size(), label);
+    }
+  }
+}
+
+// Gives the VM range [addr, addr + size) the label already assigned to VM
+// address `label_from_vmaddr` in each output. The "extends beyond base map"
+// warning is only issued when verbose_level is above 0. Requires a
+// translator.
+void RangeSink::AddVMRangeForVMAddr(const char* analyzer,
+                                    uint64_t label_from_vmaddr, uint64_t addr,
+                                    uint64_t size) {
+  const bool verbose = IsVerboseForVMRange(addr, size);
+  if (verbose) {
+    printf("[%s, %s] AddVMRangeForVMAddr(%" PRIx64 ", [%" PRIx64 ", %" PRIx64
+           "])\n",
+           GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr, addr,
+           size);
+  }
+  assert(translator_);
+
+  for (auto& output : outputs_) {
+    std::string label;
+    if (!output.first->vm_map.TryGetLabel(label_from_vmaddr, &label)) {
+      if (verbose_level > 2) {
+        printf("No label found for vmaddr %" PRIx64 "\n", label_from_vmaddr);
+      }
+      continue;
+    }
+
+    const bool in_base = output.first->vm_map.AddRangeWithTranslation(
+        addr, size, label, translator_->vm_map, verbose,
+        &output.first->file_map);
+    if (!in_base && verbose_level > 0) {
+      WARN("VM range ($0, $1) for label $2 extends beyond base map", addr,
+           size, label);
+    }
+  }
+}
+
+// Labels the VM range [vmaddr, vmaddr + vmsize) with the munged `name` in
+// every output, translating it into the output's file map through the base
+// map. Requires a translator.
+void RangeSink::AddVMRange(const char* analyzer, uint64_t vmaddr,
+                           uint64_t vmsize, const std::string& name) {
+  const bool verbose = IsVerboseForVMRange(vmaddr, vmsize);
+  if (verbose) {
+    printf("[%s, %s] AddVMRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
+           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
+           name.data(), vmaddr, vmsize);
+  }
+  assert(translator_);
+
+  for (auto& output : outputs_) {
+    const std::string label = output.second->Munge(name);
+    const bool in_base = output.first->vm_map.AddRangeWithTranslation(
+        vmaddr, vmsize, label, translator_->vm_map, verbose,
+        &output.first->file_map);
+    if (in_base) {
+      continue;
+    }
+    WARN("VM range ($0, $1) for label $2 extends beyond base map", vmaddr,
+         vmsize, name);
+  }
+}
+
+// Currently identical to AddVMRange(): the arguments are forwarded unchanged
+// and no alias tracking or verification is performed (see TODOs).
+void RangeSink::AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr,
+ uint64_t size, const std::string& name) {
+ // TODO: maybe track alias (but what would we use it for?)
+ // TODO: verify that it is in fact an alias.
+ AddVMRange(analyzer, vmaddr, size, name);
+}
+
+// Currently identical to AddVMRange(): the arguments are forwarded unchanged,
+// so any warning AddVMRange() emits is not yet suppressed (see TODO).
+void RangeSink::AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr,
+ uint64_t vmsize,
+ const std::string& name) {
+ // TODO suppress warning that AddVMRange alone might trigger.
+ AddVMRange(analyzer, vmaddr, vmsize, name);
+}
+
+// Adds a range that has both a VM extent and a file extent. The part the two
+// have in common is added as a dual (mutually translatable) range; whatever
+// one side has beyond that is added to that side alone. Throws when either
+// size is unknown, or when a translator is present and does not cover the
+// range.
+void RangeSink::AddRange(const char* analyzer, string_view name,
+                         uint64_t vmaddr, uint64_t vmsize, uint64_t fileoff,
+                         uint64_t filesize) {
+  // AddRange() is what segments and sections use to establish the
+  // file <-> vm mapping, so the sizes should always be known; it would also
+  // be unclear what the logic below should do with an unknown size.
+  const bool size_unknown = vmsize == RangeMap::kUnknownSize ||
+                            filesize == RangeMap::kUnknownSize;
+  if (size_unknown) {
+    THROW("AddRange() does not allow unknown size.");
+  }
+
+  if (IsVerboseForVMRange(vmaddr, vmsize) ||
+      IsVerboseForFileRange(fileoff, filesize)) {
+    printf("[%s, %s] AddRange(%.*s, %" PRIx64 ", %" PRIx64 ", %" PRIx64
+           ", %" PRIx64 ")\n",
+           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
+           name.data(), vmaddr, vmsize, fileoff, filesize);
+  }
+
+  if (translator_) {
+    const bool covered = translator_->vm_map.CoversRange(vmaddr, vmsize) &&
+                         translator_->file_map.CoversRange(fileoff, filesize);
+    if (!covered) {
+      THROW("Tried to add range that is not covered by base map.");
+    }
+  }
+
+  // Length over which VM and file extents correspond one-to-one.
+  const uint64_t shared = std::min(vmsize, filesize);
+
+  for (auto& output : outputs_) {
+    const std::string label = output.second->Munge(name);
+    auto& vm = output.first->vm_map;
+    auto& file = output.first->file_map;
+
+    vm.AddDualRange(vmaddr, shared, fileoff, label);
+    file.AddDualRange(fileoff, shared, vmaddr, label);
+
+    // Remainders beyond the shared prefix.
+    vm.AddRange(vmaddr + shared, vmsize - shared, label);
+    file.AddRange(fileoff + shared, filesize - shared, label);
+  }
+}
+
+// Converts a pointer into the input file's data to the VM address the base
+// map assigns to that file offset. Throws when the pointer is outside the
+// file or the offset has no translation. Requires a translator.
+uint64_t RangeSink::TranslateFileToVM(const char* ptr) {
+  assert(translator_);
+  const uint64_t offset = ptr - file_->data().data();
+  uint64_t vmaddr;
+  const bool ok = FileContainsPointer(ptr) &&
+                  translator_->file_map.Translate(offset, &vmaddr);
+  if (!ok) {
+    THROWF("Can't translate file offset ($0) to VM, contains: $1, map:\n$2",
+           offset, FileContainsPointer(ptr),
+           translator_->file_map.DebugString().c_str());
+  }
+  return vmaddr;
+}
+
+// Converts a VM address to a view of the file data starting at the
+// corresponding file offset and running to the end of the file. Throws when
+// the address has no translation or the offset lies past the end of the
+// file. Requires a translator.
+absl::string_view RangeSink::TranslateVMToFile(uint64_t address) {
+  assert(translator_);
+  uint64_t fileoff;
+  const bool ok = translator_->vm_map.Translate(address, &fileoff) &&
+                  fileoff <= file_->data().size();
+  if (!ok) {
+    THROW("Can't translate VM pointer to file");
+  }
+  return file_->data().substr(fileoff);
+}
+
+// ThreadSafeIterIndex /////////////////////////////////////////////////////////
+
+// Hands out the indexes 0..max-1, each at most once, to concurrent callers,
+// and lets any caller stop the iteration early with an error message.
+class ThreadSafeIterIndex {
+ public:
+  ThreadSafeIterIndex(int max) : index_(0), max_(max) {}
+
+  // Claims the next index. Returns false, leaving *index untouched, once the
+  // indexes are exhausted (or Abort() has been called).
+  bool TryGetNext(int* index) {
+    const int claimed = index_.fetch_add(1, std::memory_order_relaxed);
+    if (claimed < max_) {
+      *index = claimed;
+      return true;
+    }
+    return false;
+  }
+
+  // Stops further indexes from being handed out and records `error`.
+  void Abort(string_view error) {
+    std::lock_guard<std::mutex> guard(mutex_);
+    index_ = max_;
+    error_ = std::string(error);
+  }
+
+  // Copies the recorded error into *error and returns true; returns false
+  // when the stored message is empty.
+  bool TryGetError(std::string* error) {
+    std::lock_guard<std::mutex> guard(mutex_);
+    if (!error_.empty()) {
+      *error = error_;
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  std::atomic<int> index_;
+  std::string error_;
+  std::mutex mutex_;
+  const int max_;
+};
+
+
+// Bloaty //////////////////////////////////////////////////////////////////////
+
+// Represents a program execution and associated state.
+
+// A data source definition together with the state chosen for this run: the
+// source actually used for scanning and the NameMunger applied to its labels.
+struct ConfiguredDataSource {
+ // Starts with effective_source equal to definition_.number and a freshly
+ // allocated NameMunger.
+ ConfiguredDataSource(const DataSourceDefinition& definition_)
+ : definition(definition_),
+ effective_source(definition_.number),
+ munger(new NameMunger()) {}
+
+ // Held by reference, so the referenced definition must outlive this object.
+ const DataSourceDefinition& definition;
+ // This will differ from definition.number for kSymbols, where we use the
+ // --demangle flag to set the true/effective source.
+ DataSource effective_source;
+ std::unique_ptr<NameMunger> munger;
+};
+
+// One run of the program: the input, base and debug files that were
+// registered, the data sources that are known and selected, and the
+// operations that scan them.
+class Bloaty {
+ public:
+  Bloaty(const InputFileFactory& factory, const Options& options);
+
+  // File registration.
+  void AddFilename(const std::string& filename, bool base_file);
+  void AddDebugFilename(const std::string& filename);
+
+  // Number of data sources selected so far via AddDataSource().
+  size_t GetSourceCount() const { return sources_.size(); }
+
+  // Data source registration and selection.
+  void DefineCustomDataSource(const CustomDataSource& source);
+  void AddDataSource(const std::string& name);
+
+  // Analyses.
+  void ScanAndRollup(const Options& options, RollupOutput* output);
+  void DisassembleFunction(string_view function, const Options& options,
+                           RollupOutput* output);
+
+ private:
+  BLOATY_DISALLOW_COPY_AND_ASSIGN(Bloaty);
+
+  // Registers every entry of a built-in definition table under its name. For
+  // the kSymbols source the effective source is resolved from the demangle
+  // option right away.
+  template <size_t T>
+  void AddBuiltInSources(const DataSourceDefinition (&sources)[T],
+                         const Options& options) {
+    for (const DataSourceDefinition& source : sources) {
+      auto configured = absl::make_unique<ConfiguredDataSource>(source);
+
+      if (configured->effective_source == DataSource::kSymbols) {
+        configured->effective_source = EffectiveSymbolSource(options);
+      }
+
+      all_known_sources_[source.name] = std::move(configured);
+    }
+  }
+
+  // Maps the demangle mode in `options` to the matching symbol data source.
+  static DataSource EffectiveSymbolSource(const Options& options) {
+    switch (options.demangle()) {
+      case Options::DEMANGLE_NONE:
+        return DataSource::kRawSymbols;
+      case Options::DEMANGLE_SHORT:
+        return DataSource::kShortSymbols;
+      case Options::DEMANGLE_FULL:
+        return DataSource::kFullSymbols;
+      default:
+        BLOATY_UNREACHABLE();
+    }
+  }
+
+  void ScanAndRollupFiles(const std::vector<std::string>& filenames,
+                          std::vector<std::string>* build_ids,
+                          Rollup* rollup) const;
+  void ScanAndRollupFile(const std::string& filename, Rollup* rollup,
+                         std::vector<std::string>* out_build_ids) const;
+
+  std::unique_ptr<ObjectFile> GetObjectFile(const std::string& filename) const;
+
+  const InputFileFactory& file_factory_;
+  const Options options_;
+
+  // Every data source, built-in or custom, keyed by name.
+  std::map<std::string, std::unique_ptr<ConfiguredDataSource>>
+      all_known_sources_;
+
+  // The sources the user selected, in selection order. The pointers refer to
+  // entries owned by all_known_sources_.
+  std::vector<ConfiguredDataSource*> sources_;
+  std::vector<std::string> source_names_;
+
+  // A registered file and the build ID read from it.
+  struct InputFileInfo {
+    std::string filename_;
+    std::string build_id_;
+  };
+  std::vector<InputFileInfo> input_files_;
+  std::vector<InputFileInfo> base_files_;
+  // Debug file name keyed by build ID.
+  std::map<std::string, std::string> debug_files_;
+};
+
+// Keeps a reference to `factory`, copies `options`, and registers the
+// built-in data sources from the data_sources table.
+Bloaty::Bloaty(const InputFileFactory& factory, const Options& options)
+ : file_factory_(factory), options_(options) {
+ AddBuiltInSources(data_sources, options);
+}
+
+// Opens `filename` through the file factory and tries each supported format
+// in turn: ELF, then Mach-O, then WebAssembly. Throws when none of them
+// accepts the file.
+std::unique_ptr<ObjectFile> Bloaty::GetObjectFile(
+    const std::string& filename) const {
+  std::unique_ptr<InputFile> file(file_factory_.OpenFile(filename));
+
+  auto object_file = TryOpenELFFile(file);
+  if (!object_file) {
+    object_file = TryOpenMachOFile(file);
+  }
+  if (!object_file) {
+    object_file = TryOpenWebAssemblyFile(file);
+  }
+  if (!object_file) {
+    THROWF("unknown file type for file '$0'", filename.c_str());
+  }
+
+  return object_file;
+}
+
+// Opens `filename` to read its build ID, then records the file either as a
+// base file or as a regular input file.
+void Bloaty::AddFilename(const std::string& filename, bool is_base) {
+  auto object_file = GetObjectFile(filename);
+  std::string build_id = object_file->GetBuildId();
+
+  std::vector<InputFileInfo>& destination =
+      is_base ? base_files_ : input_files_;
+  destination.push_back({filename, build_id});
+}
+
+// Registers `filename` as a debug file under its build ID. Throws when the
+// file has no build ID.
+void Bloaty::AddDebugFilename(const std::string& filename) {
+  auto object_file = GetObjectFile(filename);
+  const std::string build_id = object_file->GetBuildId();
+
+  if (build_id.empty()) {
+    THROWF("File '$0' has no build ID, cannot be used as a debug file",
+           filename);
+  }
+
+  debug_files_[build_id] = filename;
+}
+
+// Registers a custom data source: a known base source plus regex rewrites
+// applied to its labels. Throws when the base is 'symbols', is unknown, or
+// already has rewrites of its own.
+void Bloaty::DefineCustomDataSource(const CustomDataSource& source) {
+  if (source.base_data_source() == "symbols") {
+    THROW(
+        "For custom data sources, use one of {rawsymbols, shortsymbols, "
+        "fullsymbols} for base_data_source instead of 'symbols', so you aren't "
+        "sensitive to the --demangle parameter.");
+  }
+
+  auto base = all_known_sources_.find(source.base_data_source());
+
+  if (base == all_known_sources_.end()) {
+    THROWF("custom data source '$0': no such base source '$1'.\nTry --list-sources to see valid sources.", source.name(),
+           source.base_data_source());
+  }
+
+  // A base with a non-empty munger is treated as a custom source itself.
+  if (!base->second->munger->IsEmpty()) {
+    THROWF("custom data source '$0' tries to depend on custom data source '$1'",
+           source.name(), source.base_data_source());
+  }
+
+  all_known_sources_[source.name()] =
+      absl::make_unique<ConfiguredDataSource>(base->second->definition);
+
+  NameMunger* rewrites = all_known_sources_[source.name()]->munger.get();
+  for (const auto& rule : source.rewrite()) {
+    rewrites->AddRegex(rule.pattern(), rule.replacement());
+  }
+}
+
+// Selects the data source called `name` for the next scan. Throws when no
+// such source is known; in that case nothing is recorded, so source_names_
+// and sources_ always have the same length.
+void Bloaty::AddDataSource(const std::string& name) {
+  // Validate before touching any state so a failed lookup cannot leave a
+  // name without a matching source.
+  auto it = all_known_sources_.find(name);
+  if (it == all_known_sources_.end()) {
+    THROWF("no such data source: $0.\nTry --list-sources to see valid sources.", name);
+  }
+
+  source_names_.emplace_back(name);
+  sources_.emplace_back(it->second.get());
+}
+
+// All of the DualMaps for a given file.
+// All of the DualMaps for a given file. The map at index 0 is the base map;
+// every AppendMap() call adds one more behind it.
+struct DualMaps {
+ public:
+  DualMaps() {
+    // Base map.
+    AppendMap();
+  }
+
+  // Creates a new empty DualMap, owned by this object, and returns it.
+  DualMap* AppendMap() {
+    maps_.emplace_back(new DualMap);
+    return maps_.back().get();
+  }
+
+  // Compresses every map, then feeds the VM sizes and the file sizes of all
+  // maps into `rollup`.
+  void ComputeRollup(Rollup* rollup) {
+    for (auto& map : maps_) {
+      map->vm_map.Compress();
+      map->file_map.Compress();
+    }
+    RangeMap::ComputeRollup(VmMaps(), [=](const std::vector<std::string>& keys,
+                                          uint64_t addr, uint64_t end) {
+      return rollup->AddSizes(keys, end - addr, true);
+    });
+    RangeMap::ComputeRollup(
+        FileMaps(),
+        [=](const std::vector<std::string>& keys, uint64_t addr, uint64_t end) {
+          return rollup->AddSizes(keys, end - addr, false);
+        });
+  }
+
+  // Prints one row per range of the combined maps, plus a
+  // "[-- Nothing mapped --]" row for each gap between consecutive ranges.
+  // `maps` must not be empty; its first entry determines the address width.
+  void PrintMaps(const std::vector<const RangeMap*>& maps) {
+    uint64_t last = 0;
+    const int hex_digits = HexDigits(maps[0]->GetMaxAddress());
+    RangeMap::ComputeRollup(maps, [&](const std::vector<std::string>& keys,
+                                      uint64_t addr, uint64_t end) {
+      if (addr > last) {
+        PrintMapRow("[-- Nothing mapped --]", last, addr, hex_digits);
+      }
+      PrintMapRow(KeysToString(keys), addr, end, hex_digits);
+      last = end;
+    });
+    printf("\n");
+  }
+
+  void PrintFileMaps() { PrintMaps(FileMaps()); }
+  void PrintVMMaps() { PrintMaps(VmMaps()); }
+
+  // Joins the keys with tabs, leaving out the base map's key.
+  std::string KeysToString(const std::vector<std::string>& keys) {
+    std::string ret;
+
+    // Start at offset 1 to skip the base map.
+    for (size_t i = 1; i < keys.size(); i++) {
+      if (i > 1) {
+        ret += "\t";
+      }
+      ret += keys[i];
+    }
+
+    return ret;
+  }
+
+  // Prints "start-end <size> <label>" with both addresses zero-padded to at
+  // least `hex_digits` hex digits.
+  void PrintMapRow(string_view str, uint64_t start, uint64_t end, int hex_digits) {
+    printf("%.*" PRIx64 "-%.*" PRIx64 "\t %s\t\t%.*s\n", hex_digits, start,
+           hex_digits, end, LeftPad(std::to_string(end - start), 10).c_str(),
+           (int)str.size(), str.data());
+  }
+
+  DualMap* base_map() { return maps_[0].get(); }
+
+ private:
+  // Number of hex digits needed to print `value` (at least 1). Integer
+  // arithmetic keeps this well-defined for 0 and exact for powers of 16,
+  // unlike ceil(log2(value) / 4).
+  static int HexDigits(uint64_t value) {
+    int digits = 1;
+    while (value >>= 4) {
+      ++digits;
+    }
+    return digits;
+  }
+
+  std::vector<const RangeMap*> VmMaps() const {
+    std::vector<const RangeMap*> ret;
+    for (const auto& map : maps_) {
+      ret.push_back(&map->vm_map);
+    }
+    return ret;
+  }
+
+  std::vector<const RangeMap*> FileMaps() const {
+    std::vector<const RangeMap*> ret;
+    for (const auto& map : maps_) {
+      ret.push_back(&map->file_map);
+    }
+    return ret;
+  }
+
+  std::vector<std::unique_ptr<DualMap>> maps_;
+};
+
+// Scans one input file and adds its sizes to `rollup`.
+//
+// One RangeSink is created for the base map plus one per selected data
+// source; the object file fills the sinks it is handed, the input-file and
+// raw-range sources are filled in here, and the resulting maps are rolled up.
+// If a registered debug file has the same build ID as the input, it is
+// attached to the file and the build ID is appended to *out_build_ids.
+void Bloaty::ScanAndRollupFile(const std::string &filename, Rollup* rollup,
+ std::vector<std::string>* out_build_ids) const {
+ auto file = GetObjectFile(filename);
+
+ DualMaps maps;
+ std::vector<std::unique_ptr<RangeSink>> sinks;
+ // Sinks handed to the object file for processing.
+ std::vector<RangeSink*> sink_ptrs;
+ // Sinks for the kInputFiles source, filled in below from the base map.
+ std::vector<RangeSink*> filename_sink_ptrs;
+
+ // Base map always goes first.
+ sinks.push_back(absl::make_unique<RangeSink>(&file->file_data(), options_,
+ DataSource::kSegments, nullptr));
+ NameMunger empty_munger;
+ sinks.back()->AddOutput(maps.base_map(), &empty_munger);
+ sink_ptrs.push_back(sinks.back().get());
+
+ for (auto source : sources_) {
+ // Every non-base sink uses the base map as its translator.
+ sinks.push_back(absl::make_unique<RangeSink>(&file->file_data(), options_,
+ source->effective_source,
+ maps.base_map()));
+ sinks.back()->AddOutput(maps.AppendMap(), source->munger.get());
+ // We handle the kInputFiles data source internally, without handing it off
+ // to the file format implementation. This seems slightly simpler, since
+ // the file format has to deal with armembers too.
+ if (source->effective_source == DataSource::kRawRanges) {
+ // Do nothing, we'll fill this in later.
+ } else if (source->effective_source == DataSource::kInputFiles) {
+ filename_sink_ptrs.push_back(sinks.back().get());
+ } else {
+ sink_ptrs.push_back(sinks.back().get());
+ }
+ }
+
+ // Attach the matching debug file, if one was registered for this build ID.
+ // debug_file is declared at function scope so that it stays alive for the
+ // rest of the function.
+ std::unique_ptr<ObjectFile> debug_file;
+ std::string build_id = file->GetBuildId();
+ if (!build_id.empty()) {
+ auto iter = debug_files_.find(build_id);
+ if (iter != debug_files_.end()) {
+ debug_file = GetObjectFile(iter->second);
+ file->set_debug_file(debug_file.get());
+ out_build_ids->push_back(build_id);
+ }
+ }
+
+ // Remember the running total so this file's contribution can be checked
+ // after the rollup below.
+ int64_t filesize_before = rollup->file_total() +
+ rollup->filtered_file_total();
+ file->ProcessFile(sink_ptrs);
+
+ // kInputFile source: Copy the base map to the filename sink(s).
+ for (auto sink : filename_sink_ptrs) {
+ maps.base_map()->vm_map.ForEachRange(
+ [sink](uint64_t start, uint64_t length) {
+ sink->AddVMRange("inputfile_vmcopier", start, length,
+ sink->input_file().filename());
+ });
+ maps.base_map()->file_map.ForEachRange(
+ [sink](uint64_t start, uint64_t length) {
+ sink->AddFileRange("inputfile_filecopier",
+ sink->input_file().filename(), start, length);
+ });
+ }
+
+ // kRawRange source: add the directly preceding map's ranges, with labels
+ // indicating the range.
+ for (size_t i = 1; i < sinks.size(); i++) {
+ if (sinks[i]->data_source() == DataSource::kRawRanges) {
+ RangeSink* ranges_sink = sinks[i].get();
+ RangeSink* from = sinks[i - 1].get();
+ from->MapAtIndex(0).vm_map.ForEachRange([ranges_sink](uint64_t start,
+ uint64_t length) {
+ ranges_sink->AddVMRange("rawrange_vmcopier", start, length,
+ absl::StrCat("vm: [", absl::Hex(start), ", ",
+ absl::Hex(start + length), "]"));
+ });
+ from->MapAtIndex(0).file_map.ForEachRange(
+ [ranges_sink](uint64_t start, uint64_t length) {
+ ranges_sink->AddFileRange(
+ "rawrange_filecopier",
+ absl::StrCat("file: [", absl::Hex(start), ", ",
+ absl::Hex(start + length), "]"),
+ start, length);
+ });
+ }
+ }
+
+ maps.ComputeRollup(rollup);
+
+ // The ObjectFile implementation must guarantee this.
+ // The (void) cast keeps `filesize` from being reported as unused when
+ // assert() is compiled out.
+ // NOTE(review): the assert compares an int64_t with the value returned by
+ // size(), presumably an unsigned type - confirm.
+ int64_t filesize = rollup->file_total() +
+ rollup->filtered_file_total() - filesize_before;
+ (void)filesize;
+ assert(filesize == file->file_data().data().size());
+
+ // With any verbosity, dump the complete file and VM maps.
+ if (verbose_level > 0) {
+ printf("FILE MAP:\n");
+ maps.PrintFileMaps();
+ printf("VM MAP:\n");
+ maps.PrintVMMaps();
+ }
+}
+
+// Scans `filenames` on a pool of worker threads and merges the per-thread
+// results into *rollup, appending the build IDs of matched debug files to
+// *build_ids.
+//
+// Each worker repeatedly claims the next unscanned file. If a worker hits a
+// bloaty::Error, remaining files are abandoned and the error is rethrown from
+// this function once every thread has been joined.
+void Bloaty::ScanAndRollupFiles(
+    const std::vector<std::string>& filenames,
+    std::vector<std::string>* build_ids,
+    Rollup * rollup) const {
+  // hardware_concurrency() may return 0 when the value cannot be determined;
+  // without the clamp that would start no threads and silently scan nothing.
+  const int num_cpus =
+      std::max(1, static_cast<int>(std::thread::hardware_concurrency()));
+  const int num_threads =
+      std::min(num_cpus, static_cast<int>(filenames.size()));
+
+  struct PerThreadData {
+    Rollup rollup;
+    std::vector<std::string> build_ids;
+  };
+
+  std::vector<PerThreadData> thread_data(num_threads);
+  std::vector<std::thread> threads(num_threads);
+  ThreadSafeIterIndex index(filenames.size());
+
+  // One compiled filter is shared by all per-thread rollups; it stays null
+  // when no source filter was given.
+  std::unique_ptr<ReImpl> regex = nullptr;
+  if (options_.has_source_filter()) {
+    regex = absl::make_unique<ReImpl>(options_.source_filter());
+  }
+
+  for (int i = 0; i < num_threads; i++) {
+    thread_data[i].rollup.SetFilterRegex(regex.get());
+
+    threads[i] = std::thread([this, &index, &filenames](PerThreadData* data) {
+      try {
+        int j;
+        while (index.TryGetNext(&j)) {
+          ScanAndRollupFile(filenames[j], &data->rollup, &data->build_ids);
+        }
+      } catch (const bloaty::Error& e) {
+        // Exceptions cannot cross the thread boundary; record the message
+        // and stop handing out further files.
+        index.Abort(e.what());
+      }
+    }, &thread_data[i]);
+  }
+
+  for (int i = 0; i < num_threads; i++) {
+    threads[i].join();
+    PerThreadData* data = &thread_data[i];
+    // The first thread's rollup becomes the result; the others are added.
+    if (i == 0) {
+      *rollup = std::move(data->rollup);
+    } else {
+      rollup->Add(data->rollup);
+    }
+
+    build_ids->insert(build_ids->end(),
+                      data->build_ids.begin(),
+                      data->build_ids.end());
+  }
+
+  std::string error;
+  if (index.TryGetError(&error)) {
+    THROW(error.c_str());
+  }
+}
+
+// Scans every input file (and, in diff mode, every base file), writes the
+// result to `output`, and finally throws if any registered debug file was
+// not matched to an input by build ID.
+void Bloaty::ScanAndRollup(const Options& options, RollupOutput* output) {
+  if (input_files_.empty()) {
+    THROW("no filename specified");
+  }
+
+  for (const auto& source_name : source_names_) {
+    output->AddDataSourceName(source_name);
+  }
+
+  Rollup rollup;
+  std::vector<std::string> build_ids;
+
+  std::vector<std::string> input_filenames;
+  for (const auto& info : input_files_) {
+    input_filenames.push_back(info.filename_);
+  }
+  ScanAndRollupFiles(input_filenames, &build_ids, &rollup);
+
+  if (base_files_.empty()) {
+    rollup.CreateRollupOutput(options, output);
+  } else {
+    // Diff mode: subtract the base files' rollup from the inputs'.
+    Rollup base;
+    std::vector<std::string> base_filenames;
+    for (const auto& info : base_files_) {
+      base_filenames.push_back(info.filename_);
+    }
+    ScanAndRollupFiles(base_filenames, &build_ids, &base);
+    rollup.Subtract(base);
+    rollup.CreateDiffModeRollupOutput(&base, options, output);
+  }
+
+  // Whatever is left in debug_files_ afterwards was never used.
+  for (const auto& used_id : build_ids) {
+    debug_files_.erase(used_id);
+  }
+
+  if (debug_files_.empty()) {
+    return;
+  }
+
+  // Error out if some --debug-files were not used.
+  std::string unused_debug;
+  for (const auto& entry : debug_files_) {
+    unused_debug += absl::Substitute(
+        "$0 $1\n",
+        absl::BytesToHexString(entry.first).c_str(),
+        entry.second.c_str());
+  }
+
+  std::string input_files;
+  for (const auto& info : input_files_) {
+    input_files += absl::Substitute(
+        "$0 $1\n", absl::BytesToHexString(info.build_id_).c_str(),
+        info.filename_.c_str());
+  }
+  for (const auto& info : base_files_) {
+    input_files += absl::Substitute(
+        "$0 $1\n", absl::BytesToHexString(info.build_id_).c_str(),
+        info.filename_.c_str());
+  }
+
+  THROWF(
+      "Debug file(s) did not match any input file:\n$0\nInput Files:\n$1",
+      unused_debug.c_str(), input_files.c_str());
+}
+
+// Searches the input files, in order, for `function` and stores the
+// disassembly of the first match in `output`. Throws when no input file
+// contains the function.
+void Bloaty::DisassembleFunction(string_view function, const Options& options,
+                                 RollupOutput* output) {
+  DisassemblyInfo info;
+  for (const auto& candidate : input_files_) {
+    auto file = GetObjectFile(candidate.filename_);
+    const bool found = file->GetDisassemblyInfo(
+        function, EffectiveSymbolSource(options), &info);
+    if (!found) {
+      continue;
+    }
+    output->SetDisassembly(::bloaty::DisassembleFunction(info));
+    return;
+  }
+
+  THROWF("Couldn't find function $0 to disassemble", function);
+}
+
+// Command-line help text, kept as a raw string literal so the layout below is
+// exactly what gets printed. The code that prints it is not in this part of
+// the file.
+const char usage[] = R"(Bloaty McBloatface: a size profiler for binaries.
+
+USAGE: bloaty [OPTION]... FILE... [-- BASE_FILE...]
+
+Options:
+
+ --csv Output in CSV format instead of human-readable.
+ --tsv Output in TSV format instead of human-readable.
+ -c FILE Load configuration from <file>.
+ -d SOURCE,SOURCE Comma-separated list of sources to scan.
+ --debug-file=FILE Use this file for debug symbols and/or symbol table.
+ -C MODE How to demangle symbols. Possible values are:
+ --demangle=MODE --demangle=none no demangling, print raw symbols
+ --demangle=short demangle, but omit arg/return types
+ --demangle=full print full demangled type
+ The default is --demangle=short.
+ --disassemble=FUNCTION
+ Disassemble this function (EXPERIMENTAL)
+ --domain=DOMAIN Which domains to show. Possible values are:
+ --domain=vm
+ --domain=file
+ --domain=both (the default)
+ -n NUM How many rows to show per level before collapsing
+ other keys into '[Other]'. Set to '0' for unlimited.
+ Defaults to 20.
+ -s SORTBY Whether to sort by VM or File size. Possible values
+ are:
+ -s vm
+ -s file
+ -s both (the default: sorts by max(vm, file)).
+ -w Wide output; don't truncate long labels.
+ --help Display this message and exit.
+ --list-sources Show a list of available sources and exit.
+ --source-filter=PATTERN
+ Only show keys with names matching this pattern.
+
+Options for debugging Bloaty:
+
+ --debug-vmaddr=ADDR
+ --debug-fileoff=OFF
+ Print extended debugging information for the given
+ VM address and/or file offset.
+ -v Verbose output. Dumps warnings encountered during
+ processing and full VM/file maps at the end.
+ Add more v's (-vv, -vvv) for even more.
+)";
+
+// Cursor over the command line.
+//
+// The constructor copies the incoming argv, resets *argc to 0, and saves the
+// executable name (when there is one) back as the first output argument.
+// ConsumeArg() drops the current argument; ConsumeAndSaveArg() appends it to
+// the caller's argv and bumps the caller's argc, so after parsing the caller
+// is left with only the saved arguments.
+class ArgParser {
+ public:
+  ArgParser(int* argc, char** argv[])
+      : argc_(*argc),
+        argv_(*argv, *argv + *argc),
+        out_argc_(argc),
+        out_argv_(argv) {
+    *out_argc_ = 0;
+    // An empty argv has no executable name; reading argv_[0] there would be
+    // out of bounds.
+    if (!IsDone()) {
+      ConsumeAndSaveArg();  // Executable name.
+    }
+  }
+
+  // True once every argument has been consumed.
+  bool IsDone() { return index_ == argc_; }
+
+  // The current argument. Must not be called when IsDone().
+  string_view Arg() {
+    assert(!IsDone());
+    return string_view(argv_[index_]);
+  }
+
+  // Returns the current argument and advances past it.
+  string_view ConsumeArg() {
+    string_view ret = Arg();
+    index_++;
+    return ret;
+  }
+
+  // Appends the current argument to the caller's argv and advances past it.
+  void ConsumeAndSaveArg() {
+    (*out_argv_)[(*out_argc_)++] = argv_[index_++];
+  }
+
+  // Singular flag like --csv or -v.
+  bool TryParseFlag(string_view flag) {
+    if (Arg() == flag) {
+      ConsumeArg();
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Option taking an argument, for example:
+  //   -n 20
+  //   --config=file.bloaty
+  //
+  // For --long-options we accept both:
+  //   --long_option value
+  //   --long_option=value
+  //
+  // Throws when the flag is the last argument and so has no value.
+  bool TryParseOption(string_view flag, string_view* val) {
+    assert(flag.size() > 1);
+    bool is_long = flag[1] == '-';
+    string_view arg = Arg();
+    if (TryParseFlag(flag)) {
+      if (IsDone()) {
+        THROWF("option '$0' requires an argument", flag);
+      }
+      *val = ConsumeArg();
+      return true;
+    } else if (is_long && absl::ConsumePrefix(&arg, std::string(flag) + "=")) {
+      *val = arg;
+      index_++;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Like TryParseOption(), but the value must parse as an int; throws
+  // otherwise.
+  bool TryParseIntegerOption(string_view flag, int* val) {
+    string_view val_str;
+    if (!TryParseOption(flag, &val_str)) {
+      return false;
+    }
+
+    if (!absl::SimpleAtoi(val_str, val)) {
+      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
+    }
+
+    return true;
+  }
+
+  // Like TryParseOption(), but the value must parse as an unsigned 64-bit
+  // integer. Base 0 lets std::stoull auto-detect a 0x (hex) or 0 (octal)
+  // prefix. Throws when the value does not parse or is followed by trailing
+  // characters.
+  bool TryParseUint64Option(string_view flag, uint64_t* val) {
+    string_view val_str;
+    if (!TryParseOption(flag, &val_str)) {
+      return false;
+    }
+
+    const std::string text(val_str);
+    size_t consumed = 0;
+    try {
+      *val = std::stoull(text, &consumed, 0);
+    } catch (...) {
+      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
+    }
+
+    // stoull stops at the first character it cannot use; anything left over
+    // (as in "12abc") means the argument was not a number.
+    if (consumed != text.size()) {
+      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
+    }
+
+    return true;
+  }
+
+ public:
+  int argc_;
+  std::vector<char*> argv_;
+  int* out_argc_;
+  char*** out_argv_;
+  int index_ = 0;
+};
+
+// Parses the command line in |*argc| / |*argv| into |options| and
+// |output_options|.
+//
+// ArgParser rewrites |*argc| / |*argv| in place: consumed arguments are
+// dropped, and only the executable name plus (when |skip_unknown| is true) any
+// unrecognized "-"-prefixed arguments are kept. When |skip_unknown| is false an
+// unrecognized option is an error.
+//
+// Arguments that do not start with "-" are input filenames; those that appear
+// after a "--" separator are recorded as base filenames instead.
+//
+// Returns true when parsing succeeded. Returns false after handling --help or
+// --list-sources. --version prints the version and exits the process
+// directly. Invalid input is reported through THROW/THROWF.
+bool DoParseOptions(bool skip_unknown, int* argc, char** argv[],
+ Options* options, OutputOptions* output_options) {
+ bool saw_separator = false;
+ ArgParser args(argc, argv);
+ string_view option;
+ int int_option;
+ uint64_t uint64_option;
+ // Tracks whether --domain was given, so that the sort order can default to
+ // the shown domain below.
+ bool has_domain = false;
+
+ // Each TryParse*() call consumes the current argument(s) on a match, so the
+ // order of this chain matters; the generic "-" prefix check must stay after
+ // all of the specific options.
+ while (!args.IsDone()) {
+ if (args.TryParseFlag("--")) {
+ if (saw_separator) {
+ THROW("'--' option should only be specified once");
+ }
+ saw_separator = true;
+ } else if (args.TryParseFlag("--csv")) {
+ output_options->output_format = OutputFormat::kCSV;
+ } else if (args.TryParseFlag("--tsv")) {
+ output_options->output_format = OutputFormat::kTSV;
+ } else if (args.TryParseOption("-c", &option)) {
+ // Merge a text-format Options message read from the given file into
+ // |options|.
+ std::ifstream input_file(std::string(option), std::ios::in);
+ if (!input_file.is_open()) {
+ THROWF("couldn't open file $0", option);
+ }
+ google::protobuf::io::IstreamInputStream stream(&input_file);
+ if (!google::protobuf::TextFormat::Merge(&stream, options)) {
+ THROWF("error parsing configuration out of file $0", option);
+ }
+ } else if (args.TryParseOption("-d", &option)) {
+ // Comma-separated list of data source names.
+ std::vector<std::string> names = absl::StrSplit(option, ',');
+ for (const auto& name : names) {
+ options->add_data_source(name);
+ }
+ } else if (args.TryParseOption("-C", &option) ||
+ args.TryParseOption("--demangle", &option)) {
+ if (option == "none") {
+ options->set_demangle(Options::DEMANGLE_NONE);
+ } else if (option == "short") {
+ options->set_demangle(Options::DEMANGLE_SHORT);
+ } else if (option == "full") {
+ options->set_demangle(Options::DEMANGLE_FULL);
+ } else {
+ THROWF("unknown value for --demangle: $0", option);
+ }
+ } else if (args.TryParseOption("--debug-file", &option)) {
+ options->add_debug_filename(std::string(option));
+ } else if (args.TryParseUint64Option("--debug-fileoff", &uint64_option)) {
+ if (options->has_debug_fileoff()) {
+ THROW("currently we only support a single debug fileoff");
+ }
+ options->set_debug_fileoff(uint64_option);
+ } else if (args.TryParseUint64Option("--debug-vmaddr", &uint64_option)) {
+ if (options->has_debug_vmaddr()) {
+ THROW("currently we only support a single debug vmaddr");
+ }
+ options->set_debug_vmaddr(uint64_option);
+ } else if (args.TryParseOption("--disassemble", &option)) {
+ options->mutable_disassemble_function()->assign(std::string(option));
+ } else if (args.TryParseIntegerOption("-n", &int_option)) {
+ // "-n 0" means "no limit": store the largest representable row count.
+ if (int_option == 0) {
+ options->set_max_rows_per_level(INT64_MAX);
+ } else {
+ options->set_max_rows_per_level(int_option);
+ }
+ } else if (args.TryParseOption("--domain", &option)) {
+ has_domain = true;
+ if (option == "vm") {
+ output_options->show = ShowDomain::kShowVM;
+ } else if (option == "file") {
+ output_options->show = ShowDomain::kShowFile;
+ } else if (option == "both") {
+ output_options->show = ShowDomain::kShowBoth;
+ } else {
+ THROWF("unknown value for --domain: $0", option);
+ }
+ } else if (args.TryParseOption("-s", &option)) {
+ if (option == "vm") {
+ options->set_sort_by(Options::SORTBY_VMSIZE);
+ } else if (option == "file") {
+ options->set_sort_by(Options::SORTBY_FILESIZE);
+ } else if (option == "both") {
+ options->set_sort_by(Options::SORTBY_BOTH);
+ } else {
+ THROWF("unknown value for -s: $0", option);
+ }
+ } else if (args.TryParseOption("--source-filter", &option)) {
+ options->set_source_filter(std::string(option));
+ } else if (args.TryParseFlag("-v")) {
+ options->set_verbose_level(1);
+ } else if (args.TryParseFlag("-vv")) {
+ options->set_verbose_level(2);
+ } else if (args.TryParseFlag("-vvv")) {
+ options->set_verbose_level(3);
+ } else if (args.TryParseFlag("-w")) {
+ output_options->max_label_len = SIZE_MAX;
+ } else if (args.TryParseFlag("--list-sources")) {
+ // Note: the list is written to stderr, not stdout.
+ for (const auto& source : data_sources) {
+ fprintf(stderr, "%s %s\n", FixedWidthString(source.name, 15).c_str(),
+ source.description);
+ }
+ return false;
+ } else if (args.TryParseFlag("--help")) {
+ puts(usage);
+ return false;
+ } else if (args.TryParseFlag("--version")) {
+ // Unlike --help, this terminates the process instead of returning.
+ printf("Bloaty McBloatface 1.1\n");
+ exit(0);
+ } else if (absl::StartsWith(args.Arg(), "-")) {
+ // Unrecognized option: either hand it back to the caller or fail.
+ if (skip_unknown) {
+ args.ConsumeAndSaveArg();
+ } else {
+ THROWF("Unknown option: $0", args.Arg());
+ }
+ } else {
+ // Positional argument: a filename, or a base filename after "--".
+ if (saw_separator) {
+ options->add_base_filename(std::string(args.ConsumeArg()));
+ } else {
+ options->add_filename(std::string(args.ConsumeArg()));
+ }
+ }
+ }
+
+ if (options->data_source_size() == 0 &&
+ !options->has_disassemble_function()) {
+ // Default when no sources are specified.
+ options->add_data_source("sections");
+ }
+
+ if (has_domain && !options->has_sort_by()) {
+ // Default to sorting by what we are showing.
+ switch (output_options->show) {
+ case ShowDomain::kShowFile:
+ options->set_sort_by(Options::SORTBY_FILESIZE);
+ break;
+ case ShowDomain::kShowVM:
+ options->set_sort_by(Options::SORTBY_VMSIZE);
+ break;
+ case ShowDomain::kShowBoth:
+ options->set_sort_by(Options::SORTBY_BOTH);
+ break;
+ }
+ }
+
+ return true;
+}
+
+// Non-throwing wrapper around DoParseOptions().
+//
+// Returns whatever DoParseOptions() returns. If it throws a bloaty::Error, the
+// message is stored in |*error| and false is returned. Note that a false
+// return with |*error| left untouched is also possible (DoParseOptions()
+// returns false after --help and --list-sources).
+bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
+                  OutputOptions* output_options, std::string* error) {
+  bool parsed = false;
+  try {
+    parsed = DoParseOptions(skip_unknown, argc, argv, options, output_options);
+  } catch (const bloaty::Error& err) {
+    *error = err.what();
+  }
+  return parsed;
+}
+
+// Runs one analysis described by |options|, writing the result to |output|.
+//
+// Validates the options, registers every input, base and debug file as well
+// as the custom and selected data sources with a Bloaty instance, and then
+// either scans and rolls up the data sources or, when no data source is
+// selected but a function name is, disassembles that function. Problems are
+// reported through THROW.
+void BloatyDoMain(const Options& options, const InputFileFactory& file_factory,
+                  RollupOutput* output) {
+  bloaty::Bloaty bloaty(file_factory, options);
+
+  if (options.filename().empty()) {
+    THROW("must specify at least one file");
+  }
+
+  if (options.max_rows_per_level() <= 0) {
+    THROW("max_rows_per_level must be at least 1");
+  }
+
+  // Files to analyze (is_base == false), then the baseline files for diff mode.
+  for (const auto& input_path : options.filename()) {
+    bloaty.AddFilename(input_path, false);
+  }
+  for (const auto& base_path : options.base_filename()) {
+    bloaty.AddFilename(base_path, true);
+  }
+
+  for (const auto& debug_path : options.debug_filename()) {
+    bloaty.AddDebugFilename(debug_path);
+  }
+
+  // Custom sources are defined before the selected sources are added.
+  for (const auto& custom_source : options.custom_data_source()) {
+    bloaty.DefineCustomDataSource(custom_source);
+  }
+  for (const auto& source_name : options.data_source()) {
+    bloaty.AddDataSource(source_name);
+  }
+
+  // Reject an unparseable filter up front, before doing any scanning.
+  if (options.has_source_filter() && !ReImpl(options.source_filter()).ok()) {
+    THROW("invalid regex for source_filter");
+  }
+
+  verbose_level = options.verbose_level();
+
+  const bool have_sources = options.data_source_size() > 0;
+  if (have_sources) {
+    bloaty.ScanAndRollup(options, output);
+  } else if (options.has_disassemble_function()) {
+    bloaty.DisassembleFunction(options.disassemble_function(), options, output);
+  }
+}
+
+// Non-throwing wrapper around BloatyDoMain().
+//
+// Returns true on success. If BloatyDoMain() throws a bloaty::Error, stores
+// its message in |*error| and returns false.
+bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
+                RollupOutput* output, std::string* error) {
+  bool succeeded = false;
+  try {
+    BloatyDoMain(options, file_factory, output);
+    succeeded = true;
+  } catch (const bloaty::Error& err) {
+    *error = err.what();
+  }
+  return succeeded;
+}
+
+} // namespace bloaty
diff --git a/src/bloaty.h b/src/bloaty.h
new file mode 100644
index 0000000..bf90945
--- /dev/null
+++ b/src/bloaty.h
@@ -0,0 +1,604 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains APIs for use within Bloaty. None of these APIs have any
+// guarantees whatsoever about their stability! The public API for bloaty is
+// its command-line interface.
+
+#ifndef BLOATY_H_
+#define BLOATY_H_
+
+#include <stdlib.h>
+#define __STDC_LIMIT_MACROS
+#define __STDC_FORMAT_MACROS
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+#include "capstone/capstone.h"
+
+#include "bloaty.pb.h"
+#include "range_map.h"
+#include "re.h"
+
+// Deletes the copy constructor and copy-assignment operator of |class_name|.
+// Intended for use inside the class body. The expansion already ends with a
+// ';', so a ';' written after an invocation is an extra empty declaration.
+#define BLOATY_DISALLOW_COPY_AND_ASSIGN(class_name) \
+ class_name(const class_name&) = delete; \
+ void operator=(const class_name&) = delete;
+
+// Marks a point that control flow must never reach: asserts (when assertions
+// are enabled) and then invokes the compiler builtin __builtin_unreachable().
+// Wrapped in do { } while (0) so that it behaves as a single statement.
+#define BLOATY_UNREACHABLE() do { \
+ assert(false); \
+ __builtin_unreachable(); \
+} while (0)
+
+#ifdef NDEBUG
+// Prevent "unused variable" warnings.
+// |expr| sits behind "false &&", so it is still compiled (and its variables
+// count as used) but is never evaluated.
+#define BLOATY_ASSERT(expr) do {} while (false && (expr))
+#else
+#define BLOATY_ASSERT(expr) assert(expr)
+#endif
+
+namespace bloaty {
+
+extern int verbose_level;
+
+class NameMunger;
+class Options;
+struct DualMap;
+struct DisassemblyInfo;
+
+// Identifies which kind of data a RangeSink is collecting (each RangeSink is
+// constructed with exactly one DataSource).
+enum class DataSource {
+ kArchiveMembers,
+ kCompileUnits,
+ kInlines,
+ kInputFiles,
+ kRawRanges,
+ kSections,
+ kSegments,
+
+ // We always set this to one of the concrete symbol types below before
+ // setting it on a sink.
+ kSymbols,
+
+ // Concrete symbol types.
+ // NOTE(review): presumably these correspond to the none/full/short demangle
+ // modes -- confirm against the code that selects them.
+ kRawSymbols,
+ kFullSymbols,
+ kShortSymbols
+};
+
+// Exception type used for errors reported by Bloaty. In addition to the
+// message (available through what()), it records a file name and line number
+// supplied by the thrower.
+// NOTE(review): presumably these are the __FILE__/__LINE__ of the throw site --
+// confirm against the THROW/THROWF macros.
+class Error : public std::runtime_error {
+ public:
+ // |file| is stored as a raw pointer and is not copied, so it must remain
+ // valid for as long as the exception object is used.
+ Error(const char* msg, const char* file, int line)
+ : std::runtime_error(msg), file_(file), line_(line) {}
+
+ // TODO(haberman): add these to Bloaty's error message when verbose is
+ // enabled.
+ const char* file() const { return file_; }
+ int line() const { return line_; }
+
+ private:
+ const char* file_;
+ int line_;
+};
+
+// Base class for an opened input file: a filename plus a view of its contents.
+// Instances are non-copyable.
+class InputFile {
+ public:
+ InputFile(const std::string& filename) : filename_(filename) {}
+ virtual ~InputFile() {}
+
+ const std::string& filename() const { return filename_; }
+ // View of the file contents. This is a non-owning string_view; it is empty
+ // unless a subclass has assigned data_.
+ absl::string_view data() const { return data_; }
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(InputFile);
+ const std::string filename_;
+
+ protected:
+ // Protected so that subclasses can point it at the loaded contents.
+ // NOTE(review): presumably the subclass owns the underlying storage and keeps
+ // it alive for the lifetime of this object -- confirm in the implementations.
+ absl::string_view data_;
+};
+
+// Abstract factory for opening input files by name. BloatyMain() takes one of
+// these, which lets callers control how file contents are obtained.
+class InputFileFactory {
+ public:
+ virtual ~InputFileFactory() {}
+
+ // Returns the opened file; ownership passes to the caller.
+ // Throws if the file could not be opened.
+ virtual std::unique_ptr<InputFile> OpenFile(
+ const std::string& filename) const = 0;
+};
+
+// Concrete InputFileFactory. The implementation of OpenFile() lives outside
+// this header.
+// NOTE(review): judging by the name, files are presumably memory-mapped --
+// confirm in the implementation.
+class MmapInputFileFactory : public InputFileFactory {
+ public:
+ std::unique_ptr<InputFile> OpenFile(
+ const std::string& filename) const override;
+};
+
+// NOTE: all sizes are uint64, even on 32-bit platforms:
+// - 32-bit platforms can have files >4GB in some cases.
+// - for object files (not executables/shared libs) we pack both a section
+// index and an address into the "vmaddr" value, and we need enough bits to
+// safely do this.
+
+// A RangeSink allows data sources to assign labels to ranges of VM address
+// space and/or file offsets.
+class RangeSink {
+ public:
+ // |file| is stored as a pointer and |options| is copied. |translator| may be
+ // null, in which case this sink is treated as populating the base map (see
+ // IsBaseMap()).
+ RangeSink(const InputFile* file, const Options& options,
+ DataSource data_source, const DualMap* translator);
+ ~RangeSink();
+
+ const Options& options() const { return options_; }
+
+ // Registers an output map together with the munger to use for it. Both are
+ // stored as raw pointers.
+ void AddOutput(DualMap* map, const NameMunger* munger);
+
+ DataSource data_source() const { return data_source_; }
+ const InputFile& input_file() const { return *file_; }
+ bool IsBaseMap() const { return translator_ == nullptr; }
+
+ // If vmsize or filesize is zero, this mapping is presumed not to exist in
+ // that domain. For example, .bss mappings don't exist in the file, and
+ // .debug_* mappings don't exist in memory.
+ void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
+ uint64_t vmsize, uint64_t fileoff, uint64_t filesize);
+
+ // Convenience overload: |file_range| must be a sub-range of
+ // input_file().data(); its file offset is computed by pointer subtraction.
+ // Unlike the AddFileRange() overload below, this does not check that
+ // |file_range| actually lies within the file.
+ void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
+ uint64_t vmsize, absl::string_view file_range) {
+ AddRange(analyzer, name, vmaddr, vmsize,
+ file_range.data() - file_->data().data(), file_range.size());
+ }
+
+ void AddFileRange(const char* analyzer, absl::string_view name,
+ uint64_t fileoff, uint64_t filesize);
+
+ // Like AddFileRange(), but the label is whatever label was previously
+ // assigned to VM address |label_from_vmaddr|. If no existing label is
+ // assigned to |label_from_vmaddr|, this function does nothing.
+ void AddFileRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
+ absl::string_view file_range);
+ // NOTE(review): presumably the VM-range counterpart of
+ // AddFileRangeForVMAddr() -- confirm in the implementation.
+ void AddVMRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
+ uint64_t addr, uint64_t size);
+
+ // Applies this label from |from_file_range| to |file_range|, but only if the
+ // entire |from_file_range| has a single label. If not, this does nothing.
+ void AddFileRangeForFileRange(const char* analyzer,
+ absl::string_view from_file_range,
+ absl::string_view file_range);
+
+ void AddFileRange(const char* analyzer, absl::string_view name,
+ absl::string_view file_range) {
+ // When separate debug files are being used, the DWARF analyzer will try to
+ // add sections of the debug file. We want to prevent this because we only
+ // want to profile the main file (not the debug file), so we filter these
+ // out. This approach is simple to implement, but does result in some
+ // useless work being done. We may want to avoid doing this useless work in
+ // the first place.
+ if (FileContainsPointer(file_range.data())) {
+ AddFileRange(analyzer, name, file_range.data() - file_->data().data(),
+ file_range.size());
+ }
+ }
+
+ // The VM-only functions below may not be used to populate the base map!
+
+ // Adds a region to the memory map. It should not overlap any previous
+ // region added with AddVMRange(), but it should overlap the base memory map.
+ void AddVMRange(const char* analyzer, uint64_t vmaddr, uint64_t vmsize,
+ const std::string& name);
+
+ // Like AddVMRange(), but allows that this addr/size might have previously
+ // been added already under a different name. If so, this name becomes an
+ // alias of the previous name.
+ //
+ // This is for things like symbol tables that sometimes map multiple names to
+ // the same physical function.
+ void AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr,
+ uint64_t size, const std::string& name);
+
+ // Like AddVMRange(), but allows that this addr/size might have previously
+ // been added already under a different name. If so, this add is simply
+ // ignored.
+ //
+ // This is for cases like sourcefiles. Sometimes a single function appears to
+ // come from multiple source files. But if it does, we don't want to alias
+ // the entire source file to another, because it's probably only part of the
+ // source file that overlaps.
+ void AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr,
+ uint64_t size, const std::string& name);
+
+ // Returns the |index|-th map registered with AddOutput(). No bounds check is
+ // performed.
+ const DualMap& MapAtIndex(size_t index) const {
+ return *outputs_[index].first;
+ }
+
+ // Translates the given pointer (which must be within the range of
+ // input_file().data()) to a VM address.
+ uint64_t TranslateFileToVM(const char* ptr);
+ absl::string_view TranslateVMToFile(uint64_t address);
+
+ static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
+
+ // True if |ptr| points into [data, data + size) of this sink's input file.
+ bool FileContainsPointer(const void* ptr) const {
+ absl::string_view file_data = file_->data();
+ return ptr >= file_data.data() && ptr < file_data.data() + file_data.size();
+ }
+
+ bool ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize);
+ bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize);
+ bool IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize);
+ bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize);
+
+ const InputFile* file_;
+ // Held by value: this is a copy of the Options passed to the constructor.
+ const Options options_;
+ DataSource data_source_;
+ // Null when this sink populates the base map.
+ const DualMap* translator_;
+ std::vector<std::pair<DualMap*, const NameMunger*>> outputs_;
+};
+
+
+// NameMunger //////////////////////////////////////////////////////////////////
+
+// Use to transform input names according to the user's configuration.
+// For example, the user can use regexes.
+class NameMunger {
+ public:
+ NameMunger() {}
+
+ // Adds a regex that will be applied to all names. All regexes will be
+ // applied in sequence.
+ void AddRegex(const std::string& regex, const std::string& replacement);
+ // Returns the munged form of |name|; the input is not modified.
+ std::string Munge(absl::string_view name) const;
+
+ // True if no regex has been added.
+ bool IsEmpty() const { return regexes_.empty(); }
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(NameMunger);
+ // Pairs of (compiled regex, replacement string).
+ std::vector<std::pair<std::unique_ptr<ReImpl>, std::string>> regexes_;
+};
+
+typedef std::map<absl::string_view, std::pair<uint64_t, uint64_t>> SymbolTable;
+
+// Represents an object/executable file in a format like ELF, Mach-O, PE, etc.
+// To support a new file type, implement this interface.
+class ObjectFile {
+ public:
+  // Takes ownership of |file_data|. Until set_debug_file() is called, the
+  // object acts as its own debug file.
+  ObjectFile(std::unique_ptr<InputFile> file_data)
+      : file_data_(std::move(file_data)), debug_file_(this) {}
+  virtual ~ObjectFile() {}
+
+  virtual std::string GetBuildId() const = 0;
+
+  // Process this file, pushing data to |sinks| as appropriate for each data
+  // source.  If any debug files match the build id for this file, it will be
+  // given here, otherwise it is |this|.
+  virtual void ProcessFile(const std::vector<RangeSink*>& sinks) const = 0;
+
+  virtual bool GetDisassemblyInfo(absl::string_view symbol,
+                                  DataSource symbol_source,
+                                  DisassemblyInfo* info) const = 0;
+
+  const InputFile& file_data() const { return *file_data_; }
+
+  // Sets the debug file for |this|.  |file| must be non-null, must have the
+  // same build id as |this|, and must outlive this instance.
+  void set_debug_file(const ObjectFile* file) {
+    // Validate the incoming file, not the current debug file: the current one
+    // starts out as |this|, which would make the check trivially true.
+    assert(file != nullptr);
+    assert(file->GetBuildId() == GetBuildId());
+    debug_file_ = file;
+  }
+
+  const ObjectFile& debug_file() const { return *debug_file_; }
+
+ private:
+  std::unique_ptr<InputFile> file_data_;
+  // Not owned; points at |this| until set_debug_file() is called.
+  const ObjectFile* debug_file_;
+};
+
+std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
+std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
+std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
+
+namespace dwarf {
+
+// Bundle of string_views that a file-format module fills in before calling the
+// DWARF readers. The views are non-owning.
+// NOTE(review): each member presumably holds the raw contents of the
+// like-named section (.debug_info, .debug_str, ...), and is left empty when
+// the section is absent -- confirm in the modules that populate it.
+struct File {
+ absl::string_view debug_info;
+ absl::string_view debug_types;
+ absl::string_view debug_str;
+ absl::string_view debug_abbrev;
+ absl::string_view debug_aranges;
+ absl::string_view debug_line;
+ absl::string_view debug_loc;
+ absl::string_view debug_pubnames;
+ absl::string_view debug_pubtypes;
+ absl::string_view debug_ranges;
+};
+
+} // namespace dwarf
+
+// Provided by dwarf.cc. To use these, a module should fill in a dwarf::File
+// and then call these functions.
+void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
+ const DualMap& map, RangeSink* sink);
+void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
+ bool include_line);
+void ReadEhFrame(absl::string_view contents, RangeSink* sink);
+void ReadEhFrameHdr(absl::string_view contents, RangeSink* sink);
+
+
+// LineReader //////////////////////////////////////////////////////////////////
+
+// Provides range-based for to iterate over lines in a pipe.
+//
+// for ( auto& line : ReadLinesFromPipe("ls -l") ) {
+// }
+
+class LineIterator;
+
+// Reads a FILE* one line at a time and exposes begin()/end() so it can be used
+// in a range-based for loop. Movable but not copyable; the destructor calls
+// Close().
+class LineReader {
+ public:
+ // NOTE(review): |pclose| presumably selects pclose() over fclose() when the
+ // stream is closed -- confirm in Close().
+ LineReader(FILE* file, bool pclose) : file_(file), pclose_(pclose) {}
+ LineReader(LineReader&& other);
+
+ ~LineReader() { Close(); }
+
+ LineIterator begin();
+ LineIterator end();
+
+ // Advances to the next line; afterwards line() and eof() reflect the result.
+ void Next();
+
+ // The most recently read line.
+ const std::string& line() const { return line_; }
+ bool eof() { return eof_; }
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(LineReader);
+
+ void Close();
+
+ FILE* file_;
+ std::string line_;
+ bool eof_ = false;
+ bool pclose_;
+};
+
+// Minimal iterator over a LineReader, providing just the operations that a
+// range-based for loop needs. It holds a non-owning pointer to the reader and
+// forwards everything to it, so all iterators of one reader share its state.
+class LineIterator {
+ public:
+ LineIterator(LineReader* reader) : reader_(reader) {}
+
+ // Ignores |other| entirely: "not equal to end" simply means the reader has
+ // not reached EOF.
+ bool operator!=(const LineIterator& /*other*/) const {
+ // Hack for range-based for.
+ return !reader_->eof();
+ }
+
+ void operator++() { reader_->Next(); }
+
+ // The reference refers to the reader's current-line buffer.
+ const std::string& operator*() const {
+ return reader_->line();
+ }
+
+ private:
+ LineReader* reader_;
+};
+
+LineReader ReadLinesFromPipe(const std::string& cmd);
+
+// Demangle C++ symbols according to the Itanium ABI. The |source| argument
+// controls what demangling mode we are using.
+std::string ItaniumDemangle(absl::string_view symbol, DataSource source);
+
+
+// DualMap /////////////////////////////////////////////////////////////////////
+
+// Contains a RangeMap for VM space and file space for a given file.
+
+// One RangeMap per domain: VM address space and file offsets.
+struct DualMap {
+ RangeMap vm_map;
+ RangeMap file_map;
+};
+
+// Inputs for DisassembleFunction() / DisassembleFindReferences(), filled in by
+// ObjectFile::GetDisassemblyInfo().
+struct DisassemblyInfo {
+ // Non-owning view.
+ // NOTE(review): presumably the machine code bytes of the function -- confirm
+ // in the file-format modules.
+ absl::string_view text;
+ DualMap symbol_map;
+ // Capstone architecture and mode.
+ cs_arch arch;
+ cs_mode mode;
+ // NOTE(review): presumably the VM address of the first byte of |text| --
+ // confirm in the file-format modules.
+ uint64_t start_address;
+};
+
+std::string DisassembleFunction(const DisassemblyInfo& info);
+void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink);
+
+// Top-level API ///////////////////////////////////////////////////////////////
+
+// This should only be used by main.cc and unit tests.
+
+class Rollup;
+
+// One row of rollup output: a label, its sizes in each domain, and the rows
+// nested beneath it.
+struct RollupRow {
+  RollupRow(const std::string& name_) : name(name_) {}
+
+  std::string name;
+  int64_t vmsize = 0;
+  int64_t filesize = 0;
+  int64_t filtered_vmsize = 0;
+  int64_t filtered_filesize = 0;
+  int64_t other_count = 0;
+  // The fields below are zero-initialized like the rest so that a freshly
+  // constructed row never exposes indeterminate values (Compare() reads
+  // sortkey).
+  int64_t sortkey = 0;
+  double vmpercent = 0;
+  double filepercent = 0;
+  std::vector<RollupRow> sorted_children;
+
+  // Strict weak ordering for sorting rows: larger sortkey first, ties broken
+  // by ascending name.
+  static bool Compare(const RollupRow& a, const RollupRow& b) {
+    // Sort value high-to-low.
+    if (a.sortkey != b.sortkey) {
+      return a.sortkey > b.sortkey;
+    }
+    // Sort name low to high.
+    return a.name < b.name;
+  }
+};
+
+// Output format used by RollupOutput::Print(). The command line selects kCSV
+// with --csv and kTSV with --tsv; kPrettyPrint is the default.
+enum class OutputFormat {
+ kPrettyPrint,
+ kCSV,
+ kTSV,
+};
+
+// Which size domain(s) to show; set on the command line with
+// --domain=file|vm|both.
+enum class ShowDomain {
+ kShowFile,
+ kShowVM,
+ kShowBoth,
+};
+
+// Presentation settings, kept separate from the analysis Options.
+struct OutputOptions {
+ OutputFormat output_format = OutputFormat::kPrettyPrint;
+ // Maximum label length; the -w flag raises this to SIZE_MAX.
+ size_t max_label_len = 80;
+ ShowDomain show = ShowDomain::kShowBoth;
+};
+
+// Result of a Bloaty run: a tree of RollupRows (rooted at a row named "TOTAL")
+// and/or a disassembly string, together with the code to print them.
+// Non-copyable. The Rollup class is a friend and can write the private fields.
+struct RollupOutput {
+ public:
+ RollupOutput() : toplevel_row_("TOTAL") {}
+
+ void AddDataSourceName(absl::string_view name) {
+ source_names_.emplace_back(std::string(name));
+ }
+
+ const std::vector<std::string>& source_names() const { return source_names_; }
+
+ // Writes the output to |*out|. The rollup is printed (in the format chosen
+ // by |options|) only if at least one data source name has been added; the
+ // disassembly is appended if it is non-empty.
+ void Print(const OutputOptions& options, std::ostream* out) {
+ if (!source_names_.empty()) {
+ switch (options.output_format) {
+ case bloaty::OutputFormat::kPrettyPrint:
+ PrettyPrint(options, out);
+ break;
+ case bloaty::OutputFormat::kCSV:
+ PrintToCSV(out, /*tabs=*/false);
+ break;
+ case bloaty::OutputFormat::kTSV:
+ // TSV shares the CSV printer and differs only in the |tabs| argument.
+ PrintToCSV(out, /*tabs=*/true);
+ break;
+ default:
+ BLOATY_UNREACHABLE();
+ }
+ }
+
+ if (!disassembly_.empty()) {
+ *out << disassembly_;
+ }
+ }
+
+ void SetDisassembly(absl::string_view disassembly) {
+ disassembly_ = std::string(disassembly);
+ }
+
+ // The returned view refers to this object's internal string.
+ absl::string_view GetDisassembly() { return disassembly_; }
+
+ // For debugging.
+ const RollupRow& toplevel_row() const { return toplevel_row_; }
+ bool diff_mode() const { return diff_mode_; }
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(RollupOutput);
+ friend class Rollup;
+
+ std::vector<std::string> source_names_;
+ RollupRow toplevel_row_;
+ std::string disassembly_;
+
+ // When we are in diff mode, rollup sizes are relative to the baseline.
+ bool diff_mode_ = false;
+
+ static bool IsSame(const std::string& a, const std::string& b);
+ void PrettyPrint(const OutputOptions& options, std::ostream* out) const;
+ void PrintToCSV(std::ostream* out, bool tabs) const;
+ void PrettyPrintRow(const RollupRow& row, size_t indent,
+ const OutputOptions& options, std::ostream* out) const;
+ void PrettyPrintTree(const RollupRow& row, size_t indent,
+ const OutputOptions& options, std::ostream* out) const;
+ // Note: |parent_labels| is taken by value in both functions below.
+ void PrintRowToCSV(const RollupRow& row,
+ std::vector<std::string> parent_labels,
+ std::ostream* out, bool tabs) const;
+ void PrintTreeToCSV(const RollupRow& row,
+ std::vector<std::string> parent_labels,
+ std::ostream* out, bool tabs) const;
+};
+
+bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
+ OutputOptions* output_options, std::string* error);
+bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
+ RollupOutput* output, std::string* error);
+
+// Endianness utilities ////////////////////////////////////////////////////////
+
+// Returns true if the host stores the least-significant byte of an int at its
+// lowest address.
+inline bool IsLittleEndian() {
+  const int probe = 1;
+  // Inspect the first byte of the object representation of 1.
+  const char* first_byte = reinterpret_cast<const char*>(&probe);
+  return first_byte[0] == 1;
+}
+
+// It seems like it would be simpler to just specialize on:
+// template <class T> T ByteSwap(T val);
+// template <> T ByteSwap<uint16>(T val) { /* ... */ }
+// template <> T ByteSwap<uint32>(T val) { /* ... */ }
+// // etc...
+//
+// But this doesn't work out so well. Consider that on LP32, uint32 could
+// be either "unsigned int" or "unsigned long". Specializing ByteSwap<uint32>
+// will leave one of those two unspecialized. C++ is annoying in this regard.
+// Our approach here handles both cases with just one specialization.
+// Primary template: dispatches on sizeof(T). Only the 1, 2, 4 and 8 byte
+// specializations below define operator().
+template <class T, size_t size> struct ByteSwapper { T operator()(T val); };
+
+// Single byte: nothing to swap.
+template <class T>
+struct ByteSwapper<T, 1> {
+  T operator()(T val) { return val; }
+};
+
+// Two bytes: exchange the low and high byte.
+template <class T>
+struct ByteSwapper<T, 2> {
+  T operator()(T val) {
+    const auto low_to_high = (val & 0xff) << 8;
+    const auto high_to_low = (val & 0xff00) >> 8;
+    return low_to_high | high_to_low;
+  }
+};
+
+// Four bytes: byte N moves to position 3 - N.
+template <class T>
+struct ByteSwapper<T, 4> {
+  T operator()(T val) {
+    const auto byte0 = (val & 0xff) << 24;
+    const auto byte1 = (val & 0xff00) << 8;
+    const auto byte2 = (val & 0xff0000ULL) >> 8;
+    const auto byte3 = (val & 0xff000000ULL) >> 24;
+    return byte0 | byte1 | byte2 | byte3;
+  }
+};
+
+// Eight bytes: byte N moves to position 7 - N.
+template <class T>
+struct ByteSwapper<T, 8> {
+  T operator()(T val) {
+    const auto byte0 = (val & 0xff) << 56;
+    const auto byte1 = (val & 0xff00) << 40;
+    const auto byte2 = (val & 0xff0000) << 24;
+    const auto byte3 = (val & 0xff000000) << 8;
+    const auto byte4 = (val & 0xff00000000ULL) >> 8;
+    const auto byte5 = (val & 0xff0000000000ULL) >> 24;
+    const auto byte6 = (val & 0xff000000000000ULL) >> 40;
+    const auto byte7 = (val & 0xff00000000000000ULL) >> 56;
+    return byte0 | byte1 | byte2 | byte3 | byte4 | byte5 | byte6 | byte7;
+  }
+};
+
+// Reverses the byte order of |val|, selecting the swapper by sizeof(T).
+template <class T>
+T ByteSwap(T val) {
+  ByteSwapper<T, sizeof(T)> swapper;
+  return swapper(val);
+}
+
+} // namespace bloaty
+
+#endif
diff --git a/src/bloaty.proto b/src/bloaty.proto
new file mode 100644
index 0000000..59db641
--- /dev/null
+++ b/src/bloaty.proto
@@ -0,0 +1,100 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package bloaty;
+
+// Describes one Bloaty analysis. Populated from the command line and/or merged
+// from a text-format config file given with -c.
+message Options {
+ // The files to analyze. At least one file must be provided.
+ repeated string filename = 1;
+
+ // If non-empty, we are in diff mode and these files should provide the
+ // baseline.
+ repeated string base_filename = 2;
+
+ // Files to use for debug information. We will match these to files from
+ // filename or base_filename by build id. Files that are only in
+ // debug_filename will *not* have their file size counted.
+ repeated string debug_filename = 10;
+
+ // The data sources to scan in each file. At least one data source must be
+ // specified. If more than one source is specified, the output is
+ // hierarchical.
+ //
+ // These can be built-in data sources or custom data sources.
+ //
+ // The command-line parser adds "sections" when no data source is given and
+ // disassemble_function is not set.
+ repeated string data_source = 3;
+
+ // The maximum number of rows to show at each level before collapsing the rest
+ // into '[Other]'.
+ //
+ // Must be at least 1; smaller values are rejected when the analysis runs.
+ // The command-line parser maps "-n 0" to INT64_MAX.
+ optional int64 max_rows_per_level = 4 [default = 20];
+
+ // Demangling mode; selected on the command line with -C / --demangle and the
+ // values "short", "full" or "none".
+ enum Demangle {
+ DEMANGLE_SHORT = 0;
+ DEMANGLE_FULL = 1;
+ DEMANGLE_NONE = 2;
+ }
+ optional Demangle demangle = 5 [default = DEMANGLE_SHORT];
+
+ // What to sort the output by.
+ //
+ // When this is unset and --domain is given, the command-line parser sets it
+ // to match the domain being shown.
+ enum SortBy {
+ SORTBY_BOTH = 0;
+ SORTBY_VMSIZE = 1;
+ SORTBY_FILESIZE = 2;
+ }
+ optional SortBy sort_by = 6 [default = SORTBY_BOTH];
+
+ // When greater than zero, Bloaty will print verbose output to stdout.
+ // The command-line flags -v, -vv and -vvv set this to 1, 2 and 3.
+ // TODO(haberman): should this be in the output object instead?
+ optional int32 verbose_level = 7;
+
+ // If set, these will print verbose information pertaining to the given
+ // vm address and/or file offset.
+ optional uint64 debug_vmaddr = 11;
+ optional uint64 debug_fileoff = 12;
+
+ // Custom data sources for this analysis.
+ repeated CustomDataSource custom_data_source = 8;
+
+ // Disassemble this function.
+ // Only acted upon when no data_source is specified.
+ optional string disassemble_function = 9;
+
+ // Regex with which to filter names in the data sources.
+ // An invalid regex is rejected when the analysis runs.
+ optional string source_filter = 13;
+}
+
+// A custom data source allows users to create their own label space by
+// rewriting a set of existing labels according to a set of regexes.
+message CustomDataSource {
+ // The name of this data source.
+ optional string name = 1;
+
+ // The underlying data source. We will scan this data source and apply
+ // rewrites to define the new data source. This must be a built-in data
+ // source, not another custom data source.
+ optional string base_data_source = 2;
+
+ // The rewrites that we will apply to the underlying labels to define the new
+ // data source.
+ repeated Regex rewrite = 3;
+}
+
+// For rewriting labels from a data source. If the regex matches, then the
+// entire string is replaced with the replacement. Any groups captured from the
+// pattern can be substituted into the replacement with \1, \2, etc. and \0
+// refers to the entire match.
+message Regex {
+ optional string pattern = 1;
+ optional string replacement = 2;
+}
diff --git a/src/bloaty_package.bloaty b/src/bloaty_package.bloaty
new file mode 100644
index 0000000..df290fc
--- /dev/null
+++ b/src/bloaty_package.bloaty
@@ -0,0 +1,13 @@
+custom_data_source: {
+ name: "bloaty_package"
+ base_data_source: "compileunits"
+
+ rewrite: {
+ pattern: "^(\\.\\./)?src"
+ replacement: "src"
+ }
+ rewrite: {
+ pattern: "^(\\.\\./)?(third_party/\\w+)"
+ replacement: "\\2"
+ }
+}
diff --git a/src/demangle.cc b/src/demangle.cc
new file mode 100644
index 0000000..7c71a95
--- /dev/null
+++ b/src/demangle.cc
@@ -0,0 +1,1885 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// For reference check out:
+// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
+//
+// Note that we only have partial C++0x support yet.
+
+#include "demangle.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <limits>
+
+// One entry of the abbreviation tables below: associates a mangled
+// abbreviation with the text that is emitted for it.
+typedef struct {
+ const char *abbrev; // Abbreviation as it appears in the mangled name.
+ const char *real_name; // Text emitted for the abbreviation.
+ // Number of arguments in <expression> context, or 0 if disallowed.
+ int arity;
+} AbbrevPair;
+
+// List of operators from Itanium C++ ABI.
+// Each entry is {abbreviation, operator spelling, arity}; the table is
+// terminated by a {nullptr, nullptr, 0} sentinel.
+static const AbbrevPair kOperatorList[] = {
+ // New has special syntax (not currently supported).
+ {"nw", "new", 0},
+ {"na", "new[]", 0},
+
+ // Works except that the 'gs' prefix is not supported.
+ {"dl", "delete", 1},
+ {"da", "delete[]", 1},
+
+ {"ps", "+", 1}, // "positive"
+ {"ng", "-", 1}, // "negative"
+ {"ad", "&", 1}, // "address-of"
+ {"de", "*", 1}, // "dereference"
+ {"co", "~", 1},
+
+ {"pl", "+", 2},
+ {"mi", "-", 2},
+ {"ml", "*", 2},
+ {"dv", "/", 2},
+ {"rm", "%", 2},
+ {"an", "&", 2},
+ {"or", "|", 2},
+ {"eo", "^", 2},
+ {"aS", "=", 2},
+ {"pL", "+=", 2},
+ {"mI", "-=", 2},
+ {"mL", "*=", 2},
+ {"dV", "/=", 2},
+ {"rM", "%=", 2},
+ {"aN", "&=", 2},
+ {"oR", "|=", 2},
+ {"eO", "^=", 2},
+ {"ls", "<<", 2},
+ {"rs", ">>", 2},
+ {"lS", "<<=", 2},
+ {"rS", ">>=", 2},
+ {"eq", "==", 2},
+ {"ne", "!=", 2},
+ {"lt", "<", 2},
+ {"gt", ">", 2},
+ {"le", "<=", 2},
+ {"ge", ">=", 2},
+ {"nt", "!", 1},
+ {"aa", "&&", 2},
+ {"oo", "||", 2},
+ {"pp", "++", 1},
+ {"mm", "--", 1},
+ {"cm", ",", 2},
+ {"pm", "->*", 2},
+ {"pt", "->", 0}, // Special syntax
+ {"cl", "()", 0}, // Special syntax
+ {"ix", "[]", 2},
+ {"qu", "?", 3},
+ {"st", "sizeof", 0}, // Special syntax
+ {"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
+ {nullptr, nullptr, 0},
+};
+
+// List of builtin types from Itanium C++ ABI.
+// Each entry is {abbreviation, type name, arity}; arity is 0 for every entry
+// in this table. The table is terminated by a {nullptr, nullptr, 0} sentinel.
+static const AbbrevPair kBuiltinTypeList[] = {
+ { "v", "void", 0},
+ { "w", "wchar_t", 0},
+ { "b", "bool", 0},
+ { "c", "char", 0},
+ { "a", "signed char", 0},
+ { "h", "unsigned char", 0},
+ { "s", "short", 0},
+ { "t", "unsigned short", 0},
+ { "i", "int", 0},
+ { "j", "unsigned int", 0},
+ { "l", "long", 0},
+ { "m", "unsigned long", 0},
+ { "x", "long long", 0},
+ { "y", "unsigned long long", 0},
+ { "n", "__int128", 0},
+ { "o", "unsigned __int128", 0},
+ { "f", "float", 0},
+ { "d", "double", 0},
+ { "e", "long double", 0},
+ { "g", "__float128", 0},
+ { "z", "ellipsis", 0},
+ { nullptr, nullptr, 0}
+};
+
+// List of substitutions from the Itanium C++ ABI.
+// Each entry is {abbreviation, name, arity}; arity is 0 for every entry in
+// this table. The table is terminated by a {nullptr, nullptr, 0} sentinel.
+static const AbbrevPair kSubstitutionList[] = {
+ { "St", "", 0},
+ { "Sa", "allocator", 0},
+ { "Sb", "basic_string", 0},
+ // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
+ { "Ss", "string", 0},
+ // std::basic_istream<char, std::char_traits<char> >
+ { "Si", "istream", 0},
+ // std::basic_ostream<char, std::char_traits<char> >
+ { "So", "ostream", 0},
+ // std::basic_iostream<char, std::char_traits<char> >
+ { "Sd", "iostream", 0},
+ { nullptr, nullptr, 0}
+};
+
+// State needed for demangling. This struct is copied in almost every stack
+// frame, so every byte counts.
+//
+// InitState() starts prev_name_length and nest_level at -1; a nest_level of
+// -1 is never incremented by MaybeIncreaseNestLevel().
+typedef struct {
+ int mangled_idx; // Cursor of mangled name.
+ int out_cur_idx; // Cursor of output string.
+ int prev_name_idx; // For constructors/destructors.
+ signed int prev_name_length : 16; // For constructors/destructors.
+ signed int nest_level : 15; // For nested names.
+ unsigned int append : 1; // Append flag.
+ // Note: for some reason MSVC can't pack "bool append : 1" into the same int
+ // with the above two fields, so we use an int instead. Amusingly it can pack
+ // "signed bool" as expected, but relying on that to continue to be a legal
+ // type seems ill-advised (as it's illegal in at least clang).
+} ParseState;
+
+// The three bit-fields above (16 + 15 + 1 bits) are expected to share a
+// single int, giving four ints in total.
+static_assert(sizeof(ParseState) == 4 * sizeof(int),
+ "unexpected size of ParseState");
+
+// One-off state for demangling that's not subject to backtracking -- either
+// constant data, data that's intentionally immune to backtracking (steps), or
+// data that would never be changed by backtracking anyway (recursion_depth).
+//
+// Only one copy of this exists for each call to Demangle, so the size of this
+// struct is nearly inconsequential.
+//
+// The parsing functions backtrack by saving and restoring only the
+// "parse_state" member.
+typedef struct {
+ const char *mangled_begin; // Beginning of input string.
+ char *out; // Beginning of output string.
+ int out_end_idx; // One past last allowed output character.
+ int recursion_depth; // For stack exhaustion prevention: b/34269257.
+ int steps; // Cap how much work we'll do, regardless of depth.
+ ParseState parse_state; // Backtrackable state copied for most frames.
+} State;
+
+
+namespace {
+// Prevent deep recursion / stack exhaustion. b/34269257.
+// Also prevent unbounded handling of complex inputs. b/37793125.
+//
+// Constructing a guard increments both state->recursion_depth and
+// state->steps; destroying it decrements only recursion_depth, so "steps"
+// is never decreased by a guard.
+class ComplexityGuard {
+ public:
+ explicit ComplexityGuard(State *state) : state_(state) {
+ ++state->recursion_depth;
+ ++state->steps;
+ }
+ ~ComplexityGuard() { --state_->recursion_depth; }
+
+ // 256 levels of recursion seems like a reasonable upper limit on depth.
+ // 128 is not enough to demangle synthetic tests from demangle_unittest.txt:
+ // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
+ static constexpr int kRecursionDepthLimit = 256;
+
+ // We're trying to pick a charitable upper-limit on how many parse steps are
+ // necessary to handle something that a human could actually make use of.
+ // This is mostly in place as a bound on how much work we'll do if we are
+ // asked to demangle a mangled name from an untrusted source, so it should be
+ // much larger than the largest expected symbol, but much smaller than the
+ // amount of work we can do in, e.g., a second.
+ //
+ // Some real-world symbols from an arbitrary binary started failing between
+ // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set
+ // the limit.
+ //
+ // Spending one second on 2^17 parse steps would require each step to take
+ // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in
+ // under a second.
+ static constexpr int kParseStepsLimit = 1 << 17;
+
+ // Returns true once either the recursion depth or the total step count
+ // has exceeded its limit.
+ bool IsTooComplex() const {
+ return state_->recursion_depth > kRecursionDepthLimit ||
+ state_->steps > kParseStepsLimit;
+ }
+
+ private:
+ State *state_; // State whose counters this guard updates.
+};
+} // namespace
+
+// Returns the number of characters preceding the terminating '\0' of "str".
+// libc's strlen() is avoided because it's not guaranteed to be async
+// signal safe.
+static size_t StrLen(const char *str) {
+  const char *end = str;
+  while (*end != '\0') {
+    ++end;
+  }
+  return static_cast<size_t>(end - str);
+}
+
+// Returns true if "str" holds at least "n" characters before its
+// terminating '\0'. Any "n" <= 0 trivially yields true.
+static bool AtLeastNumCharsRemaining(const char *str, int n) {
+  int seen = 0;
+  while (seen < n && str[seen] != '\0') {
+    ++seen;
+  }
+  return seen >= n;
+}
+
+// Returns true if "str" begins with "prefix". An empty "prefix" matches
+// every string.
+static bool StrPrefix(const char *str, const char *prefix) {
+  for (; *prefix != '\0'; ++str, ++prefix) {
+    // A mismatch, including "str" ending early, means "prefix" was not
+    // fully consumed.
+    if (*str != *prefix) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Prepares "state" for demangling "mangled" into the buffer "out", which
+// holds "out_size" characters. All counters and cursors are reset.
+static void InitState(State *state, const char *mangled,
+                      char *out, int out_size) {
+  // Input and output buffers.
+  state->mangled_begin = mangled;
+  state->out = out;
+  state->out_end_idx = out_size;
+
+  // Complexity counters.
+  state->steps = 0;
+  state->recursion_depth = 0;
+
+  // Backtrackable state: cursors at the start, no previous name, not inside
+  // a nested name, appending enabled.
+  ParseState *ps = &state->parse_state;
+  ps->mangled_idx = 0;
+  ps->out_cur_idx = 0;
+  ps->prev_name_idx = 0;
+  ps->prev_name_length = -1;
+  ps->nest_level = -1;
+  ps->append = true;
+}
+
+// Returns a pointer to the not-yet-consumed part of the mangled name, i.e.
+// the character at the "mangled_idx" cursor.
+static inline const char *RemainingInput(State *state) {
+  return state->mangled_begin + state->parse_state.mangled_idx;
+}
+
+// Consumes "one_char_token" if it is the character at "mangled_idx":
+// advances the cursor by one and returns true. Otherwise returns false and
+// leaves the cursor alone. "one_char_token" is assumed not to be '\0'.
+static bool ParseOneCharToken(State *state, const char one_char_token) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  if (*RemainingInput(state) != one_char_token) {
+    return false;
+  }
+  ++state->parse_state.mangled_idx;
+  return true;
+}
+
+// Consumes "two_char_token" if its two characters appear at "mangled_idx":
+// advances the cursor by two and returns true. Otherwise returns false and
+// leaves the cursor alone. "two_char_token" is assumed not to contain '\0'.
+static bool ParseTwoCharToken(State *state, const char *two_char_token) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const char *input = RemainingInput(state);
+  // The second input character is only inspected once the first matched.
+  if (input[0] != two_char_token[0]) {
+    return false;
+  }
+  if (input[1] != two_char_token[1]) {
+    return false;
+  }
+  state->parse_state.mangled_idx += 2;
+  return true;
+}
+
+// Consumes the character at "mangled_idx" if it is one of the characters
+// listed in "char_class": advances the cursor by one and returns true.
+// Returns false at end of input or when no listed character matches.
+static bool ParseCharClass(State *state, const char *char_class) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const char current = RemainingInput(state)[0];
+  if (current == '\0') {
+    return false;
+  }
+  for (const char *candidate = char_class; *candidate != '\0'; ++candidate) {
+    if (*candidate != current) {
+      continue;
+    }
+    ++state->parse_state.mangled_idx;
+    return true;
+  }
+  return false;
+}
+
+// Consumes one decimal digit at the cursor. On success returns true and, if
+// "digit" is non-null, stores the digit's numeric value in *digit.
+static bool ParseDigit(State *state, int *digit) {
+  // Read the character before ParseCharClass() moves the cursor past it.
+  const char first = *RemainingInput(state);
+  if (!ParseCharClass(state, "0123456789")) {
+    return false;
+  }
+  if (digit != nullptr) {
+    *digit = first - '0';
+  }
+  return true;
+}
+
+// Wraps the result of parsing an optional non-terminal: the result is
+// discarded and true is always returned, so the wrapped parse can sit in an
+// "&&" chain without being able to fail it.
+static bool Optional(bool status) {
+  static_cast<void>(status);  // Intentionally ignored.
+  return true;
+}
+
+// Handles <non-terminal>+ syntax: calls "parse_func" repeatedly until it
+// fails. Returns true if the very first call succeeded, false otherwise.
+typedef bool (*ParseFunc)(State *);
+static bool OneOrMore(ParseFunc parse_func, State *state) {
+  if (!parse_func(state)) {
+    return false;
+  }
+  bool more;
+  do {
+    more = parse_func(state);
+  } while (more);
+  return true;
+}
+
+// Handles <non-terminal>* syntax: calls "parse_func" repeatedly until it
+// fails, then returns true unconditionally. It must therefore be followed by
+// a termination token or a terminating sequence not handled by parse_func
+// (e.g. ParseOneCharToken(state, 'E')).
+static bool ZeroOrMore(ParseFunc parse_func, State *state) {
+  bool more;
+  do {
+    more = parse_func(state);
+  } while (more);
+  return true;
+}
+
+// Copies "length" characters of "str" to the output at "out_cur_idx",
+// always keeping one slot free for the terminating '\0'. If the characters
+// do not fit, copying stops and out_cur_idx is set to out_end_idx+1 to
+// signal the overflow. The output is '\0'-terminated whenever the cursor is
+// still inside the buffer afterwards.
+static void Append(State *state, const char * const str, const int length) {
+  ParseState *ps = &state->parse_state;
+  const int limit = state->out_end_idx;
+  int copied = 0;
+  while (copied < length) {
+    if (ps->out_cur_idx + 1 >= limit) {  // No room for this char plus '\0'.
+      ps->out_cur_idx = limit + 1;  // Signal overflow.
+      break;
+    }
+    state->out[ps->out_cur_idx++] = str[copied++];
+  }
+  if (ps->out_cur_idx < limit) {
+    state->out[ps->out_cur_idx] = '\0';
+  }
+}
+
+// Locale-independent check for an ASCII lowercase letter. The libc
+// equivalents are avoided because of locale issues.
+static bool IsLower(char c) {
+  return 'a' <= c && c <= 'z';
+}
+
+// Locale-independent check for an ASCII letter of either case.
+static bool IsAlpha(char c) {
+  const bool is_lower = 'a' <= c && c <= 'z';
+  const bool is_upper = 'A' <= c && c <= 'Z';
+  return is_lower || is_upper;
+}
+
+// Locale-independent check for an ASCII decimal digit.
+static bool IsDigit(char c) {
+  return !(c < '0' || c > '9');
+}
+
+// Returns true if "str" is a function clone suffix. These suffixes are used
+// by GCC 4.5.x and later versions (and our locally-modified version of GCC
+// 4.4.x) to indicate functions which have been cloned during optimization.
+// Any sequence (.<alpha>+.<digit>+)+ is treated as a function clone suffix;
+// the empty string is accepted as well.
+static bool IsFunctionCloneSuffix(const char *str) {
+  const char *p = str;
+  while (*p != '\0') {
+    // ".<alpha>+"
+    if (p[0] != '.' || !IsAlpha(p[1])) {
+      return false;
+    }
+    for (p += 2; IsAlpha(*p); ++p) {
+    }
+    // ".<digit>+"
+    if (p[0] != '.' || !IsDigit(p[1])) {
+      return false;
+    }
+    for (p += 2; IsDigit(*p); ++p) {
+    }
+  }
+  return true;  // All of "str" was consumed.
+}
+
+// Returns true if the last character written to the output is "chr".
+//
+// The cursor must lie inside the buffer: after Append() signals an overflow
+// it sets out_cur_idx to out_end_idx+1, in which case out[out_cur_idx - 1]
+// would be one past the end of the output buffer. Such a state (and an empty
+// output) is reported as "does not end with chr".
+static bool EndsWith(State *state, const char chr) {
+  const int cur = state->parse_state.out_cur_idx;
+  return cur > 0 && cur < state->out_end_idx && chr == state->out[cur - 1];
+}
+
+// Appends the first "length" characters of "str" to the output, with some
+// tweaks, but only when the "append" flag is set and "length" is positive.
+static void MaybeAppendWithLength(State *state, const char * const str,
+                                  const int length) {
+  if (!state->parse_state.append || length <= 0) {
+    return;
+  }
+  const char first = str[0];
+  // Insert a space when the output ends with '<' and "str" starts with '<',
+  // to avoid <<<.
+  if (first == '<' && EndsWith(state, '<')) {
+    Append(state, " ", 1);
+  }
+  // Record where the last identifier name starts, for ctors/dtors.
+  if (IsAlpha(first) || first == '_') {
+    state->parse_state.prev_name_idx = state->parse_state.out_cur_idx;
+    state->parse_state.prev_name_length = length;
+  }
+  Append(state, str, length);
+}
+
+// Appends the decimal representation of "val" to the output if appending is
+// enabled. Always returns true so that it can be used in "if" conditions.
+static bool MaybeAppendDecimal(State *state, unsigned int val) {
+  // Max {32-64}-bit unsigned int is 20 digits.
+  constexpr size_t kMaxLength = 20;
+
+  if (!state->parse_state.append) {
+    return true;
+  }
+
+  // itoa and sprintf can't be used as neither is specified to be
+  // async-signal-safe, so the digits are produced by hand, filling the
+  // buffer from its end towards its beginning.
+  char digits[kMaxLength];
+  size_t pos = kMaxLength;
+  do {  // val=0 is the only input that should write a leading zero digit.
+    digits[--pos] = static_cast<char>((val % 10) + '0');
+    val /= 10;
+  } while (pos > 0 && val != 0);
+
+  // "pos" is the index of the most significant digit written.
+  Append(state, digits + pos, static_cast<int>(kMaxLength - pos));
+  return true;
+}
+
+// Convenience wrapper around MaybeAppendWithLength() for '\0'-terminated
+// strings. Always returns true so that it can be placed in "if" conditions.
+static bool MaybeAppend(State *state, const char * const str) {
+  if (!state->parse_state.append) {
+    return true;
+  }
+  const int str_length = StrLen(str);
+  MaybeAppendWithLength(state, str, str_length);
+  return true;
+}
+
+// Marks the start of a nested name by resetting the nest level to 0.
+// Always returns true so that it can be chained with "&&".
+static bool EnterNestedName(State *state) {
+  ParseState *ps = &state->parse_state;
+  ps->nest_level = 0;
+  return true;
+}
+
+// Marks the end of a nested name by setting the nest level to "prev_value".
+// Always returns true so that it can be chained with "&&".
+static bool LeaveNestedName(State *state, short prev_value) {
+  ParseState *ps = &state->parse_state;
+  ps->nest_level = prev_value;
+  return true;
+}
+
+// Turns the append mode off so that function parameters, etc. are not
+// printed. Always returns true so that it can be chained with "&&".
+static bool DisableAppend(State *state) {
+  ParseState *ps = &state->parse_state;
+  ps->append = false;
+  return true;
+}
+
+// Sets the append mode to "prev_value". Always returns true so that it can
+// be chained with "&&".
+static bool RestoreAppend(State *state, bool prev_value) {
+  ParseState *ps = &state->parse_state;
+  ps->append = prev_value;
+  return true;
+}
+
+// Bumps the nest level for nested names. A negative nest level is left
+// untouched.
+static void MaybeIncreaseNestLevel(State *state) {
+  if (state->parse_state.nest_level < 0) {
+    return;
+  }
+  ++state->parse_state.nest_level;
+}
+
+// Emits the "::" separator when the nest level is at least 1.
+static void MaybeAppendSeparator(State *state) {
+  if (state->parse_state.nest_level < 1) {
+    return;
+  }
+  MaybeAppend(state, "::");
+}
+
+// Cancels the last separator if necessary: when the nest level is at least
+// 1, appending is enabled and at least two characters have been written, the
+// output cursor is moved back by two and the output is re-terminated there.
+static void MaybeCancelLastSeparator(State *state) {
+  ParseState *ps = &state->parse_state;
+  const bool can_cancel =
+      ps->nest_level >= 1 && ps->append && ps->out_cur_idx >= 2;
+  if (!can_cancel) {
+    return;
+  }
+  ps->out_cur_idx -= 2;
+  state->out[ps->out_cur_idx] = '\0';
+}
+
+// Returns true if the identifier of the given length at the "mangled_idx"
+// cursor denotes an anonymous namespace, i.e. it starts with "_GLOBAL__N_"
+// and is strictly longer than that prefix.
+//
+// The comparison is done on ints: comparing "length" directly against a
+// size_t would convert a negative length to a huge unsigned value and make
+// the length check pass.
+static bool IdentifierIsAnonymousNamespace(State *state, int length) {
+  static const char anon_prefix[] = "_GLOBAL__N_";
+  const int prefix_length = static_cast<int>(sizeof(anon_prefix) - 1);
+  return (length > prefix_length &&  // Should be longer.
+          StrPrefix(RemainingInput(state), anon_prefix));
+}
+
+// Forward declarations of our parsing functions.
+static bool ParseMangledName(State *state);
+static bool ParseEncoding(State *state);
+static bool ParseName(State *state);
+static bool ParseUnscopedName(State *state);
+static bool ParseNestedName(State *state);
+static bool ParsePrefix(State *state);
+static bool ParseUnqualifiedName(State *state);
+static bool ParseSourceName(State *state);
+static bool ParseLocalSourceName(State *state);
+static bool ParseUnnamedTypeName(State *state);
+static bool ParseNumber(State *state, int *number_out);
+static bool ParseFloatNumber(State *state);
+static bool ParseSeqId(State *state);
+static bool ParseIdentifier(State *state, int length);
+static bool ParseOperatorName(State *state, int *arity);
+static bool ParseSpecialName(State *state);
+static bool ParseCallOffset(State *state);
+static bool ParseNVOffset(State *state);
+static bool ParseVOffset(State *state);
+static bool ParseCtorDtorName(State *state);
+static bool ParseDecltype(State *state);
+static bool ParseType(State *state);
+static bool ParseCVQualifiers(State *state);
+static bool ParseBuiltinType(State *state);
+static bool ParseFunctionType(State *state);
+static bool ParseBareFunctionType(State *state);
+static bool ParseClassEnumType(State *state);
+static bool ParseArrayType(State *state);
+static bool ParsePointerToMemberType(State *state);
+static bool ParseTemplateParam(State *state);
+static bool ParseTemplateTemplateParam(State *state);
+static bool ParseTemplateArgs(State *state);
+static bool ParseTemplateArg(State *state);
+static bool ParseBaseUnresolvedName(State *state);
+static bool ParseUnresolvedName(State *state);
+static bool ParseExpression(State *state);
+static bool ParseExprPrimary(State *state);
+static bool ParseExprCastValue(State *state);
+static bool ParseLocalName(State *state);
+static bool ParseLocalNameSuffix(State *state);
+static bool ParseDiscriminator(State *state);
+static bool ParseSubstitution(State *state, bool accept_std);
+
+// Implementation note: the following code is a straightforward
+// translation of the Itanium C++ ABI defined in BNF with a couple of
+// exceptions.
+//
+// - Support GNU extensions not defined in the Itanium C++ ABI
+// - <prefix> and <template-prefix> are combined to avoid infinite loop
+// - Reorder patterns to shorten the code
+// - Reorder patterns to give greedier functions precedence
+// We'll mark "Less greedy than" for these cases in the code
+//
+// Each parsing function changes the parse state and returns true on
+// success, or returns false and doesn't change the parse state (note:
+// the parse-steps counter increases regardless of success or failure).
+// To ensure that the parse state isn't changed in the latter case, we
+// save the original state before we call multiple parsing functions
+// consecutively with &&, and restore it if unsuccessful. See
+// ParseEncoding() as an example of this convention. We follow the
+// convention throughout the code.
+//
+// Originally we tried to do demangling without following the full ABI
+// syntax but it turned out we needed to follow the full syntax to
+// parse complicated cases like nested template arguments. Note that
+// implementing a full-fledged demangler isn't trivial (libiberty's
+// cp-demangle.c has 4300+ lines).
+//
+// Note that (foo) in <(foo) ...> is a modifier to be ignored.
+//
+// Reference:
+// - Itanium C++ ABI
+// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
+
+// <mangled-name> ::= _Z <encoding>
+//
+// Entry production: expects the "_Z" prefix followed by an encoding.
+static bool ParseMangledName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  if (!ParseTwoCharToken(state, "_Z")) {
+    return false;
+  }
+  return ParseEncoding(state);
+}
+
+// <encoding> ::= <(function) name> <bare-function-type>
+//            ::= <(data) name>
+//            ::= <special-name>
+static bool ParseEncoding(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  // The first two productions are handled together as
+  // <name> [<bare-function-type>], which avoids exponential blowup of
+  // backtracking. The bare-function-type is optional, so its result is
+  // ignored and no state needs to be saved for backtracking.
+  if (ParseName(state)) {
+    ParseBareFunctionType(state);
+    return true;
+  }
+
+  return ParseSpecialName(state);
+}
+
+// <name> ::= <nested-name>
+//        ::= <unscoped-template-name> <template-args>
+//        ::= <unscoped-name>
+//        ::= <local-name>
+static bool ParseName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  if (ParseNestedName(state)) {
+    return true;
+  }
+  if (ParseLocalName(state)) {
+    return true;
+  }
+
+  // The remaining productions are reorganized to avoid re-parsing unscoped
+  // names.
+  // - Inline <unscoped-template-name> productions:
+  //   <name> ::= <substitution> <template-args>
+  //          ::= <unscoped-name> <template-args>
+  //          ::= <unscoped-name>
+  // - Merge the two productions that start with unscoped-name:
+  //   <name> ::= <unscoped-name> [<template-args>]
+
+  const ParseState saved = state->parse_state;
+  // "std<...>" isn't a valid name.
+  if (ParseSubstitution(state, /*accept_std=*/false) &&
+      ParseTemplateArgs(state)) {
+    return true;
+  }
+  state->parse_state = saved;
+
+  // No state needs restoring below: only the first subparser can fail, and
+  // the template args are optional.
+  if (!ParseUnscopedName(state)) {
+    return false;
+  }
+  ParseTemplateArgs(state);
+  return true;
+}
+
+// <unscoped-name> ::= <unqualified-name>
+//                 ::= St <unqualified-name>
+static bool ParseUnscopedName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  if (ParseUnqualifiedName(state)) {
+    return true;
+  }
+
+  // "St" prefix: the name lives in namespace std.
+  const ParseState saved = state->parse_state;
+  if (ParseTwoCharToken(state, "St")) {
+    MaybeAppend(state, "std::");
+    if (ParseUnqualifiedName(state)) {
+      return true;
+    }
+  }
+  state->parse_state = saved;
+  return false;
+}
+
+// <ref-qualifier> ::= R // lvalue method reference qualifier
+//                 ::= O // rvalue method reference qualifier
+//
+// Consumes a single 'O' or 'R' if present.
+static inline bool ParseRefQualifier(State *state) {
+  const bool consumed = ParseCharClass(state, "OR");
+  return consumed;
+}
+
+// <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
+//                   <unqualified-name> E
+//               ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
+//                   <template-args> E
+static bool ParseNestedName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const ParseState saved = state->parse_state;
+  if (ParseOneCharToken(state, 'N')) {
+    EnterNestedName(state);
+    // Both qualifier groups are optional; their results are ignored.
+    ParseCVQualifiers(state);
+    ParseRefQualifier(state);
+    if (ParsePrefix(state)) {
+      // Put back the nest level that was active before the 'N'.
+      LeaveNestedName(state, saved.nest_level);
+      if (ParseOneCharToken(state, 'E')) {
+        return true;
+      }
+    }
+  }
+  state->parse_state = saved;
+  return false;
+}
+
+// This part is tricky. If we literally translated these productions to code,
+// we'd end up in an infinite loop. Hence we merge them to avoid that case.
+//
+// <prefix> ::= <prefix> <unqualified-name>
+// ::= <template-prefix> <template-args>
+// ::= <template-param>
+// ::= <substitution>
+// ::= # empty
+// <template-prefix> ::= <prefix> <(template) unqualified-name>
+// ::= <template-param>
+// ::= <substitution>
+//
+// Because the empty prefix is allowed, this returns false only when the
+// complexity limit is hit (here or in the recursive call).
+static bool ParsePrefix(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ bool has_something = false;
+ while (true) {
+ // Emit the separator up front; it is cancelled below if no further
+ // component follows.
+ MaybeAppendSeparator(state);
+ if (ParseTemplateParam(state) ||
+ ParseSubstitution(state, /*accept_std=*/true) ||
+ ParseUnscopedName(state) ||
+ // Lambda initializer scope (see
+ // http://cs/piper///depot/google3/third_party/binutils/binutils/libiberty/cp-demangle.c?l=1557).
+ // NOTE(review): no state is saved here, so an 'M' that is not followed by
+ // an unnamed type name appears to stay consumed -- confirm this is
+ // intended.
+ (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
+ has_something = true;
+ MaybeIncreaseNestLevel(state);
+ continue;
+ }
+ MaybeCancelLastSeparator(state);
+ // Template args are only accepted after at least one component, and are
+ // followed by the rest of the prefix.
+ if (has_something && ParseTemplateArgs(state)) {
+ return ParsePrefix(state);
+ } else {
+ break;
+ }
+ }
+ return true;
+}
+
+// <unqualified-name> ::= <operator-name>
+//                    ::= <ctor-dtor-name>
+//                    ::= <source-name>
+//                    ::= <local-source-name> // GCC extension; see below.
+//                    ::= <unnamed-type-name>
+//
+// The alternatives are tried in the order listed; the first that succeeds
+// wins.
+static bool ParseUnqualifiedName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  if (ParseOperatorName(state, nullptr)) return true;
+  if (ParseCtorDtorName(state)) return true;
+  if (ParseSourceName(state)) return true;
+  if (ParseLocalSourceName(state)) return true;
+  return ParseUnnamedTypeName(state);
+}
+
+// <source-name> ::= <positive length number> <identifier>
+//
+// Parses a length followed by an identifier of exactly that many
+// characters. On failure the parse state is restored.
+static bool ParseSourceName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const ParseState saved = state->parse_state;
+  int name_length = -1;
+  if (!ParseNumber(state, &name_length) ||
+      !ParseIdentifier(state, name_length)) {
+    state->parse_state = saved;
+    return false;
+  }
+  return true;
+}
+
+// <local-source-name> ::= L <source-name> [<discriminator>]
+//
+// References:
+// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
+// http://gcc.gnu.org/viewcvs?view=rev&revision=124467
+static bool ParseLocalSourceName(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const ParseState saved = state->parse_state;
+  if (ParseOneCharToken(state, 'L') && ParseSourceName(state)) {
+    // The discriminator is optional; its result is ignored.
+    ParseDiscriminator(state);
+    return true;
+  }
+  state->parse_state = saved;
+  return false;
+}
+
+// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
+// ::= <closure-type-name>
+// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
+// <lambda-sig> ::= <(parameter) type>+
+//
+// On success appends "{unnamed type#N}" or "{lambda()#N}", where N is the
+// type's 1-based index. On failure the parse state is restored.
+static bool ParseUnnamedTypeName(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+ // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }.
+ // Optionally parse the encoded value into 'which' and add 2 to get the index.
+ // The default of -1 makes "2 + which" come out as 1 when no number is given.
+ int which = -1;
+
+ // Unnamed type local to function or class.
+ if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) &&
+ which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "{unnamed type#");
+ MaybeAppendDecimal(state, 2 + which);
+ MaybeAppend(state, "}");
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Closure type.
+ // The lambda signature types are parsed with appending disabled, so they
+ // are not printed; the previous append mode is restored before the index.
+ which = -1;
+ if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
+ OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
+ ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
+ which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "{lambda()#");
+ MaybeAppendDecimal(state, 2 + which);
+ MaybeAppend(state, "}");
+ return true;
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// <number> ::= [n] <non-negative decimal integer>
+//
+// Parses an optionally negated ('n'-prefixed) decimal number at the cursor.
+// On success the cursor is advanced past the number and, if "number_out" is
+// non-null, *number_out is set to the (possibly truncated) value of the
+// parsed number. On failure the parse state is left unchanged; in particular
+// a leading 'n' that is not followed by a digit is not consumed.
+static bool ParseNumber(State *state, int *number_out) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  // Only the input cursor is modified here, so it is all that has to be
+  // rolled back on failure.
+  const int start_idx = state->parse_state.mangled_idx;
+  bool negative = false;
+  if (ParseOneCharToken(state, 'n')) {
+    negative = true;
+  }
+  const char *p = RemainingInput(state);
+  uint64_t number = 0;
+  for (; *p != '\0'; ++p) {
+    if (IsDigit(*p)) {
+      number = number * 10 + static_cast<uint64_t>(*p - '0');
+    } else {
+      break;
+    }
+  }
+  if (p == RemainingInput(state)) {  // No digits: not a number.
+    state->parse_state.mangled_idx = start_idx;  // Un-consume any 'n'.
+    return false;
+  }
+  // Apply the sign with uint64 arithmetic so overflows aren't UB. Gives
+  // "incorrect" results for out-of-range inputs, but negative values only
+  // appear for literals, which aren't printed.
+  if (negative) {
+    number = ~number + 1;
+  }
+  state->parse_state.mangled_idx += static_cast<int>(p - RemainingInput(state));
+  if (number_out != nullptr) {
+    // Note: possibly truncate "number".
+    *number_out = static_cast<int>(number);
+  }
+  return true;
+}
+
+// Floating-point literals are encoded using a fixed-length lowercase
+// hexadecimal string. Consumes the longest non-empty run of [0-9a-f] at the
+// cursor; fails without consuming anything if there is none.
+static bool ParseFloatNumber(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const char *const begin = RemainingInput(state);
+  const char *end = begin;
+  // '\0' is neither a digit nor in a-f, so the scan stops at end of input.
+  while (IsDigit(*end) || (*end >= 'a' && *end <= 'f')) {
+    ++end;
+  }
+  if (end == begin) {
+    return false;
+  }
+  state->parse_state.mangled_idx += end - begin;
+  return true;
+}
+
+// The <seq-id> is a sequence number in base 36, using digits and upper case
+// letters. Consumes the longest non-empty run of [0-9A-Z] at the cursor;
+// fails without consuming anything if there is none.
+static bool ParseSeqId(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  const char *const begin = RemainingInput(state);
+  const char *end = begin;
+  // '\0' is neither a digit nor in A-Z, so the scan stops at end of input.
+  while (IsDigit(*end) || (*end >= 'A' && *end <= 'Z')) {
+    ++end;
+  }
+  if (end == begin) {
+    return false;
+  }
+  state->parse_state.mangled_idx += end - begin;
+  return true;
+}
+
+// <identifier> ::= <unqualified source code identifier> (of given length)
+//
+// Consumes exactly "length" characters as an identifier. Fails if "length"
+// is negative or fewer than "length" characters remain. Identifiers that
+// denote an anonymous namespace are printed as "(anonymous namespace)".
+static bool ParseIdentifier(State *state, int length) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  if (length < 0) {
+    return false;
+  }
+  const char *const name = RemainingInput(state);
+  if (!AtLeastNumCharsRemaining(name, length)) {
+    return false;
+  }
+  if (IdentifierIsAnonymousNamespace(state, length)) {
+    MaybeAppend(state, "(anonymous namespace)");
+  } else {
+    MaybeAppendWithLength(state, name, length);
+  }
+  state->parse_state.mangled_idx += length;
+  return true;
+}
+
+// <operator-name> ::= nw, and other two letters cases
+// ::= cv <type> # (cast)
+// ::= v <digit> <source-name> # vendor extended operator
+//
+// If "arity" is non-null, it receives the operator's arity on success: 1 for
+// a cast, the parsed digit for a vendor extended operator, and the
+// kOperatorList entry's arity otherwise.
+// NOTE(review): ParseDigit() writes *arity as soon as the digit is parsed, so
+// *arity may also be modified when the vendor branch subsequently fails.
+static bool ParseOperatorName(State *state, int *arity) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ // Every operator name is at least two characters long.
+ if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) {
+ return false;
+ }
+ // First check with "cv" (cast) case.
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "cv") &&
+ MaybeAppend(state, "operator ") &&
+ EnterNestedName(state) &&
+ ParseType(state) &&
+ LeaveNestedName(state, copy.nest_level)) {
+ if (arity != nullptr) {
+ *arity = 1;
+ }
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Then vendor extended operators.
+ if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
+ ParseSourceName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Other operator names should start with a lower alphabet followed
+ // by a lower/upper alphabet.
+ if (!(IsLower(RemainingInput(state)[0]) &&
+ IsAlpha(RemainingInput(state)[1]))) {
+ return false;
+ }
+ // We may want to perform a binary search if we really need speed.
+ const AbbrevPair *p;
+ for (p = kOperatorList; p->abbrev != nullptr; ++p) {
+ if (RemainingInput(state)[0] == p->abbrev[0] &&
+ RemainingInput(state)[1] == p->abbrev[1]) {
+ if (arity != nullptr) {
+ *arity = p->arity;
+ }
+ MaybeAppend(state, "operator");
+ // Operators spelled with a word get a space after "operator".
+ if (IsLower(*p->real_name)) { // new, delete, etc.
+ MaybeAppend(state, " ");
+ }
+ MaybeAppend(state, p->real_name);
+ state->parse_state.mangled_idx += 2;
+ return true;
+ }
+ }
+ return false;
+}
+
+// <special-name> ::= TV <type>
+// ::= TT <type>
+// ::= TI <type>
+// ::= TS <type>
+// ::= Tc <call-offset> <call-offset> <(base) encoding>
+// ::= GV <(object) name>
+// ::= T <call-offset> <(base) encoding>
+// G++ extensions:
+// ::= TC <type> <(offset) number> _ <(base) type>
+// ::= TF <type>
+// ::= TJ <type>
+// ::= GR <name>
+// ::= GA <encoding>
+// ::= Th <call-offset> <(base) encoding>
+// ::= Tv <call-offset> <(base) encoding>
+//
+// Note: we don't care much about them since they don't appear in
+// stack traces. They are special data.
+//
+// Each alternative is tried in turn; the parse state is restored from "copy"
+// after every alternative that fails.
+static bool ParseSpecialName(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+ // TV / TT / TI / TS <type>
+ if (ParseOneCharToken(state, 'T') &&
+ ParseCharClass(state, "VTIS") &&
+ ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
+ ParseCallOffset(state) && ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "GV") &&
+ ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
+ ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // G++ extensions
+ // TC: the base type is parsed with appending disabled, so it is not
+ // printed; the previous append mode is restored on success.
+ if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
+ ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
+ DisableAppend(state) &&
+ ParseType(state)) {
+ RestoreAppend(state, copy.append);
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
+ ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
+ ParseCallOffset(state) && ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <call-offset> ::= h <nv-offset> _
+// ::= v <v-offset> _
+//
+// Matches either form of <call-offset>. Whenever an alternative fails the
+// parse state is rolled back to its value on entry.
+static bool ParseCallOffset(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+
+ // First form: h <nv-offset> _
+ const bool matched_h = ParseOneCharToken(state, 'h') &&
+ ParseNVOffset(state) &&
+ ParseOneCharToken(state, '_');
+ if (matched_h) {
+ return true;
+ }
+ state->parse_state = entry_state;
+
+ // Second form: v <v-offset> _
+ const bool matched_v = ParseOneCharToken(state, 'v') &&
+ ParseVOffset(state) &&
+ ParseOneCharToken(state, '_');
+ if (matched_v) {
+ return true;
+ }
+ state->parse_state = entry_state;
+ return false;
+}
+
+// <nv-offset> ::= <(offset) number>
+//
+// Matches a single <number>; no out-parameter is supplied for its value.
+static bool ParseNVOffset(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) {
+ return false;
+ }
+ const bool matched_number = ParseNumber(state, nullptr);
+ return matched_number;
+}
+
+// <v-offset> ::= <(offset) number> _ <(virtual offset) number>
+//
+// Matches two numbers separated by '_'. If the match fails, the parse state
+// is put back to what it was on entry.
+static bool ParseVOffset(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ const bool matched = ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_') &&
+ ParseNumber(state, nullptr);
+ if (!matched) {
+ state->parse_state = entry_state;
+ }
+ return matched;
+}
+
+// <ctor-dtor-name> ::= C1 | C2 | C3
+// ::= D0 | D1 | D2
+// # GCC extensions: "unified" constructor/destructor. See
+// # https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
+// ::= C4 | D4
+static bool ParseCtorDtorName(State *state) { // Parses <ctor-dtor-name> and appends the text recorded at prev_name_idx (with a leading "~" for destructors).
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state; // Snapshot used to undo a failed alternative.
+ if (ParseOneCharToken(state, 'C') &&
+ ParseCharClass(state, "1234")) {
+ const char *const prev_name = state->out + state->parse_state.prev_name_idx; // Points into the output buffer; presumably the enclosing class name -- confirm where prev_name_idx is set.
+ MaybeAppendWithLength(state, prev_name,
+ state->parse_state.prev_name_length);
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'D') &&
+ ParseCharClass(state, "0124")) {
+ const char *const prev_name = state->out + state->parse_state.prev_name_idx;
+ MaybeAppend(state, "~"); // Destructor: same recorded text, prefixed with '~'.
+ MaybeAppendWithLength(state, prev_name,
+ state->parse_state.prev_name_length);
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <decltype> ::= Dt <expression> E # decltype of an id-expression or class
+// # member access (C++0x)
+// ::= DT <expression> E # decltype of an expression (C++0x)
+//
+// Both forms share one code path: 'D', then 't' or 'T', then an expression
+// and a closing 'E'. The parse state is rolled back if the match fails.
+static bool ParseDecltype(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState entry_state = state->parse_state;
+ const bool matched = ParseOneCharToken(state, 'D') &&
+ ParseCharClass(state, "tT") &&
+ ParseExpression(state) &&
+ ParseOneCharToken(state, 'E');
+ if (!matched) {
+ state->parse_state = entry_state;
+ }
+ return matched;
+}
+
+// <type> ::= <CV-qualifiers> <type>
+// ::= P <type> # pointer-to
+// ::= R <type> # reference-to
+// ::= O <type> # rvalue reference-to (C++0x)
+// ::= C <type> # complex pair (C 2000)
+// ::= G <type> # imaginary (C 2000)
+// ::= U <source-name> <type> # vendor extended type qualifier
+// ::= <builtin-type>
+// ::= <function-type>
+// ::= <class-enum-type> # note: just an alias for <name>
+// ::= <array-type>
+// ::= <pointer-to-member-type>
+// ::= <template-template-param> <template-args>
+// ::= <template-param>
+// ::= <decltype>
+// ::= <substitution>
+// ::= Dp <type> # pack expansion of (C++0x)
+//
+static bool ParseType(State *state) { // Parses one <type>; the alternatives are tried in the order written below.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state; // Snapshot restored whenever an alternative is abandoned.
+
+ // We should check CV-qualifiers, and PRGC things first.
+ //
+ // CV-qualifiers overlap with some operator names, but an operator name is not
+ // valid as a type. To avoid an ambiguity that can lead to exponential time
+ // complexity, refuse to backtrack the CV-qualifiers.
+ //
+ // _Z4aoeuIrMvvE
+ // => _Z 4aoeuI rM v v E
+ // aoeu<operator%=, void, void>
+ // => _Z 4aoeuI r Mv v E
+ // aoeu<void void::* restrict>
+ //
+ // By consuming the CV-qualifiers first, the former parse is disabled.
+ if (ParseCVQualifiers(state)) {
+ const bool result = ParseType(state); // Committed: no other alternative is tried if this fails.
+ if (!result) state->parse_state = copy;
+ return result;
+ }
+ state->parse_state = copy;
+
+ // Similarly, these tag characters can overlap with other <name>s resulting in
+ // two different parse prefixes that land on <template-args> in the same
+ // place, such as "C3r1xI...". So, disable the "ctor-name = C3" parse by
+ // refusing to backtrack the tag characters.
+ if (ParseCharClass(state, "OPRCG")) {
+ const bool result = ParseType(state); // Committed here as well.
+ if (!result) state->parse_state = copy;
+ return result;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Dp") && ParseType(state)) { // Pack expansion.
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'U') && ParseSourceName(state) && // Vendor extended type qualifier.
+ ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseBuiltinType(state) || // No restore after this chain: presumably each helper undoes its own consumption on failure.
+ ParseFunctionType(state) ||
+ ParseClassEnumType(state) ||
+ ParseArrayType(state) ||
+ ParsePointerToMemberType(state) ||
+ ParseDecltype(state) ||
+ // "std" on its own isn't a type.
+ ParseSubstitution(state, /*accept_std=*/false)) {
+ return true;
+ }
+
+ if (ParseTemplateTemplateParam(state) &&
+ ParseTemplateArgs(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Less greedy than <template-template-param> <template-args>.
+ if (ParseTemplateParam(state)) {
+ return true;
+ }
+
+ return false;
+}
+
+// <CV-qualifiers> ::= [r] [V] [K]
+//
+// Each qualifier is optional, but at least one must be present: an empty
+// <CV-qualifiers> is rejected to avoid an infinite loop in ParseType().
+static bool ParseCVQualifiers(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ // All three tokens are always attempted, in this fixed order.
+ const bool saw_r = ParseOneCharToken(state, 'r');
+ const bool saw_V = ParseOneCharToken(state, 'V');
+ const bool saw_K = ParseOneCharToken(state, 'K');
+ return saw_r || saw_V || saw_K;
+}
+
+// <builtin-type> ::= v, etc.
+// ::= u <source-name>
+static bool ParseBuiltinType(State *state) { // Parses <builtin-type>: a kBuiltinTypeList entry, or 'u' followed by a <source-name>.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ const AbbrevPair *p;
+ for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { // The table ends at an entry whose abbrev is null.
+ if (RemainingInput(state)[0] == p->abbrev[0]) { // NOTE(review): only the first character is compared; assumes every abbreviation is one character -- confirm.
+ MaybeAppend(state, p->real_name);
+ ++state->parse_state.mangled_idx; // Consume the single matched character.
+ return true;
+ }
+ }
+
+ ParseState copy = state->parse_state; // Snapshot for undoing a partial "u <source-name>" match.
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <function-type> ::= F [Y] <bare-function-type> E
+//
+// The 'Y' marker is optional. The parse state is rolled back if the match
+// fails.
+static bool ParseFunctionType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ const bool matched = ParseOneCharToken(state, 'F') &&
+ Optional(ParseOneCharToken(state, 'Y')) &&
+ ParseBareFunctionType(state) &&
+ ParseOneCharToken(state, 'E');
+ if (!matched) {
+ state->parse_state = entry_state;
+ }
+ return matched;
+}
+
+// <bare-function-type> ::= <(signature) type>+
+//
+// The signature types are parsed with appending disabled; on success the
+// append flag saved on entry is restored and "()" is appended in their place.
+static bool ParseBareFunctionType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ DisableAppend(state);
+ if (!OneOrMore(ParseType, state)) {
+ // Restoring the snapshot also restores the saved append flag.
+ state->parse_state = entry_state;
+ return false;
+ }
+ RestoreAppend(state, entry_state.append);
+ MaybeAppend(state, "()");
+ return true;
+}
+
+// <class-enum-type> ::= <name>
+//
+// Simply delegates to ParseName once the complexity guard allows it.
+static bool ParseClassEnumType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) {
+ return false;
+ }
+ const bool matched_name = ParseName(state);
+ return matched_name;
+}
+
+// <array-type> ::= A <(positive dimension) number> _ <(element) type>
+// ::= A [<(dimension) expression>] _ <(element) type>
+static bool ParseArrayType(State *state) { // Parses <array-type>; the numeric-dimension form is tried before the expression form.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state; // Snapshot restored after each failed alternative.
+ if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_') && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) && // The dimension expression may be absent.
+ ParseOneCharToken(state, '_') && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <pointer-to-member-type> ::= M <(class) type> <(member) type>
+//
+// Matches 'M' followed by two types. The parse state is rolled back if the
+// match fails.
+static bool ParsePointerToMemberType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ const bool matched = ParseOneCharToken(state, 'M') &&
+ ParseType(state) && // class type
+ ParseType(state); // member type
+ if (!matched) {
+ state->parse_state = entry_state;
+ }
+ return matched;
+}
+
+// <template-param> ::= T_
+// ::= T <parameter-2 non-negative number> _
+static bool ParseTemplateParam(State *state) { // Parses <template-param> and appends "?" as a placeholder for it.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (ParseTwoCharToken(state, "T_")) { // Short form with no number.
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true;
+ }
+
+ ParseState copy = state->parse_state; // Snapshot for undoing a partial "T <number> _" match.
+ if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+
+// <template-template-param> ::= <template-param>
+// ::= <substitution>
+//
+// Tries <template-param> first and falls back to <substitution>.
+static bool ParseTemplateTemplateParam(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (ParseTemplateParam(state)) {
+ return true;
+ }
+ // "std" on its own isn't a template.
+ return ParseSubstitution(state, /*accept_std=*/false);
+}
+
+// <template-args> ::= I <template-arg>+ E
+//
+// The arguments are parsed with appending disabled; on success the append
+// flag saved on entry is restored and "<>" is appended in their place.
+static bool ParseTemplateArgs(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ DisableAppend(state);
+ const bool matched = ParseOneCharToken(state, 'I') &&
+ OneOrMore(ParseTemplateArg, state) &&
+ ParseOneCharToken(state, 'E');
+ if (!matched) {
+ // Restoring the snapshot also restores the saved append flag.
+ state->parse_state = entry_state;
+ return false;
+ }
+ RestoreAppend(state, entry_state.append);
+ MaybeAppend(state, "<>");
+ return true;
+}
+
+// <template-arg> ::= <type>
+// ::= <expr-primary>
+// ::= J <template-arg>* E # argument pack
+// ::= X <expression> E
+static bool ParseTemplateArg(State *state) { // Parses one <template-arg>; the alternatives are tried in the order below.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state; // Snapshot restored after failed alternatives.
+ if (ParseOneCharToken(state, 'J') && // Argument pack: J <template-arg>* E
+ ZeroOrMore(ParseTemplateArg, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // There can be significant overlap between the following leading to
+ // exponential backtracking:
+ //
+ // <expr-primary> ::= L <type> <expr-cast-value> E
+ // e.g. L 2xxIvE 1 E
+ // <type> ==> <local-source-name> <template-args>
+ // e.g. L 2xx IvE
+ //
+ // This means parsing an entire <type> twice, and <type> can contain
+ // <template-arg>, so this can generate exponential backtracking. There is
+ // only overlap when the remaining input starts with "L <source-name>", so
+ // parse all cases that can start this way jointly to share the common prefix.
+ //
+ // We have:
+ //
+ // <template-arg> ::= <type>
+ // ::= <expr-primary>
+ //
+ // First, drop all the productions of <type> that must start with something
+ // other than 'L'. All that's left is <class-enum-type>; inline it.
+ //
+ // <type> ::= <nested-name> # starts with 'N'
+ // ::= <unscoped-name>
+ // ::= <unscoped-template-name> <template-args>
+ // ::= <local-name> # starts with 'Z'
+ //
+ // Drop and inline again:
+ //
+ // <type> ::= <unscoped-name>
+ // ::= <unscoped-name> <template-args>
+ // ::= <substitution> <template-args> # starts with 'S'
+ //
+ // Merge the first two, inline <unscoped-name>, drop last:
+ //
+ // <type> ::= <unqualified-name> [<template-args>]
+ // ::= St <unqualified-name> [<template-args>] # starts with 'S'
+ //
+ // Drop and inline:
+ //
+ // <type> ::= <operator-name> [<template-args>] # starts with lowercase
+ // ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
+ // ::= <source-name> [<template-args>] # starts with digit
+ // ::= <local-source-name> [<template-args>]
+ // ::= <unnamed-type-name> [<template-args>] # starts with 'U'
+ //
+ // One more time:
+ //
+ // <type> ::= L <source-name> [<template-args>]
+ //
+ // Likewise with <expr-primary>:
+ //
+ // <expr-primary> ::= L <type> <expr-cast-value> E
+ // ::= LZ <encoding> E # cannot overlap; drop
+ // ::= L <mangled_name> E # cannot overlap; drop
+ //
+ // By similar reasoning as shown above, the only <type>s starting with
+ // <source-name> are "<source-name> [<template-args>]". Inline this.
+ //
+ // <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
+ //
+ // Now inline both of these into <template-arg>:
+ //
+ // <template-arg> ::= L <source-name> [<template-args>]
+ // ::= L <source-name> [<template-args>] <expr-cast-value> E
+ //
+ // Merge them and we're done:
+ // <template-arg>
+ // ::= L <source-name> [<template-args>] [<expr-cast-value> E]
+ if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
+ copy = state->parse_state; // Re-snapshot after the shared prefix so only the optional tail is undone.
+ if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) { // NOTE(review): ParseExprCastValue already consumes a trailing 'E'; confirm a second 'E' is intended here.
+ return true;
+ }
+ state->parse_state = copy;
+ return true; // The prefix alone is accepted as a <type>.
+ }
+
+ // Now that the overlapping cases can't reach this code, we can safely call
+ // both of these.
+ if (ParseType(state) ||
+ ParseExprPrimary(state)) {
+ return true;
+ }
+ state->parse_state = copy; // copy still holds the entry snapshot: it is only reassigned on a path that returns.
+
+ if (ParseOneCharToken(state, 'X') && ParseExpression(state) && // X <expression> E
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// <unresolved-type> ::= <template-param> [<template-args>]
+// ::= <decltype>
+// ::= <substitution>
+//
+// The three productions are tried in the order listed; the first one that
+// matches wins.
+static inline bool ParseUnresolvedType(State *state) {
+ // No ComplexityGuard because we don't copy the state in this stack frame.
+ if (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) {
+ return true;
+ }
+ if (ParseDecltype(state)) {
+ return true;
+ }
+ return ParseSubstitution(state, /*accept_std=*/false);
+}
+
+// <simple-id> ::= <source-name> [<template-args>]
+//
+// Note: <simple-id> cannot be followed by a parameter pack; see comment in
+// ParseUnresolvedType.
+static inline bool ParseSimpleId(State *state) {
+ // No ComplexityGuard because we don't copy the state in this stack frame.
+ if (!ParseSourceName(state)) {
+ return false;
+ }
+ // The template arguments are optional.
+ return Optional(ParseTemplateArgs(state));
+}
+
+// <base-unresolved-name> ::= <source-name> [<template-args>]
+// ::= on <operator-name> [<template-args>]
+// ::= dn <destructor-name>
+static bool ParseBaseUnresolvedName(State *state) { // Parses <base-unresolved-name>: a simple-id, an "on" operator name, or a "dn" destructor name.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ if (ParseSimpleId(state)) { // <source-name> [<template-args>]
+ return true;
+ }
+
+ ParseState copy = state->parse_state; // Snapshot restored after each failed alternative below.
+ if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) && // No arity out-parameter is passed here.
+ Optional(ParseTemplateArgs(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dn") &&
+ (ParseUnresolvedType(state) || ParseSimpleId(state))) { // The destructor name is accepted as either an <unresolved-type> or a <simple-id>.
+ return true;
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// <unresolved-name> ::= [gs] <base-unresolved-name>
+// ::= sr <unresolved-type> <base-unresolved-name>
+// ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
+// <base-unresolved-name>
+// ::= [gs] sr <unresolved-qualifier-level>+ E
+// <base-unresolved-name>
+static bool ParseUnresolvedName(State *state) { // Parses <unresolved-name>; the four productions are tried in the order of the grammar above.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state; // Snapshot restored after each failed alternative.
+ if (Optional(ParseTwoCharToken(state, "gs")) && // [gs] <base-unresolved-name>
+ ParseBaseUnresolvedName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) && // sr <unresolved-type> <base-unresolved-name>
+ ParseBaseUnresolvedName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') && // srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
+ ParseUnresolvedType(state) &&
+ OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (Optional(ParseTwoCharToken(state, "gs")) && // [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
+ ParseTwoCharToken(state, "sr") &&
+ OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// <expression> ::= <1-ary operator-name> <expression>
+// ::= <2-ary operator-name> <expression> <expression>
+// ::= <3-ary operator-name> <expression> <expression> <expression>
+// ::= cl <expression>+ E
+// ::= cv <type> <expression> # type (expression)
+// ::= cv <type> _ <expression>* E # type (expr-list)
+// ::= st <type>
+// ::= <template-param>
+// ::= <function-param>
+// ::= <expr-primary>
+// ::= dt <expression> <unresolved-name> # expr.name
+// ::= pt <expression> <unresolved-name> # expr->name
+// ::= sp <expression> # argument pack expansion
+// ::= sr <type> <unqualified-name> <template-args>
+// ::= sr <type> <unqualified-name>
+// <function-param> ::= fp <(top-level) CV-qualifiers> _
+// ::= fp <(top-level) CV-qualifiers> <number> _
+// ::= fL <number> p <(top-level) CV-qualifiers> _
+// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+static bool ParseExpression(State *state) {
+ // Parses one <expression>. Alternatives are tried in the order written
+ // below; before each later alternative the parse state is reset to the
+ // snapshot taken after the first two helpers have been tried. Returns true
+ // on the first alternative that matches.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
+ return true;
+ }
+
+ // Object/function call expression.
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Function-param expression (level 0).
+ if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Function-param expression (level 1+).
+ if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
+ ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Parse the conversion expressions jointly to avoid re-parsing the <type> in
+ // their common prefix. Parsed as:
+ // <expression> ::= cv <type> <conversion-args>
+ // <conversion-args> ::= _ <expression>* E
+ // ::= <expression>
+ //
+ // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
+ // also needs to accept "cv <type>" in other contexts.
+ if (ParseTwoCharToken(state, "cv")) {
+ if (ParseType(state)) {
+ ParseState copy2 = state->parse_state;
+ if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy2;
+ if (ParseExpression(state)) {
+ return true;
+ }
+ }
+ } else {
+ // Parse unary, binary, and ternary operator expressions jointly, taking
+ // care not to re-parse subexpressions repeatedly. Parse like:
+ // <expression> ::= <operator-name> <expression>
+ // [<one-to-two-expressions>]
+ // <one-to-two-expressions> ::= <expression> [<expression>]
+ int arity = -1;
+ if (ParseOperatorName(state, &arity) &&
+ arity > 0 && // 0 arity => disabled.
+ (arity < 3 || ParseExpression(state)) &&
+ (arity < 2 || ParseExpression(state)) &&
+ (arity < 1 || ParseExpression(state))) {
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
+ // sizeof type
+ if (ParseTwoCharToken(state, "st") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Object and pointer member access expressions:
+ // <expression> ::= (dt | pt) <expression> <unresolved-name>
+ // The member is an <unresolved-name>, not a <type>. Parsing it as a type
+ // would let ParseBuiltinType match only the first letter of prefixes such
+ // as "on", "dn" or "sr" as a builtin type instead of parsing the name.
+ if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
+ ParseExpression(state) && ParseUnresolvedName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Parameter pack expansion
+ if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return ParseUnresolvedName(state);
+}
+
+// <expr-primary> ::= L <type> <(value) number> E
+// ::= L <type> <(value) float> E
+// ::= L <mangled-name> E
+// // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
+// ::= LZ <encoding> E
+//
+// Warning, subtle: the "bug" LZ production above is ambiguous with the first
+// production where <type> starts with <local-name>, which can lead to
+// exponential backtracking in two scenarios:
+//
+// - When whatever follows the E in the <local-name> in the first production is
+// not a name, we backtrack the whole <encoding> and re-parse the whole thing.
+//
+// - When whatever follows the <local-name> in the first production is not a
+// number and this <expr-primary> may be followed by a name, we backtrack the
+// <name> and re-parse it.
+//
+// Moreover this ambiguity isn't always resolved -- for example, the following
+// has two different parses:
+//
+// _ZaaILZ4aoeuE1x1EvE
+// => operator&&<aoeu, x, E, void>
+// => operator&&<(aoeu::x)(1), void>
+//
+// To resolve this, we just do what GCC's demangler does, and refuse to parse
+// casts to <local-name> types.
+static bool ParseExprPrimary(State *state) { // Parses <expr-primary>; see the ambiguity discussion in the comment above.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state; // Snapshot restored after each failed alternative.
+
+ // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E"
+ // or fail, no backtracking.
+ if (ParseTwoCharToken(state, "LZ")) {
+ if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+
+ state->parse_state = copy;
+ return false; // Committed: the remaining alternatives are not tried.
+ }
+
+ // The merged cast production.
+ if (ParseOneCharToken(state, 'L') && ParseType(state) &&
+ ParseExprCastValue(state)) { // ParseExprCastValue also consumes the closing 'E'.
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'L') && ParseMangledName(state) && // L <mangled-name> E
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// <number> or <float>, followed by 'E', as described above ParseExprPrimary.
+//
+// The <number> form is tried first. Backtracking is required after it because
+// input such as "7fffE" accepts "7" as a number and then fails to find the
+// 'E'; the <float> form is then tried from the same starting point.
+static bool ParseExprCastValue(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+
+ const bool matched_number = ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, 'E');
+ if (matched_number) {
+ return true;
+ }
+ state->parse_state = entry_state;
+
+ const bool matched_float = ParseFloatNumber(state) &&
+ ParseOneCharToken(state, 'E');
+ if (matched_float) {
+ return true;
+ }
+ state->parse_state = entry_state;
+ return false;
+}
+
+// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
+// ::= Z <(function) encoding> E s [<discriminator>]
+//
+// Parsing a common prefix of these two productions together avoids an
+// exponential blowup of backtracking. Parse like:
+// <local-name> := Z <encoding> E <local-name-suffix>
+// <local-name-suffix> ::= s [<discriminator>]
+// ::= <name> [<discriminator>]
+
+static bool ParseLocalNameSuffix(State *state) { // Parses <local-name-suffix>: the "<name>" form is tried first, then the "s" form.
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ if (MaybeAppend(state, "::") && ParseName(state) && // "::" is emitted before the name is attempted.
+ Optional(ParseDiscriminator(state))) {
+ return true;
+ }
+
+ // Since we're not going to overwrite the above "::" by re-parsing the
+ // <encoding> (whose trailing '\0' byte was in the byte now holding the
+ // first ':'), we have to rollback the "::" if the <name> parse failed.
+ if (state->parse_state.append) {
+ state->out[state->parse_state.out_cur_idx - 2] = '\0'; // Assumes the cursor still sits just past the "::" -- TODO confirm ParseName restores it on failure.
+ }
+
+ return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
+}
+
+// Matches "Z <encoding> E" followed by a local-name suffix. The parse state
+// is rolled back if any part of the match fails.
+static bool ParseLocalName(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ const bool matched = ParseOneCharToken(state, 'Z') &&
+ ParseEncoding(state) &&
+ ParseOneCharToken(state, 'E') &&
+ ParseLocalNameSuffix(state);
+ if (!matched) {
+ state->parse_state = entry_state;
+ }
+ return matched;
+}
+
+// <discriminator> := _ <(non-negative) number>
+//
+// Matches '_' followed by a number. The parse state is rolled back if the
+// match fails.
+static bool ParseDiscriminator(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState entry_state = state->parse_state;
+ const bool matched = ParseOneCharToken(state, '_') &&
+ ParseNumber(state, nullptr);
+ if (!matched) {
+ state->parse_state = entry_state;
+ }
+ return matched;
+}
+
+// <substitution> ::= S_
+// ::= S <seq-id> _
+// ::= St, etc.
+//
+// "St" is special in that it's not valid as a standalone name, and it *is*
+// allowed to precede a name without being wrapped in "N...E". This means that
+// if we accept it on its own, we can accept "St1a" and try to parse
+// template-args, then fail and backtrack, accept "St" on its own, then "1a" as
+// an unqualified name and re-parse the same template-args. To block this
+// exponential backtracking, we disable it with 'accept_std=false' in
+// problematic contexts.
+static bool ParseSubstitution(State *state, bool accept_std) { // accept_std: when false, the "St" abbreviation is rejected (see the comment above).
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (ParseTwoCharToken(state, "S_")) {
+ MaybeAppend(state, "?"); // We don't support substitutions.
+ return true;
+ }
+
+ ParseState copy = state->parse_state; // Snapshot for undoing the alternatives below.
+ if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "?"); // We don't support substitutions.
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Expand abbreviations like "St" => "std".
+ if (ParseOneCharToken(state, 'S')) {
+ const AbbrevPair *p;
+ for (p = kSubstitutionList; p->abbrev != nullptr; ++p) {
+ if (RemainingInput(state)[0] == p->abbrev[1] && // 'S' is already consumed, so compare the abbreviation's second character.
+ (accept_std || p->abbrev[1] != 't')) {
+ MaybeAppend(state, "std");
+ if (p->real_name[0] != '\0') { // An empty real_name leaves just "std".
+ MaybeAppend(state, "::");
+ MaybeAppend(state, p->real_name);
+ }
+ ++state->parse_state.mangled_idx; // Consume the matched second character.
+ return true;
+ }
+ }
+ }
+ state->parse_state = copy;
+ return false;
+}
+
+// Parse <mangled-name>, optionally followed by either a function-clone suffix
+// or version suffix. Returns true only if the whole input was accounted for:
+// nothing remains, the remainder is a function-clone suffix (dropped), or
+// the remainder starts with '@' (appended to the output verbatim).
+static bool ParseTopLevelMangledName(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (!ParseMangledName(state)) {
+ return false;
+ }
+ if (RemainingInput(state)[0] == '\0') {
+ return true; // Nothing left over.
+ }
+ // Drop trailing function clone suffix, if any.
+ if (IsFunctionCloneSuffix(RemainingInput(state))) {
+ return true;
+ }
+ // Append trailing version suffix if any.
+ // ex. _Z3foo@@GLIBCXX_3.4
+ if (RemainingInput(state)[0] == '@') {
+ MaybeAppend(state, RemainingInput(state));
+ return true;
+ }
+ return false; // Unconsumed suffix.
+}
+
+// Reports whether the output cursor has reached or passed out_end_idx.
+static bool Overflowed(const State *state) {
+ const bool below_end =
+ state->parse_state.out_cur_idx < state->out_end_idx;
+ return !below_end;
+}
+
+// The demangler entry point. Initializes a State over "mangled" and "out",
+// runs the top-level parse, and reports success only if the parse succeeded
+// and Overflowed() is false for the output.
+bool Demangle(const char *mangled, char *out, int out_size) {
+ State state;
+ InitState(&state, mangled, out, out_size);
+ if (!ParseTopLevelMangledName(&state)) {
+ return false;
+ }
+ return !Overflowed(&state);
+}
diff --git a/src/demangle.h b/src/demangle.h
new file mode 100644
index 0000000..806b7fd
--- /dev/null
+++ b/src/demangle.h
@@ -0,0 +1,63 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// An async-signal-safe and thread-safe demangler for Itanium C++ ABI
+// (aka G++ V3 ABI).
+
+// The demangler is implemented to be used in async signal handlers to
+// symbolize stack traces. We cannot use libstdc++'s
+// abi::__cxa_demangle() in such signal handlers since it's not async
+// signal safe (it uses malloc() internally).
+//
+// Note that this demangler doesn't support full demangling. More
+// specifically, it doesn't print types of function parameters and
+// types of template arguments. It just skips them. However, it's
+// still very useful to extract basic information such as class,
+// function, constructor, destructor, and operator names.
+//
+// See the implementation note in demangle.cc if you are interested.
+//
+// Example:
+//
+// | Mangled Name | The Demangler | abi::__cxa_demangle()
+// |---------------|---------------|-----------------------
+// | _Z1fv | f() | f()
+// | _Z1fi | f() | f(int)
+// | _Z3foo3bar | foo() | foo(bar)
+// | _Z1fIiEvi | f<>() | void f<int>(int)
+// | _ZN1N1fE | N::f | N::f
+// | _ZN3Foo3BarEv | Foo::Bar() | Foo::Bar()
+// | _Zrm1XS_ | operator%() | operator%(X, X)
+// | _ZN3FooC1Ev | Foo::Foo() | Foo::Foo()
+// | _Z1fSs | f() | f(std::basic_string<char,
+// | | | std::char_traits<char>,
+// | | | std::allocator<char> >)
+//
+// See the unit test for more examples.
+//
+// Note: we might want to write demanglers for ABIs other than Itanium
+// C++ ABI in the future.
+//
+
+#ifndef BASE_DEMANGLE_H_
+#define BASE_DEMANGLE_H_
+
+// Demangle "mangled". On success, return true and write the
+// demangled symbol name to "out". Otherwise, return false.
+// "out" is modified even if demangling is unsuccessful.
+bool Demangle(const char *mangled, char *out, int out_size);
+
+#endif // BASE_DEMANGLE_H_
diff --git a/src/disassemble.cc b/src/disassemble.cc
new file mode 100644
index 0000000..d48cac4
--- /dev/null
+++ b/src/disassemble.cc
@@ -0,0 +1,233 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+
+#include "bloaty.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "capstone/capstone.h"
+#include "re.h"
+
+// Throws a bloaty::Error built from |str| and |line|. The file recorded is
+// always this one, because __FILE__ is expanded here rather than at the call
+// site; the THROW()/THROWF() macros below pass the caller's __LINE__.
+static void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+#define THROW(msg) Throw(msg, __LINE__)
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+
+using absl::string_view;
+
+namespace bloaty {
+
+namespace {
+
+// Returns |input| padded on the right with spaces until it is at least |size|
+// characters long. Inputs that are already that long come back unchanged.
+static std::string RightPad(const std::string& input, size_t size) {
+  std::string padded(input);
+  if (padded.size() < size) {
+    padded.append(size - padded.size(), ' ');
+  }
+  return padded;
+}
+
+} // anonymous namespace
+
+// Disassembles the code in |info.text| one instruction at a time and, for
+// every memory operand that is addressed purely relative to RIP (no segment,
+// no index register), reports the pair (instruction address, target address)
+// to |sink| under the name "x86_disassemble". Targets that compute to address
+// zero are not reported.
+//
+// Only x86 is handled; any other |info.arch| returns immediately. If an
+// instruction can't be decoded the scan simply stops (some symbols in .text
+// are data rather than code).
+//
+// Throws bloaty::Error if |info.text| is empty or Capstone can't be set up.
+void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink) {
+  if (info.arch != CS_ARCH_X86) {
+    // x86 only for now.
+    return;
+  }
+
+  // Validate the input before acquiring any Capstone resource, so that this
+  // throw cannot leak an open handle.
+  if (info.text.size() == 0) {
+    THROW("Tried to disassemble empty function.");
+  }
+
+  csh handle;
+  if (cs_open(info.arch, info.mode, &handle) != CS_ERR_OK) {
+    THROW("Couldn't initialize Capstone");
+  }
+  if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) {
+    cs_close(&handle);
+    THROW("Couldn't initialize Capstone");
+  }
+
+  cs_insn *in = cs_malloc(handle);
+  if (in == nullptr) {
+    cs_close(&handle);
+    THROW("Couldn't allocate Capstone instruction");
+  }
+
+  uint64_t address = info.start_address;
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(info.text.data());
+  size_t size = info.text.size();
+
+  try {
+    while (size > 0) {
+      if (!cs_disasm_iter(handle, &ptr, &size, &address, in)) {
+        // Some symbols that end up in the .text section aren't really
+        // functions but data. Not sure why this happens.
+        if (verbose_level > 1) {
+          printf("Error disassembling function at address: %" PRIx64 "\n",
+                 address);
+        }
+        break;
+      }
+
+      size_t count = in->detail->x86.op_count;
+      for (size_t i = 0; i < count; i++) {
+        cs_x86_op* op = &in->detail->x86.operands[i];
+        if (op->type == X86_OP_MEM && op->mem.base == X86_REG_RIP &&
+            op->mem.segment == X86_REG_INVALID &&
+            op->mem.index == X86_REG_INVALID) {
+          // RIP-relative displacements are relative to the address of the
+          // *next* instruction, hence address + size.
+          uint64_t to_address = in->address + in->size + op->mem.disp;
+          if (to_address) {
+            sink->AddVMRangeForVMAddr("x86_disassemble", in->address,
+                                      to_address, RangeSink::kUnknownSize);
+          }
+        }
+      }
+    }
+  } catch (...) {
+    // Release the Capstone resources if anything above (e.g. the sink) throws.
+    cs_free(in, 1);
+    cs_close(&handle);
+    throw;
+  }
+
+  cs_free(in, 1);
+  cs_close(&handle);
+}
+
+// If |in| is one of the x86 jump/call instructions listed below and its first
+// operand is an immediate, stores that immediate in |*target| and returns
+// true. Returns false, leaving |*target| untouched, for every other
+// instruction and for every non-x86 |arch|.
+bool TryGetJumpTarget(cs_arch arch, cs_insn *in, uint64_t* target) {
+  if (arch != CS_ARCH_X86) {
+    return false;
+  }
+
+  switch (in->id) {
+    case X86_INS_JAE:
+    case X86_INS_JA:
+    case X86_INS_JBE:
+    case X86_INS_JB:
+    case X86_INS_JCXZ:
+    case X86_INS_JECXZ:
+    case X86_INS_JE:
+    case X86_INS_JGE:
+    case X86_INS_JG:
+    case X86_INS_JLE:
+    case X86_INS_JL:
+    case X86_INS_JMP:
+    case X86_INS_JNE:
+    case X86_INS_JNO:
+    case X86_INS_JNP:
+    case X86_INS_JNS:
+    case X86_INS_JO:
+    case X86_INS_JP:
+    case X86_INS_JS:
+    case X86_INS_CALL:
+      break;  // A branch instruction: inspect its operand below.
+    default:
+      return false;
+  }
+
+  // Indirect branches (register or memory operand) have no static target.
+  const cs_x86_op& first_operand = in->detail->x86.operands[0];
+  if (first_operand.type != X86_OP_IMM) {
+    return false;
+  }
+
+  *target = first_operand.imm;
+  return true;
+}
+
+// Disassembles the code in |info.text| and returns a listing with one line per
+// instruction: an optional local label ("N:"), the mnemonic and the operands.
+//
+// Jump/call targets that fall inside the function are replaced by a reference
+// to a numbered local label (">N" when the target lies after the instruction,
+// "<N" otherwise). Other targets are replaced by whatever label
+// info.symbol_map.vm_map.TryGetLabel() reports, if any. For x86, size
+// qualifiers such as "qword ptr" are dropped (LEA) or upper-cased to "QWORD",
+// and NOP operands are omitted.
+//
+// Throws bloaty::Error if |info.text| is empty, Capstone can't be set up, or
+// nothing could be disassembled.
+std::string DisassembleFunction(const DisassemblyInfo& info) {
+  std::string ret;
+
+  // Validate the input before acquiring any Capstone resource, so that this
+  // throw cannot leak an open handle.
+  if (info.text.size() == 0) {
+    THROW("Tried to disassemble empty function.");
+  }
+
+  csh handle;
+  if (cs_open(info.arch, info.mode, &handle) != CS_ERR_OK) {
+    THROW("Couldn't initialize Capstone");
+  }
+  if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) {
+    cs_close(&handle);
+    THROW("Couldn't initialize Capstone");
+  }
+
+  cs_insn *insn;
+  size_t count =
+      cs_disasm(handle, reinterpret_cast<const uint8_t *>(info.text.data()),
+                info.text.size(), info.start_address, 0, &insn);
+
+  if (count == 0) {
+    cs_close(&handle);
+    THROW("Error disassembling function.");
+  }
+
+  try {
+    // Pass 1: collect every branch target that lands inside this function.
+    std::map<uint64_t, int> local_labels;
+
+    for (size_t i = 0; i < count; i++) {
+      cs_insn *in = insn + i;
+      uint64_t target;
+      if (TryGetJumpTarget(info.arch, in, &target) &&
+          target >= info.start_address &&
+          target < info.start_address + info.text.size()) {
+        local_labels[target] = 0;  // Fill in real value later.
+      }
+    }
+
+    // Number the labels in address order (std::map iterates sorted by key).
+    int next_label = 0;
+    for (auto& pair : local_labels) {
+      pair.second = next_label++;
+    }
+
+    // Pass 2: emit one line per instruction.
+    for (size_t i = 0; i < count; i++) {
+      cs_insn *in = insn + i;
+      string_view mnemonic(in->mnemonic);
+      std::string op_str(in->op_str);
+      std::string match;
+      std::string line_label;
+
+      if (info.arch == CS_ARCH_X86) {
+        if (in->id == X86_INS_LEA) {
+          ReImpl::GlobalReplace(&op_str, "\\w?word ptr ", "");
+        } else if (in->id == X86_INS_NOP) {
+          op_str.clear();
+        } else {
+          // qword ptr => QWORD
+          while (ReImpl::PartialMatch(op_str, "(\\w?word) ptr", &match)) {
+            std::string upper_match = match;
+            absl::AsciiStrToUpper(&upper_match);
+            ReImpl::Replace(&op_str, match + " ptr", upper_match);
+          }
+        }
+      }
+
+      ReImpl::GlobalReplace(&op_str, " ", "");
+
+      // Does some branch in this function land on this instruction?
+      auto own_label = local_labels.find(in->address);
+      if (own_label != local_labels.end()) {
+        line_label = std::to_string(own_label->second) + ":";
+      }
+
+      uint64_t target;
+      if (TryGetJumpTarget(info.arch, in, &target)) {
+        auto target_label = local_labels.find(target);
+        std::string symbol_name;
+        if (target_label != local_labels.end()) {
+          if (target > in->address) {
+            op_str = ">" + std::to_string(target_label->second);
+          } else {
+            op_str = "<" + std::to_string(target_label->second);
+          }
+        } else if (info.symbol_map.vm_map.TryGetLabel(target, &symbol_name)) {
+          op_str = symbol_name;
+        }
+      }
+
+      absl::StrAppend(&ret, " ", RightPad(line_label, 4),
+                      RightPad(std::string(mnemonic), 8), " ", op_str, "\n");
+    }
+  } catch (...) {
+    // Release the Capstone resources if formatting throws.
+    cs_free(insn, count);
+    cs_close(&handle);
+    throw;
+  }
+
+  // cs_disasm() allocated the instruction array; it must be released with
+  // cs_free() before the handle is closed.
+  cs_free(insn, count);
+  cs_close(&handle);
+  return ret;
+}
+
+} // namespace bloaty
diff --git a/src/dwarf.cc b/src/dwarf.cc
new file mode 100644
index 0000000..9ca0d4c
--- /dev/null
+++ b/src/dwarf.cc
@@ -0,0 +1,2159 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <initializer_list>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <stack>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/base/macros.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "absl/types/optional.h"
+#include "bloaty.h"
+#include "bloaty.pb.h"
+#include "dwarf_constants.h"
+
+using namespace dwarf2reader;
+using absl::string_view;
+
+// Rounds |offset| up to the nearest multiple of |granularity|, which must be a
+// power of two. Offsets that are already aligned are returned unchanged.
+static size_t AlignUpTo(size_t offset, size_t granularity) {
+  const size_t low_bits = granularity - 1;
+  return (offset + low_bits) & ~low_bits;
+}
+
+// Throws a bloaty::Error built from |str| and |line|; never returns. The file
+// recorded is always this one, because __FILE__ is expanded here rather than
+// at the call site; the THROW()/THROWF() macros below pass the caller's
+// __LINE__.
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+#define THROW(msg) Throw(msg, __LINE__)
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+
+namespace bloaty {
+
+extern int verbose_level;
+
+namespace dwarf {
+
+// Integer division of |n| by |d| that rounds the quotient up rather than
+// truncating it (for non-negative |n| and positive |d|).
+int DivRoundUp(int n, int d) {
+  const int bias = d - 1;
+  return (n + bias) / d;
+}
+
+namespace {
+
+// uint64/32 max is a tombstone value added by https://reviews.llvm.org/D81784.
+// Returns false for addresses that do not refer to real code or data: zero,
+// and the all-ones tombstone value for the given |address_size| (4 or 8
+// bytes). Every other address is considered valid.
+bool IsValidDwarfAddress(uint64_t addr, uint8_t address_size) {
+  switch (address_size) {
+    case 4:
+      if (addr == std::numeric_limits<uint32_t>::max()) return false;
+      break;
+    case 8:
+      if (addr == std::numeric_limits<uint64_t>::max()) return false;
+      break;
+    default:
+      break;
+  }
+  return addr != 0;
+}
+
+} // namespace
+
+// Low-level Parsing Routines //////////////////////////////////////////////////
+
+// For parsing the low-level values found in DWARF files. These are the only
+// routines that touch the bytes of the input buffer directly. Everything else
+// is layered on top of these.
+
+// Reads a fixed-width value of type T from the front of |*data| by copying its
+// bytes verbatim (so no alignment is required), then advances |*data| past
+// them. Throws if fewer than sizeof(T) bytes remain.
+template <class T>
+T ReadMemcpy(string_view* data) {
+  const size_t width = sizeof(T);
+  if (width > data->size()) {
+    THROW("premature EOF reading fixed-length DWARF data");
+  }
+  T value;
+  memcpy(&value, data->data(), width);
+  data->remove_prefix(width);
+  return value;
+}
+
+// Splits the first |bytes| bytes off the front of |*data| and returns them as
+// a view; |*data| is advanced past them. Throws if |*data| is shorter than
+// |bytes|.
+string_view ReadPiece(size_t bytes, string_view* data) {
+  if (bytes > data->size()) {
+    THROW("premature EOF reading variable-length DWARF data");
+  }
+  const string_view piece = data->substr(0, bytes);
+  data->remove_prefix(bytes);
+  return piece;
+}
+
+// Advances |*data| by |bytes| bytes without looking at them. Throws if
+// |*data| is shorter than |bytes|.
+void SkipBytes(size_t bytes, string_view* data) {
+  const bool enough = bytes <= data->size();
+  if (!enough) {
+    THROW("premature EOF skipping DWARF data");
+  }
+  data->remove_prefix(bytes);
+}
+
+// Reads a NUL-terminated string from the front of |*data|. Returns a view of
+// the characters before the terminator and advances |*data| past the
+// terminator. Throws if |*data| contains no NUL byte.
+string_view ReadNullTerminated(string_view* data) {
+  const void* terminator = memchr(data->data(), '\0', data->size());
+  if (terminator == NULL) {
+    THROW("DWARF string was not NULL-terminated");
+  }
+
+  const size_t length = static_cast<const char*>(terminator) - data->data();
+  const string_view text = data->substr(0, length);
+  data->remove_prefix(length + 1);  // Also consume the terminator.
+  return text;
+}
+
+// Advances |*data| past a NUL-terminated string, including its terminator,
+// without returning the contents. Throws if |*data| contains no NUL byte.
+void SkipNullTerminated(string_view* data) {
+  const void* terminator = memchr(data->data(), '\0', data->size());
+  if (terminator == NULL) {
+    THROW("DWARF string was not NULL-terminated");
+  }
+
+  const size_t length = static_cast<const char*>(terminator) - data->data();
+  data->remove_prefix(length + 1);  // Also consume the terminator.
+}
+
+// Parses the LEB128 format defined by DWARF (both signed and unsigned
+// versions).
+
+// Decodes one LEB128 value from the front of |*data| and advances |*data| past
+// it. Each input byte contributes its low 7 bits, least-significant group
+// first; a clear high bit marks the final byte. When |is_signed| is true the
+// result is sign-extended to 64 bits and returned as its two's-complement bit
+// pattern.
+//
+// Throws if the input ends, or ten bytes have been consumed, before a
+// terminating byte is seen.
+uint64_t ReadLEB128Internal(bool is_signed, string_view* data) {
+ uint64_t ret = 0;
+ int shift = 0;
+ // The loop runs with shift = 0, 7, ..., 63, i.e. for at most ten bytes.
+ int maxshift = 70;
+ const char* ptr = data->data();
+ const char* limit = ptr + data->size();
+
+ while (ptr < limit && shift < maxshift) {
+ char byte = *(ptr++);
+ ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
+ shift += 7;
+ if ((byte & 0x80) == 0) {
+ // Final byte: consume everything read so far.
+ data->remove_prefix(ptr - data->data());
+ // Bit 6 of the final byte is the sign bit. If it is set, fill every bit
+ // from |shift| upwards with ones. The shift < 64 test keeps the shift
+ // below within the width of the type.
+ if (is_signed && shift < 64 && (byte & 0x40)) {
+ ret |= -(1ULL << shift);
+ }
+ return ret;
+ }
+ }
+
+ THROW("corrupt DWARF data, unterminated LEB128");
+}
+
+// Reads one LEB128 value from the front of |*data| as integer type T, using
+// the signed encoding when T is signed and the unsigned one otherwise. Throws
+// if the decoded value does not fit in T.
+template <typename T>
+T ReadLEB128(string_view* data) {
+  typedef typename std::conditional<std::is_signed<T>::value, int64_t,
+                                    uint64_t>::type Wide;
+  const Wide decoded = ReadLEB128Internal(std::is_signed<T>::value, data);
+  const bool above_range = decoded > std::numeric_limits<T>::max();
+  const bool below_range = decoded < std::numeric_limits<T>::min();
+  if (above_range || below_range) {
+    THROW("DWARF data contained larger LEB128 than we were expecting");
+  }
+  return static_cast<T>(decoded);
+}
+
+// Advances |*data| past one LEB128 value without decoding it. Throws if no
+// terminating byte (high bit clear) is found within the first ten bytes or
+// before the input ends.
+void SkipLEB128(string_view* data) {
+  const size_t max_encoded_bytes = 10;
+  const size_t scan_end =
+      std::min(static_cast<size_t>(data->size()), max_encoded_bytes);
+
+  size_t consumed = 0;
+  while (consumed < scan_end) {
+    const bool is_last_byte = ((*data)[consumed] & 0x80) == 0;
+    ++consumed;
+    if (is_last_byte) {
+      data->remove_prefix(consumed);
+      return;
+    }
+  }
+
+  THROW("corrupt DWARF data, unterminated LEB128");
+}
+
+// Some size information attached to each compilation unit. The size of an
+// address or offset in the DWARF data depends on this state which is parsed
+// from the header.
+// Size information for one unit, parsed from its header: whether offsets are
+// 32 or 64 bits wide, how wide addresses are, and the unit's DWARF version.
+// The Read*() helpers decode offsets/addresses using those sizes.
+class CompilationUnitSizes {
+ public:
+  // When true, DWARF offsets are 64 bits, otherwise they are 32 bit.
+  bool dwarf64() const { return dwarf64_; }
+
+  // The size of addresses: 4 or 8 once SetAddressSize() has succeeded, and 0
+  // before that.
+  uint8_t address_size() const { return address_size_; }
+
+  // DWARF version of this unit. Returned at the full 16-bit width it is
+  // stored in, so that a corrupt version such as 0x0102 can't be truncated
+  // into a small, supported-looking number.
+  uint16_t dwarf_version() const { return dwarf_version_; }
+
+  // Records the address size for this unit. Throws unless it is 4 or 8.
+  void SetAddressSize(uint8_t address_size) {
+    if (address_size != 4 && address_size != 8) {
+      THROWF("Unexpected address size: $0", address_size);
+    }
+    address_size_ = address_size;
+  }
+
+  // To allow this as the key in a map. Orders by (dwarf64, address_size); the
+  // DWARF version does not take part in the comparison.
+  bool operator<(const CompilationUnitSizes& rhs) const {
+    return std::tie(dwarf64_, address_size_) <
+           std::tie(rhs.dwarf64_, rhs.address_size_);
+  }
+
+  // Reads a DWARF offset based on whether we are reading dwarf32 or dwarf64
+  // format.
+  uint64_t ReadDWARFOffset(string_view* data) const {
+    if (dwarf64_) {
+      return ReadMemcpy<uint64_t>(data);
+    } else {
+      return ReadMemcpy<uint32_t>(data);
+    }
+  }
+
+  // Reads an address according to the expected address_size. Must not be
+  // called before SetAddressSize().
+  uint64_t ReadAddress(string_view* data) const {
+    if (address_size_ == 8) {
+      return ReadMemcpy<uint64_t>(data);
+    } else if (address_size_ == 4) {
+      return ReadMemcpy<uint32_t>(data);
+    } else {
+      BLOATY_UNREACHABLE();
+    }
+  }
+
+  // Reads an "initial length" as specified in many DWARF headers.  This
+  // contains either a 32-bit or a 64-bit length, and signals whether we are
+  // using the 32-bit or 64-bit DWARF format (so it sets dwarf64 appropriately).
+  //
+  // Returns the range for this section and stores the remaining data
+  // in |remaining|. Throws if the length runs past the end of |remaining|.
+  string_view ReadInitialLength(string_view* remaining) {
+    uint64_t len = ReadMemcpy<uint32_t>(remaining);
+
+    if (len == 0xffffffff) {
+      // Escape value: the real length follows as a 64-bit integer.
+      dwarf64_ = true;
+      len = ReadMemcpy<uint64_t>(remaining);
+    } else {
+      dwarf64_ = false;
+    }
+
+    if (remaining->size() < len) {
+      THROW("short DWARF compilation unit");
+    }
+
+    string_view unit = *remaining;
+    unit.remove_suffix(remaining->size() - len);
+    *remaining = remaining->substr(len);
+    return unit;
+  }
+
+  // Reads the 16-bit version field that follows the initial length.
+  void ReadDWARFVersion(string_view* data) {
+    dwarf_version_ = ReadMemcpy<uint16_t>(data);
+  }
+
+ private:
+  // Default-initialized so that a freshly constructed object (e.g. one used
+  // as a map key) never exposes indeterminate values.
+  uint16_t dwarf_version_ = 0;
+  bool dwarf64_ = false;
+  uint8_t address_size_ = 0;
+};
+
+
+// AbbrevTable /////////////////////////////////////////////////////////////////
+
+// Parses and stores a representation of (a portion of) the .debug_abbrev
+// section of a DWARF file. An abbreviation is defined by a unique "code"
+// (unique within one table), and defines the DIE tag and set of attributes.
+// The encoding of the DIE then contains just the abbreviation code and the
+// attribute values -- thanks to the abbreviation table, the tag and attribute
+// keys/names are not required.
+//
+// The abbreviations are an internal detail of the DWARF format and users should
+// not need to care about them.
+
+// One abbreviation table: a map from abbreviation code to the DIE tag, the
+// has-children flag and the list of (attribute name, form) pairs that DIEs
+// using that code carry.
+class AbbrevTable {
+ public:
+  // Parses abbreviations from |data| until the terminating zero code is
+  // seen and returns whatever follows the terminator.
+  string_view ReadAbbrevs(string_view data);
+
+  // One (name, form) pair of an abbreviation's attribute list.
+  struct Attribute {
+    uint16_t name;
+    uint8_t form;
+  };
+
+  // A single abbreviation as stored in the table.
+  struct Abbrev {
+    uint32_t code;
+    uint16_t tag;
+    bool has_child;
+    std::vector<Attribute> attr;
+  };
+
+  // True when no abbreviation has been stored in this table.
+  bool IsEmpty() const { return abbrev_.empty(); }
+
+  // Looks up the abbreviation with the given |code|. On success points
+  // |*abbrev| at the stored entry and returns true; otherwise returns false
+  // and leaves |*abbrev| untouched.
+  bool GetAbbrev(uint32_t code, const Abbrev** abbrev) const {
+    const auto found = abbrev_.find(code);
+    if (found == abbrev_.end()) {
+      return false;
+    }
+    *abbrev = &found->second;
+    return true;
+  }
+
+ private:
+  // Keyed by abbreviation code. The codes are usually small and dense, so a
+  // vector<> would nearly do, but the input data is not trusted to be sane.
+  std::unordered_map<uint32_t, Abbrev> abbrev_;
+};
+
+// Parses abbreviation declarations from |data| into this table until the
+// terminating zero code, and returns the data following the terminator.
+// Throws on a duplicate code, on a children flag that is neither
+// DW_children_yes nor DW_children_no, or on truncated input.
+string_view AbbrevTable::ReadAbbrevs(string_view data) {
+  for (;;) {
+    const uint32_t code = ReadLEB128<uint32_t>(&data);
+    if (code == 0) {
+      return data;  // Terminator entry.
+    }
+
+    // operator[] value-initializes a new entry, so a non-zero stored code
+    // means this code has been declared before.
+    Abbrev& entry = abbrev_[code];
+    if (entry.code) {
+      THROW("DWARF data contained duplicate abbrev code");
+    }
+
+    entry.code = code;
+    entry.tag = ReadLEB128<uint16_t>(&data);
+
+    const uint8_t children_flag = ReadMemcpy<uint8_t>(&data);
+    if (children_flag == DW_children_yes) {
+      entry.has_child = true;
+    } else if (children_flag == DW_children_no) {
+      entry.has_child = false;
+    } else {
+      THROW("DWARF has_child is neither true nor false.");
+    }
+
+    // Attribute specifications run until a (0, 0) pair.
+    for (;;) {
+      Attribute spec;
+      spec.name = ReadLEB128<uint16_t>(&data);
+      spec.form = ReadLEB128<uint8_t>(&data);
+
+      const bool end_of_abbrev = spec.name == 0 && spec.form == 0;
+      if (end_of_abbrev) {
+        break;
+      }
+
+      entry.attr.push_back(spec);
+    }
+  }
+}
+
+
+// StringTable /////////////////////////////////////////////////////////////////
+
+// Represents the .debug_str portion of a DWARF file and contains code for
+// reading strings out of it. This is an internal detail of the DWARF format
+// and users should not need to care about it.
+
+// A view over the .debug_str data that hands out the NUL-terminated strings
+// stored in it.
+class StringTable {
+ public:
+  // |debug_str| is the section's data; it is referenced, not copied.
+  StringTable(string_view debug_str) : debug_str_(debug_str) {}
+
+  // Returns the string that starts |ofs| bytes into the table. Throws if
+  // |ofs| is out of range or the string is not NUL-terminated.
+  string_view ReadEntry(size_t ofs) const;
+
+ private:
+  string_view debug_str_;
+};
+
+string_view StringTable::ReadEntry(size_t ofs) const {
+  string_view from_entry = debug_str_;
+  SkipBytes(ofs, &from_entry);
+  const string_view entry = ReadNullTerminated(&from_entry);
+  return entry;
+}
+
+
+// AddressRanges ///////////////////////////////////////////////////////////////
+
+// Code for reading address ranges out of .debug_aranges.
+
+// Iterates over the units of a .debug_aranges section and, within each unit,
+// over its (address, length) tuples.
+//
+// Usage: call NextUnit() to move to a unit, then NextRange() repeatedly to
+// walk its tuples. The accessors return zero until the corresponding Next*()
+// call has succeeded.
+class AddressRanges {
+ public:
+  AddressRanges(string_view data) : section_(data), next_unit_(data) {}
+
+  // Offset into .debug_info for the current compilation unit.
+  uint64_t debug_info_offset() const { return debug_info_offset_; }
+
+  // Address and length for this range.
+  uint64_t address() const { return address_; }
+  uint64_t length() const { return length_; }
+
+  // Advance to the next range.  The values will be available in address() and
+  // length().  Returns false when the end of this compilation unit is hit.
+  // Must call this once before reading the first range.
+  bool NextRange();
+
+  // Advance to the next compilation unit.  The unit offset will be available in
+  // debug_info_offset().  Must call this once before reading the first unit.
+  bool NextUnit();
+
+  // Address size of the current unit, as read from its header.
+  uint8_t address_size() const { return sizes_.address_size(); }
+
+ private:
+  CompilationUnitSizes sizes_;
+  string_view section_;         // The whole section, for alignment maths.
+  string_view unit_remaining_;  // Unread part of the current unit.
+  string_view next_unit_;       // Everything after the current unit.
+  // Zero-initialized so the accessors never return indeterminate values when
+  // called before NextUnit()/NextRange().
+  uint64_t debug_info_offset_ = 0;
+  uint64_t address_ = 0;
+  uint64_t length_ = 0;
+};
+
+// Reads the next (address, length) tuple of the current unit into address()
+// and length(). Returns false, reading nothing, once the unit is exhausted.
+bool AddressRanges::NextRange() {
+  const bool have_more = !unit_remaining_.empty();
+  if (have_more) {
+    address_ = sizes_.ReadAddress(&unit_remaining_);
+    length_ = sizes_.ReadAddress(&unit_remaining_);
+  }
+  return have_more;
+}
+
+// Parses the header of the next unit and positions the reader at its first
+// tuple. Returns false when the section is exhausted. Throws on a DWARF
+// version above 4, a non-zero segment size, or truncated data.
+bool AddressRanges::NextUnit() {
+  if (next_unit_.empty()) {
+    return false;
+  }
+
+  unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
+
+  sizes_.ReadDWARFVersion(&unit_remaining_);
+  if (sizes_.dwarf_version() > 4) {
+    THROW("DWARF data is too new for us");
+  }
+
+  debug_info_offset_ = sizes_.ReadDWARFOffset(&unit_remaining_);
+  sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&unit_remaining_));
+
+  const uint8_t segment_size = ReadMemcpy<uint8_t>(&unit_remaining_);
+  if (segment_size) {
+    THROW("we don't know how to handle segmented addresses.");
+  }
+
+  // The first tuple starts at a section offset that is a multiple of the
+  // tuple size (two addresses), so skip the padding after the header.
+  const size_t header_end = unit_remaining_.data() - section_.data();
+  const size_t first_tuple = AlignUpTo(header_end, sizes_.address_size() * 2);
+  SkipBytes(first_tuple - header_end, &unit_remaining_);
+  return true;
+}
+
+
+// LocationList ////////////////////////////////////////////////////////////////
+
+// Code for reading entries out of a location list.
+// For the moment we only care about finding the bounds of a list given its
+// offset, so we don't actually vend any of the data.
+
+// Walks the entries of one location list, starting at the beginning of |data|
+// and decoding addresses with the widths given by |sizes|.
+class LocationList {
+ public:
+ LocationList(CompilationUnitSizes sizes, string_view data)
+ : sizes_(sizes), remaining_(data) {}
+
+ // Pointer to the first byte that has not been consumed yet.
+ const char* read_offset() const { return remaining_.data(); }
+ // Consumes one entry. Returns false once the terminating entry was read.
+ bool NextEntry();
+
+ private:
+ CompilationUnitSizes sizes_;
+ string_view remaining_;
+};
+
+// Consumes one location-list entry. Returns false after reading the
+// terminating (0, 0) pair and true for every other entry. A base address
+// selection entry (start is all ones) consists of the address pair only; any
+// other entry is followed by a 16-bit length and that many bytes of location
+// description, which are skipped.
+bool LocationList::NextEntry() {
+  const uint64_t start = sizes_.ReadAddress(&remaining_);
+  const uint64_t end = sizes_.ReadAddress(&remaining_);
+
+  if (start == 0 && end == 0) {
+    return false;
+  }
+
+  const bool is_base_selection =
+      start == UINT64_MAX ||
+      (start == UINT32_MAX && sizes_.address_size() == 4);
+  if (!is_base_selection) {
+    // Need to skip the location description.
+    const uint16_t description_len = ReadMemcpy<uint16_t>(&remaining_);
+    SkipBytes(description_len, &remaining_);
+  }
+  return true;
+}
+
+// Returns the prefix of |available| occupied by the location list that starts
+// at its beginning, up to and including the terminating entry.
+string_view GetLocationListRange(CompilationUnitSizes sizes,
+                                 string_view available) {
+  LocationList list(sizes, available);
+  bool more = true;
+  while (more) {
+    more = list.NextEntry();
+  }
+  const size_t consumed = list.read_offset() - available.data();
+  return available.substr(0, consumed);
+}
+
+
+// RangeList ///////////////////////////////////////////////////////////////////
+
+// Code for reading entries out of a range list.
+// For the moment we only care about finding the bounds of a list given its
+// offset, so we don't actually vend any of the data.
+
+// Walks the entries of one range list, starting at the beginning of |data|
+// and decoding addresses with the widths given by |sizes|.
+class RangeList {
+ public:
+ RangeList(CompilationUnitSizes sizes, string_view data)
+ : sizes_(sizes), remaining_(data) {}
+
+ // Pointer to the first byte that has not been consumed yet.
+ const char* read_offset() const { return remaining_.data(); }
+ // Consumes one entry. Returns false once the terminating entry was read.
+ bool NextEntry();
+
+ private:
+ CompilationUnitSizes sizes_;
+ string_view remaining_;
+};
+
+// Consumes one range-list entry (a pair of addresses). Returns false after
+// reading the terminating (0, 0) pair and true for every other entry.
+bool RangeList::NextEntry() {
+  const uint64_t start = sizes_.ReadAddress(&remaining_);
+  const uint64_t end = sizes_.ReadAddress(&remaining_);
+  return start != 0 || end != 0;
+}
+
+// Returns the prefix of |available| occupied by the range list that starts at
+// its beginning, up to and including the terminating entry.
+string_view GetRangeListRange(CompilationUnitSizes sizes,
+                              string_view available) {
+  RangeList list(sizes, available);
+  bool more = true;
+  while (more) {
+    more = list.NextEntry();
+  }
+  const size_t consumed = list.read_offset() - available.data();
+  return available.substr(0, consumed);
+}
+
+// DIEReader ///////////////////////////////////////////////////////////////////
+
+// Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
+// .debug_info or .debug_types section of a binary.
+//
+// Each DIE contains a tag and a set of attribute/value pairs. We rely on the
+// abbreviations in an AbbrevTable to decode the DIEs.
+
+// Cursor over the DIEs of a .debug_info or .debug_types section. After a
+// Seek*() call the reader sits on the first DIE of a unit; the AttrReader
+// friend parses that DIE's attributes and hands the new read position back
+// through ReadAttributesEnd(), after which NextDIE() moves on.
+class DIEReader {
+ public:
+ // Constructs a new DIEReader. Cannot be used until you call one of the
+ // Seek() methods below.
+ DIEReader(const File& file) : dwarf_(file) {}
+
+ // Returns true if we are at the end of DIEs for this compilation unit.
+ bool IsEof() const { return state_ == State::kEof; }
+
+ // DIEs exist in both .debug_info and .debug_types.
+ enum class Section {
+ kDebugInfo,
+ kDebugTypes
+ };
+
+ // Seeks to the overall start or the start of a specific compilation unit.
+ // Note that |header_offset| is the offset of the compilation unit *header*,
+ // not the offset of the first DIE.
+ bool SeekToCompilationUnit(Section section, uint64_t header_offset);
+ bool SeekToStart(Section section) {
+ return SeekToCompilationUnit(section, 0);
+ }
+
+ // Parses the header of the unit that follows the current one and reads its
+ // first DIE code. Returns false if there is no further unit.
+ bool NextCompilationUnit();
+
+ // Advances to the next overall DIE, ignoring whether it happens to be a
+ // child, a sibling, or an uncle/aunt. Returns false at error or EOF.
+ bool NextDIE();
+
+ // Skips children of the current DIE, so that the next call to NextDIE()
+ // will read the next sibling (or parent, if no sibling exists).
+ bool SkipChildren();
+
+ const AbbrevTable::Abbrev& GetAbbrev() const {
+ assert(!IsEof());
+ return *current_abbrev_;
+ }
+
+ // Returns the current read offset, measured from the start of the section
+ // that was passed to the most recent Seek call (start_ is set to the
+ // section's first byte there).
+ int64_t GetReadOffset() const { return remaining_.data() - start_; }
+
+ // Nesting depth counter: incremented for each DIE read that has children,
+ // decremented for each null entry skipped.
+ int GetDepth() const { return depth_; }
+
+ // Returns the tag of the current DIE.
+ // Requires that ReadCode() has been called at least once.
+ uint16_t GetTag() const { return GetAbbrev().tag; }
+
+ // Returns whether the current DIE has a child.
+ // Requires that ReadCode() has been called at least once.
+ bool HasChild() const { return GetAbbrev().has_child; }
+
+ const File& dwarf() const { return dwarf_; }
+
+ // Accessors for the state of the unit whose header was parsed last.
+ string_view unit_range() const { return unit_range_; }
+ CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
+ uint32_t abbrev_version() const { return abbrev_version_; }
+ uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }
+
+ // If both compileunit_name and strp_sink are set, this will automatically
+ // call strp_sink->AddFileRange(compileunit_name, <string range>) for every
+ // DW_FORM_strp attribute encountered. These strings occur in the .debug_str
+ // section.
+ void set_compileunit_name(absl::string_view name) {
+ unit_name_ = std::string(name);
+ }
+ void set_strp_sink(RangeSink* sink) { strp_sink_ = sink; }
+
+ // Reports |range| to the strp sink under the current unit name. Does
+ // nothing when no sink has been set.
+ void AddIndirectString(string_view range) const {
+ if (strp_sink_) {
+ strp_sink_->AddFileRange("dwarf_strp", unit_name_, range);
+ }
+ }
+
+ private:
+ BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);
+
+ template<typename> friend class AttrReader;
+
+ // APIs for our friends to use to update our state.
+
+ // Call to get the current read head where attributes should be parsed.
+ string_view ReadAttributesBegin() {
+ assert(state_ == State::kReadyToReadAttributes);
+ return remaining_;
+ }
+
+ // When some data has been parsed, this updates our read head.
+ // Throws if |remaining| has a null data pointer; otherwise stores the new
+ // position and |sibling| and returns true.
+ bool ReadAttributesEnd(string_view remaining, uint64_t sibling) {
+ assert(state_ == State::kReadyToReadAttributes);
+ if (remaining.data() == nullptr) {
+ THROW("premature EOF reading DWARF attributes");
+ } else {
+ remaining_ = remaining;
+ sibling_offset_ = sibling;
+ state_ = State::kReadyToNext;
+ return true;
+ }
+ }
+
+ // Internal APIs.
+
+ bool ReadCompilationUnitHeader();
+ bool ReadCode();
+ void SkipNullEntries();
+
+ // kReadyToReadAttributes: a DIE code was just read (set by ReadCode()).
+ // kReadyToNext: its attributes were consumed (set by ReadAttributesEnd()).
+ // kEof: no more data to read.
+ enum class State {
+ kReadyToReadAttributes,
+ kReadyToNext,
+ kEof,
+ } state_;
+
+ std::string error_;
+
+ const File& dwarf_;
+ RangeSink* strp_sink_ = nullptr;
+ // First byte of the section being read; set by SeekToCompilationUnit().
+ const char *start_ = nullptr;
+
+ // Abbreviation for the current entry.
+ const AbbrevTable::Abbrev* current_abbrev_;
+
+ // Our current read position.
+ string_view remaining_;
+ // Reset to 0 by ReadCode(); set from ReadAttributesEnd()'s |sibling|.
+ uint64_t sibling_offset_;
+ int depth_ = 0;
+
+ // Data for the next compilation unit.
+ string_view next_unit_;
+
+ // All of the AbbrevTables we've read from .debug_abbrev, indexed by their
+ // offset within .debug_abbrev.
+ std::unordered_map<uint64_t, AbbrevTable> abbrev_tables_;
+
+ // Whether we are in .debug_types or .debug_info.
+ Section section_;
+
+ // Information about the current compilation unit.
+ uint64_t debug_abbrev_offset_;
+ std::string unit_name_;
+ string_view unit_range_;
+ CompilationUnitSizes unit_sizes_;
+ AbbrevTable* unit_abbrev_;
+
+ // A small integer that uniquely identifies the combination of unit_abbrev_
+ // and unit_sizes_. Attribute readers use this to know when they can reuse an
+ // existing (abbrev code) -> (Actions) mapping, since this table depends on
+ // both the current abbrev. table and the sizes.
+ uint32_t abbrev_version_;
+
+ std::map<std::pair<AbbrevTable*, CompilationUnitSizes>, uint32_t>
+ abbrev_versions_;
+
+ // Only for .debug_types
+ uint64_t unit_type_signature_;
+ uint64_t unit_type_offset_;
+};
+
+// Consumes any run of zero bytes at the read position. Each one is a null
+// entry that terminates a chain of sibling DIEs, so the depth drops by one
+// per byte consumed.
+void DIEReader::SkipNullEntries() {
+  for (;;) {
+    if (remaining_.empty() || remaining_[0] != 0) {
+      return;
+    }
+    remaining_.remove_prefix(1);
+    --depth_;
+  }
+}
+
+// Reads the abbreviation code of the next DIE and looks up its abbreviation.
+// Returns false (and enters the EOF state) if only null entries or nothing
+// remain; throws if the code is not in the unit's abbreviation table. On
+// success the reader is ready to have the DIE's attributes parsed, and the
+// depth is bumped when the DIE has children.
+bool DIEReader::ReadCode() {
+  SkipNullEntries();
+
+  if (remaining_.empty()) {
+    state_ = State::kEof;
+    return false;
+  }
+
+  const uint32_t abbrev_code = ReadLEB128<uint32_t>(&remaining_);
+  const bool known = unit_abbrev_->GetAbbrev(abbrev_code, &current_abbrev_);
+  if (!known) {
+    THROW("couldn't find abbreviation for code");
+  }
+
+  sibling_offset_ = 0;
+  state_ = State::kReadyToReadAttributes;
+
+  // HasChild() consults the abbreviation that was just looked up.
+  depth_ += HasChild() ? 1 : 0;
+  return true;
+}
+
+// Moves to the unit that follows the current one by parsing its header.
+// Returns whatever ReadCompilationUnitHeader() returns, i.e. false when no
+// data is left for another unit.
+bool DIEReader::NextCompilationUnit() {
+ return ReadCompilationUnitHeader();
+}
+
+// Advances to the next DIE in the unit. Returns false if the reader is
+// already at EOF or hits it now. The current DIE's attributes must have been
+// consumed first.
+bool DIEReader::NextDIE() {
+  if (IsEof()) {
+    return false;
+  }
+
+  assert(state_ == State::kReadyToNext);
+  const bool have_die = ReadCode();
+  return have_die;
+}
+
+// Positions the reader on the unit whose header starts |offset| bytes into
+// the chosen |section| and reads that header. Returns false if there is no
+// data at that position; throws if |offset| lies beyond the section.
+bool DIEReader::SeekToCompilationUnit(Section section, uint64_t offset) {
+  section_ = section;
+
+  // Anything other than kDebugInfo selects .debug_types.
+  next_unit_ = dwarf_.debug_types;
+  if (section == Section::kDebugInfo) {
+    next_unit_ = dwarf_.debug_info;
+  }
+
+  // Remember the section start before skipping ahead to the header.
+  start_ = next_unit_.data();
+  SkipBytes(offset, &next_unit_);
+  return ReadCompilationUnitHeader();
+}
+
+// Parses the unit header at the front of next_unit_: initial length, version,
+// .debug_abbrev offset, address size and, for .debug_types, the type
+// signature and type offset. Loads the unit's abbreviation table if needed,
+// assigns abbrev_version_, and finally reads the first DIE code.
+//
+// Returns false if no data is left; otherwise returns ReadCode()'s result.
+// Throws on a DWARF version above 4 or on malformed data.
+bool DIEReader::ReadCompilationUnitHeader() {
+ if (next_unit_.empty()) {
+ state_ = State::kEof;
+ return false;
+ }
+
+ // unit_range_ must cover the header's length field plus the unit body:
+ // start at the header and extend to the end of what ReadInitialLength()
+ // returned.
+ unit_range_ = next_unit_;
+ remaining_ = unit_sizes_.ReadInitialLength(&next_unit_);
+ unit_range_ = unit_range_.substr(
+ 0, remaining_.size() + (remaining_.data() - unit_range_.data()));
+
+ unit_sizes_.ReadDWARFVersion(&remaining_);
+
+ if (unit_sizes_.dwarf_version() > 4) {
+ THROW("Data is in new DWARF format we don't understand");
+ }
+
+ debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ // operator[] creates an empty table the first time an offset is seen.
+ unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];
+
+ // If we haven't already read abbreviations for this debug_abbrev_offset_, we
+ // need to do so now.
+ if (unit_abbrev_->IsEmpty()) {
+ string_view abbrev_data = dwarf_.debug_abbrev;
+ SkipBytes(debug_abbrev_offset_, &abbrev_data);
+ unit_abbrev_->ReadAbbrevs(abbrev_data);
+ }
+
+ unit_sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&remaining_));
+
+ if (section_ == Section::kDebugTypes) {
+ unit_type_signature_ = ReadMemcpy<uint64_t>(&remaining_);
+ unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ }
+
+ // Map the (abbrev table, sizes) pair to a small id; a pair that has not been
+ // seen before gets the next unused number.
+ auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_);
+ auto insert_pair = abbrev_versions_.insert(
+ std::make_pair(abbrev_id, abbrev_versions_.size()));
+
+ // This will be either the newly inserted value or the existing one, if there
+ // was one.
+ abbrev_version_ = insert_pair.first->second;
+
+ return ReadCode();
+}
+
+
+// DWARF form parsing //////////////////////////////////////////////////////////
+
+// The value of one parsed attribute: either an unsigned integer or a view of
+// raw bytes/characters. Which of the two is held is recorded in type().
+class AttrValue {
+ public:
+  AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
+  AttrValue(string_view val) : string_(val), type_(Type::kString) {}
+
+  enum class Type {
+    kUint,
+    kString
+  };
+
+  Type type() const { return type_; }
+  bool IsUint() const { return type_ == Type::kUint; }
+  bool IsString() const { return type_ == Type::kString; }
+
+  // Returns the value as an unsigned integer. An integer value is returned
+  // as is. A string value of exactly 1, 2, 4 or 8 bytes is reinterpreted as
+  // a fixed-width integer of that size; any other length yields nullopt.
+  absl::optional<uint64_t> ToUint() const {
+    if (IsUint()) return uint_;
+    string_view str = string_;
+    switch (str.size()) {
+      case 1:
+        return ReadMemcpy<uint8_t>(&str);
+      case 2:
+        // Must read both bytes; reading a uint8_t here would silently drop
+        // the high byte of a two-byte value.
+        return ReadMemcpy<uint16_t>(&str);
+      case 4:
+        return ReadMemcpy<uint32_t>(&str);
+      case 8:
+        return ReadMemcpy<uint64_t>(&str);
+    }
+    return absl::nullopt;
+  }
+
+  // Returns the integer value. Only valid when IsUint().
+  uint64_t GetUint() const {
+    assert(type_ == Type::kUint);
+    return uint_;
+  }
+
+  // Returns the string value. Only valid when IsString().
+  string_view GetString() const {
+    assert(type_ == Type::kString);
+    return string_;
+  }
+
+ private:
+  // Exactly one member is active, as selected by type_.
+  union {
+    uint64_t uint_;
+    string_view string_;
+  };
+
+  Type type_;
+};
+
+// Reads a block that is prefixed by its length, stored as a fixed-width
+// integer of type D, and returns the block's bytes. Advances |*data| past
+// both the length and the block.
+template <class D>
+string_view ReadBlock(string_view* data) {
+  const D block_size = ReadMemcpy<D>(data);
+  return ReadPiece(block_size, data);
+}
+
+// Reads a block that is prefixed by its length, stored as an unsigned LEB128,
+// and returns the block's bytes. Advances |*data| past both the length and
+// the block.
+string_view ReadVariableBlock(string_view* data) {
+  const uint64_t block_size = ReadLEB128<uint64_t>(data);
+  return ReadPiece(block_size, data);
+}
+
+// Reads an offset of type D from |*data|, fetches the NUL-terminated string
+// stored at that offset in the reader's debug_str data, reports the string's
+// range through reader.AddIndirectString(), and returns it.
+template <class D>
+string_view ReadIndirectString(const DIEReader& reader, string_view* data) {
+  const D str_offset = ReadMemcpy<D>(data);
+  const StringTable strings(reader.dwarf().debug_str);
+  const string_view entry = strings.ReadEntry(str_offset);
+  reader.AddIndirectString(entry);
+  return entry;
+}
+
+// Parses one attribute value of the given |form| from the front of |*data|
+// and advances |*data| past it. Integer-like forms produce a uint AttrValue;
+// blocks, strings and the fixed-size DW_FORM_data* forms produce a string
+// AttrValue viewing the raw bytes. The width of address- and offset-sized
+// forms comes from reader.unit_sizes().
+//
+// Throws on an unknown form, on DW_FORM_indirect naming itself, or on
+// truncated data.
+AttrValue ParseAttr(const DIEReader& reader, uint8_t form, string_view* data) {
+  switch (form) {
+    case DW_FORM_indirect: {
+      // The actual form is stored inline, ahead of the value.
+      uint16_t indirect_form = ReadLEB128<uint16_t>(data);
+      if (indirect_form == DW_FORM_indirect) {
+        THROW("indirect attribute has indirect form type");
+      }
+      // |form| is only 8 bits wide. Reject larger codes explicitly rather
+      // than letting the recursive call truncate them into some unrelated
+      // form and misparse the value.
+      if (indirect_form > std::numeric_limits<uint8_t>::max()) {
+        THROWF("Don't know how to parse DWARF form: $0", indirect_form);
+      }
+      return ParseAttr(reader, static_cast<uint8_t>(indirect_form), data);
+    }
+    case DW_FORM_ref1:
+      return AttrValue(ReadMemcpy<uint8_t>(data));
+    case DW_FORM_ref2:
+      return AttrValue(ReadMemcpy<uint16_t>(data));
+    case DW_FORM_ref4:
+      return AttrValue(ReadMemcpy<uint32_t>(data));
+    case DW_FORM_ref_sig8:
+    case DW_FORM_ref8:
+      return AttrValue(ReadMemcpy<uint64_t>(data));
+    case DW_FORM_ref_udata:
+      return AttrValue(ReadLEB128<uint64_t>(data));
+    case DW_FORM_addr:
+    address_size:
+      switch (reader.unit_sizes().address_size()) {
+        case 4:
+          return AttrValue(ReadMemcpy<uint32_t>(data));
+        case 8:
+          return AttrValue(ReadMemcpy<uint64_t>(data));
+        default:
+          BLOATY_UNREACHABLE();
+      }
+    case DW_FORM_ref_addr:
+      // Address-sized for DWARF version 2 and below, offset-sized afterwards.
+      if (reader.unit_sizes().dwarf_version() <= 2) {
+        goto address_size;
+      }
+      ABSL_FALLTHROUGH_INTENDED;
+    case DW_FORM_sec_offset:
+      if (reader.unit_sizes().dwarf64()) {
+        return AttrValue(ReadMemcpy<uint64_t>(data));
+      } else {
+        return AttrValue(ReadMemcpy<uint32_t>(data));
+      }
+    case DW_FORM_udata:
+      return AttrValue(ReadLEB128<uint64_t>(data));
+    case DW_FORM_block1:
+      return AttrValue(ReadBlock<uint8_t>(data));
+    case DW_FORM_block2:
+      return AttrValue(ReadBlock<uint16_t>(data));
+    case DW_FORM_block4:
+      return AttrValue(ReadBlock<uint32_t>(data));
+    case DW_FORM_block:
+    case DW_FORM_exprloc:
+      return AttrValue(ReadVariableBlock(data));
+    case DW_FORM_string:
+      return AttrValue(ReadNullTerminated(data));
+    case DW_FORM_strp:
+      if (reader.unit_sizes().dwarf64()) {
+        return AttrValue(ReadIndirectString<uint64_t>(reader, data));
+      } else {
+        return AttrValue(ReadIndirectString<uint32_t>(reader, data));
+      }
+    // The fixed-size data forms are kept as raw bytes; AttrValue::ToUint()
+    // can convert them on demand.
+    case DW_FORM_data1:
+      return AttrValue(ReadPiece(1, data));
+    case DW_FORM_data2:
+      return AttrValue(ReadPiece(2, data));
+    case DW_FORM_data4:
+      return AttrValue(ReadPiece(4, data));
+    case DW_FORM_data8:
+      return AttrValue(ReadPiece(8, data));
+
+    // Bloaty doesn't currently care about any bool or signed data.
+    // So we fudge it a bit and just stuff these in a uint64.
+    case DW_FORM_flag_present:
+      return AttrValue(1);
+    case DW_FORM_flag:
+      return AttrValue(ReadMemcpy<uint8_t>(data));
+    case DW_FORM_sdata:
+      return AttrValue(ReadLEB128<uint64_t>(data));
+    default:
+      THROWF("Don't know how to parse DWARF form: $0", form);
+  }
+}
+
+
+// AttrReader //////////////////////////////////////////////////////////////////
+
+// Parses a DIE's attributes, calling user callbacks with the parsed values.
+
+// Dispatches the attributes of a DIE to callbacks registered per attribute
+// name. T is the type of the user object handed to every callback.
+template <class T>
+class AttrReader {
+ public:
+  using CallbackFunc = void(T* container, AttrValue val);
+
+  // Registers `func` as the handler for `attr`, replacing any handler that
+  // was registered for that attribute before.
+  void OnAttribute(DwarfAttribute attr, CallbackFunc* func) {
+    attributes_[attr] = func;
+  }
+
+  // Parses every attribute listed in the current DIE's abbreviation, in
+  // order. Attributes with a registered handler are forwarded to it together
+  // with `container`; the rest are parsed and dropped.
+  void ReadAttributes(DIEReader* reader, T* container) {
+    string_view remaining = reader->ReadAttributesBegin();
+    const AbbrevTable::Abbrev& abbrev = reader->GetAbbrev();
+
+    for (auto attr : abbrev.attr) {
+      // Every value has to be parsed, wanted or not, to reach the next one.
+      AttrValue parsed = ParseAttr(*reader, attr.form, &remaining);
+      auto handler = attributes_.find(attr.name);
+      if (handler == attributes_.end()) {
+        continue;
+      }
+      handler->second(container, parsed);
+    }
+
+    reader->ReadAttributesEnd(remaining, 0);
+  }
+
+ private:
+  // Attribute name -> handler.
+  std::unordered_map<int, CallbackFunc*> attributes_;
+};
+
+// Member of DIEReader, defined here because it needs AttrReader.
+//
+// Advances past all children of the current DIE. Returns true right away if
+// the DIE has no children, false if NextDIE() fails before the children have
+// all been passed, and true otherwise.
+bool DIEReader::SkipChildren() {
+  assert(state_ == State::kReadyToNext);
+  if (!HasChild()) {
+    return true;
+  }
+
+  const int stop_depth = depth_ - 1;
+  // No callbacks are registered: attributes are parsed only to get past them.
+  dwarf::AttrReader<void> skipper;
+
+  // TODO(haberman): use DW_AT_sibling to optimize skipping when it is
+  // available.
+  for (SkipNullEntries(); depth_ > stop_depth; SkipNullEntries()) {
+    if (!NextDIE()) {
+      return false;
+    }
+    skipper.ReadAttributes(this, nullptr);
+  }
+  return true;
+}
+
+// LineInfoReader //////////////////////////////////////////////////////////////
+
+// Code to read the line number programs stored in the .debug_line section of
+// a DWARF file.
+
+// Reads a line number program from file.debug_line. Call SeekToOffset() to
+// parse a program header, then call ReadLineInfo() repeatedly; each time it
+// returns true, lineinfo() holds the current state-machine registers.
+class LineInfoReader {
+ public:
+ LineInfoReader(const File& file) : file_(file), info_(0) {}
+
+ // The line number state-machine registers. Everything except is_stmt has a
+ // fixed initial value; is_stmt is taken from the constructor argument.
+ struct LineInfo {
+ LineInfo(bool default_is_stmt) : is_stmt(default_is_stmt) {}
+ uint64_t address = 0;
+ uint32_t file = 1;
+ uint32_t line = 1;
+ uint32_t column = 0;
+ uint32_t discriminator = 0;
+ bool end_sequence = false;
+ bool basic_block = false;
+ bool prologue_end = false;
+ bool epilogue_begin = false;
+ bool is_stmt;
+ uint8_t op_index = 0;
+ uint8_t isa = 0;
+ };
+
+ // One entry of the file name table.
+ struct FileName {
+ string_view name;
+ uint32_t directory_index;  // Index into include_directories_.
+ uint64_t modified_time;
+ uint64_t file_size;
+ };
+
+ void SeekToOffset(uint64_t offset, uint8_t address_size);
+ bool ReadLineInfo();
+ const LineInfo& lineinfo() const { return info_; }
+ // Unchecked accessors: `i` must be a valid index.
+ const FileName& filename(size_t i) const { return filenames_[i]; }
+ string_view include_directory(size_t i) const {
+ return include_directories_[i];
+ }
+
+ // Returns "<directory>/<name>" for file `index`, or just "<name>" when the
+ // file's directory entry is empty. Results are cached in
+ // expanded_filenames_. Throws if `index` is not a valid file index.
+ const std::string& GetExpandedFilename(size_t index) {
+ if (index >= filenames_.size()) {
+ THROW("filename index out of range");
+ }
+
+ // Generate these lazily.
+ if (expanded_filenames_.size() <= index) {
+ expanded_filenames_.resize(filenames_.size());
+ }
+
+ std::string& ret = expanded_filenames_[index];
+ // An empty cache slot means "not computed yet".
+ if (ret.empty()) {
+ const FileName& filename = filenames_[index];
+ string_view directory = include_directories_[filename.directory_index];
+ ret = std::string(directory);
+ if (!ret.empty()) {
+ ret += "/";
+ }
+ ret += std::string(filename.name);
+ }
+ return ret;
+ }
+
+ private:
+ // Fixed-size fields of the line number program header, filled in by
+ // SeekToOffset().
+ struct Params {
+ uint8_t minimum_instruction_length;
+ uint8_t maximum_operations_per_instruction;
+ uint8_t default_is_stmt;
+ int8_t line_base;
+ uint8_t line_range;
+ uint8_t opcode_base;
+ } params_;
+
+ const File& file_;
+
+ CompilationUnitSizes sizes_;
+ std::vector<string_view> include_directories_;
+ std::vector<FileName> filenames_;
+ std::vector<uint8_t> standard_opcode_lengths_;
+ std::vector<std::string> expanded_filenames_;
+
+ // The part of the line number program that has not been executed yet.
+ string_view remaining_;
+
+ // Whether we are in a "shadow" part of the bytecode program. Sometimes
+ // parts of the line info program make it into the final binary even though
+ // the corresponding code was stripped. We can tell when this happened by
+ // looking for DW_LNE_set_address ops where the operand is 0. This
+ // indicates that a relocation for that argument never got applied, which
+ // probably means that the code got stripped.
+ //
+ // While this is true, we don't yield any LineInfo entries, because the
+ // "address" value is garbage.
+ bool shadow_;
+
+ LineInfo info_;
+
+ // Applies an "operation advance" of `advance` to address and op_index, with
+ // `max_per_instr` operations per instruction. max_per_instr must be nonzero
+ // (it is used as a divisor).
+ void DoAdvance(uint64_t advance, uint8_t max_per_instr) {
+ info_.address += params_.minimum_instruction_length *
+ ((info_.op_index + advance) / max_per_instr);
+ info_.op_index = (info_.op_index + advance) % max_per_instr;
+ }
+
+ void Advance(uint64_t amount) {
+ if (params_.maximum_operations_per_instruction == 1) {
+ // This is by far the common case (only false on VLIW architectures),
+ // and this inlining/specialization avoids a costly division.
+ DoAdvance(amount, 1);
+ } else {
+ DoAdvance(amount, params_.maximum_operations_per_instruction);
+ }
+ }
+
+ // Opcode relative to opcode_base; the subtraction result is converted back
+ // to uint8_t on return.
+ uint8_t AdjustedOpcode(uint8_t op) { return op - params_.opcode_base; }
+
+ // Address/op_index part of a special opcode. Divides by line_range, which
+ // SeekToOffset() rejects when it is zero.
+ void SpecialOpcodeAdvance(uint8_t op) {
+ Advance(AdjustedOpcode(op) / params_.line_range);
+ }
+};
+
+// Positions the reader at the line number program found `offset` bytes into
+// file_.debug_line and parses its header: the fixed parameters, the standard
+// opcode length table, the include directories and the file names.
+// Afterwards the registers in info_ are reset, shadow_ is cleared, and
+// remaining_ refers to the program that follows the header.
+//
+// Throws if maximum_operations_per_instruction or line_range is zero, or if
+// a file entry names a directory index that is out of range.
+void LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) {
+  string_view header = file_.debug_line;
+  SkipBytes(offset, &header);
+
+  sizes_.SetAddressSize(address_size);
+  header = sizes_.ReadInitialLength(&header);
+  sizes_.ReadDWARFVersion(&header);
+  const uint64_t header_length = sizes_.ReadDWARFOffset(&header);
+
+  // The program itself starts header_length bytes past this point.
+  string_view program = header;
+  SkipBytes(header_length, &program);
+
+  params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&header);
+  if (sizes_.dwarf_version() != 4) {
+    // The field is only read for version 4; otherwise assume one operation
+    // per instruction.
+    params_.maximum_operations_per_instruction = 1;
+  } else {
+    params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&header);
+
+    if (params_.maximum_operations_per_instruction == 0) {
+      THROW("DWARF line info had maximum_operations_per_instruction=0");
+    }
+  }
+  params_.default_is_stmt = ReadMemcpy<uint8_t>(&header);
+  params_.line_base = ReadMemcpy<int8_t>(&header);
+  params_.line_range = ReadMemcpy<uint8_t>(&header);
+  params_.opcode_base = ReadMemcpy<uint8_t>(&header);
+  if (params_.line_range == 0) {
+    THROW("line_range of zero will cause divide by zero");
+  }
+
+  // Slot 0 is not stored in the file; entries 1..opcode_base-1 are.
+  standard_opcode_lengths_.resize(params_.opcode_base);
+  for (size_t opcode = 1; opcode < params_.opcode_base; ++opcode) {
+    standard_opcode_lengths_[opcode] = ReadMemcpy<uint8_t>(&header);
+  }
+
+  // Include directories: entry 0 is the implicit current directory, and the
+  // list in the file ends at the first empty string.
+  include_directories_.clear();
+  include_directories_.push_back(string_view());
+  for (;;) {
+    string_view directory = ReadNullTerminated(&header);
+    if (directory.empty()) {
+      break;
+    }
+    include_directories_.push_back(directory);
+  }
+
+  // File names: entry 0 is unused, and the list ends at the first entry with
+  // an empty name.
+  filenames_.clear();
+  expanded_filenames_.clear();
+  filenames_.push_back(FileName());
+  for (;;) {
+    FileName entry;
+    entry.name = ReadNullTerminated(&header);
+    if (entry.name.empty()) {
+      break;
+    }
+    entry.directory_index = ReadLEB128<uint32_t>(&header);
+    entry.modified_time = ReadLEB128<uint64_t>(&header);
+    entry.file_size = ReadLEB128<uint64_t>(&header);
+    if (entry.directory_index >= include_directories_.size()) {
+      THROW("directory index out of range");
+    }
+    filenames_.push_back(entry);
+  }
+
+  info_ = LineInfo(params_.default_is_stmt);
+  remaining_ = program;
+  shadow_ = false;
+}
+
+// Runs the line number program until it emits the next row.
+//
+// Returns true when a row was emitted (by a special opcode, DW_LNS_copy or
+// DW_LNE_end_sequence); the row is then available through lineinfo().
+// Returns false once the program is exhausted. Rows produced while shadow_
+// is set are not reported. Throws if a file or directory index is out of
+// range.
+bool LineInfoReader::ReadLineInfo() {
+  // Final step of last DW_LNS_copy / special opcode.
+  info_.discriminator = 0;
+  info_.basic_block = false;
+  info_.prologue_end = false;
+  info_.epilogue_begin = false;
+
+  // Final step of DW_LNE_end_sequence.
+  info_.end_sequence = false;
+
+  string_view data = remaining_;
+
+  while (true) {
+    if (data.empty()) {
+      remaining_ = data;
+      return false;
+    }
+
+    uint8_t op = ReadMemcpy<uint8_t>(&data);
+
+    if (op >= params_.opcode_base) {
+      // Special opcode: advances the address and the line, then emits a row.
+      SpecialOpcodeAdvance(op);
+      info_.line +=
+          params_.line_base + (AdjustedOpcode(op) % params_.line_range);
+      if (!shadow_) {
+        remaining_ = data;
+        return true;
+      }
+    } else {
+      switch (op) {
+        case DW_LNS_extended_op: {
+          // `len` is the size of the instruction that follows the length
+          // field; it includes the extended opcode byte itself.
+          uint16_t len = ReadLEB128<uint16_t>(&data);
+          uint8_t extended_op = ReadMemcpy<uint8_t>(&data);
+          switch (extended_op) {
+            case DW_LNE_end_sequence: {
+              // Preserve address and set end_sequence, but reset everything
+              // else.
+              uint64_t addr = info_.address;
+              info_ = LineInfo(params_.default_is_stmt);
+              info_.address = addr;
+              info_.end_sequence = true;
+              if (!shadow_) {
+                remaining_ = data;
+                return true;
+              }
+              break;
+            }
+            case DW_LNE_set_address:
+              info_.address = sizes_.ReadAddress(&data);
+              info_.op_index = 0;
+              // An address of 0 marks a "shadow" region; rows are
+              // suppressed until the next nonzero DW_LNE_set_address.
+              shadow_ = (info_.address == 0);
+              break;
+            case DW_LNE_define_file: {
+              FileName file_name;
+              file_name.name = ReadNullTerminated(&data);
+              file_name.directory_index = ReadLEB128<uint32_t>(&data);
+              file_name.modified_time = ReadLEB128<uint64_t>(&data);
+              file_name.file_size = ReadLEB128<uint64_t>(&data);
+              if (file_name.directory_index >= include_directories_.size()) {
+                THROW("directory index out of range");
+              }
+              filenames_.push_back(file_name);
+              break;
+            }
+            case DW_LNE_set_discriminator:
+              info_.discriminator = ReadLEB128<uint32_t>(&data);
+              break;
+            default:
+              // We don't understand this opcode, skip it. The opcode byte
+              // has already been consumed and is counted in `len`, so only
+              // len - 1 operand bytes remain.
+              if (len > 1) {
+                SkipBytes(len - 1, &data);
+              }
+              if (verbose_level > 0) {
+                fprintf(stderr,
+                        "bloaty: warning: unknown DWARF line table extended "
+                        "opcode: %d\n",
+                        extended_op);
+              }
+              break;
+          }
+          break;
+        }
+        case DW_LNS_copy:
+          if (!shadow_) {
+            remaining_ = data;
+            return true;
+          }
+          break;
+        case DW_LNS_advance_pc:
+          Advance(ReadLEB128<uint64_t>(&data));
+          break;
+        case DW_LNS_advance_line:
+          info_.line += ReadLEB128<int32_t>(&data);
+          break;
+        case DW_LNS_set_file:
+          info_.file = ReadLEB128<uint32_t>(&data);
+          if (info_.file >= filenames_.size()) {
+            THROW("filename index too big");
+          }
+          break;
+        case DW_LNS_set_column:
+          info_.column = ReadLEB128<uint32_t>(&data);
+          break;
+        case DW_LNS_negate_stmt:
+          info_.is_stmt = !info_.is_stmt;
+          break;
+        case DW_LNS_set_basic_block:
+          info_.basic_block = true;
+          break;
+        case DW_LNS_const_add_pc:
+          // Advances by the same amount as special opcode 255 would, without
+          // touching the line or emitting a row.
+          SpecialOpcodeAdvance(255);
+          break;
+        case DW_LNS_fixed_advance_pc:
+          info_.address += ReadMemcpy<uint16_t>(&data);
+          info_.op_index = 0;
+          break;
+        case DW_LNS_set_prologue_end:
+          info_.prologue_end = true;
+          break;
+        case DW_LNS_set_epilogue_begin:
+          info_.epilogue_begin = true;
+          break;
+        case DW_LNS_set_isa:
+          info_.isa = ReadLEB128<uint8_t>(&data);
+          break;
+        default: {
+          // Unknown standard opcode. standard_opcode_lengths gives the
+          // number of LEB128 operands it takes (not a byte count), so decode
+          // and discard that many operands.
+          const uint8_t operand_count = standard_opcode_lengths_[op];
+          for (uint8_t i = 0; i < operand_count; i++) {
+            ReadLEB128<uint64_t>(&data);
+          }
+          if (verbose_level > 0) {
+            fprintf(stderr,
+                    "bloaty: warning: unknown DWARF line table opcode: %d\n",
+                    op);
+          }
+          break;
+        }
+      }
+    }
+  }
+}
+
+} // namespace dwarf
+
+// Bloaty DWARF Data Sources ///////////////////////////////////////////////////
+
+// The DWARF .debug_aranges section should, in theory, give us exactly the
+// information we need to map file ranges in linked binaries to compilation
+// units from where that code came. However, .debug_aranges is often incomplete
+// or missing completely, so we use it as just one of several data sources for
+// the "compileunits" data source.
+// Walks file.debug_aranges and, for every range whose address passes
+// dwarf::IsValidDwarfAddress, adds a "dwarf_aranges" VM range to `sink`
+// labelled with the name of the compilation unit the range belongs to.
+// Always returns true.
+static bool ReadDWARFAddressRanges(const dwarf::File& file, RangeSink* sink) {
+  // Maps compilation unit offset -> source filename, looked up in debug_info
+  // on first use and cached afterwards.
+  class FilenameMap {
+   public:
+    FilenameMap(const dwarf::File& file)
+        : die_reader_(file), missing_("[DWARF is missing filename]") {
+      attr_reader_.OnAttribute(
+          DW_AT_name, [](string_view* out, dwarf::AttrValue value) {
+            if (value.IsString()) {
+              *out = value.GetString();
+            }
+          });
+    }
+
+    std::string GetFilename(uint64_t compilation_unit_offset) {
+      std::string& cached = map_[compilation_unit_offset];
+      if (cached.empty()) {
+        cached = LookupFilename(compilation_unit_offset);
+      }
+      return cached;
+    }
+
+   private:
+    // Returns the DW_AT_name of the compile unit at the given debug_info
+    // offset, or the placeholder in missing_ if the unit can't be found,
+    // isn't a DW_TAG_compile_unit, or has no (non-empty) string name.
+    std::string LookupFilename(uint64_t compilation_unit_offset) {
+      auto section = dwarf::DIEReader::Section::kDebugInfo;
+      if (!die_reader_.SeekToCompilationUnit(section,
+                                             compilation_unit_offset)) {
+        return missing_;
+      }
+      if (die_reader_.GetTag() != DW_TAG_compile_unit) {
+        return missing_;
+      }
+
+      string_view unit_name;
+      attr_reader_.ReadAttributes(&die_reader_, &unit_name);
+      if (unit_name.empty()) {
+        return missing_;
+      }
+      return std::string(unit_name);
+    }
+
+    dwarf::DIEReader die_reader_;
+    dwarf::AttrReader<string_view> attr_reader_;
+    std::unordered_map<uint64_t, std::string> map_;
+    std::string missing_;
+  } map(file);
+
+  dwarf::AddressRanges ranges(file.debug_aranges);
+
+  while (ranges.NextUnit()) {
+    const std::string filename = map.GetFilename(ranges.debug_info_offset());
+
+    while (ranges.NextRange()) {
+      if (!dwarf::IsValidDwarfAddress(ranges.address(),
+                                      ranges.address_size())) {
+        continue;
+      }
+      sink->AddVMRangeIgnoreDuplicate("dwarf_aranges", ranges.address(),
+                                      ranges.length(), filename);
+    }
+  }
+
+  return true;
+}
+
+// TODO(haberman): make these into real protobufs once proto supports
+// string_view.
+// Plain holder for the subset of DIE attributes that the compileunits data
+// source looks at. Each field has a has_*() flag that is false until the
+// corresponding set_*() is called; unset strings read as empty and unset
+// integers as 0.
+class GeneralDIE {
+ public:
+  bool has_name() const { return has_name_; }
+  bool has_linkage_name() const { return has_linkage_name_; }
+  bool has_location_string() const { return has_location_string_; }
+  bool has_low_pc() const { return has_low_pc_; }
+  bool has_high_pc() const { return has_high_pc_; }
+  bool has_location_uint64() const { return has_location_uint64_; }
+  bool has_stmt_list() const { return has_stmt_list_; }
+  bool has_ranges() const { return has_ranges_; }
+  bool has_start_scope() const { return has_start_scope_; }
+
+  // Returns one "field: value" line for every field that has been set.
+  std::string DebugString() {
+    std::string ret;
+    if (has_name()) {
+      ret += absl::Substitute("name: $0\n", name());
+    }
+    if (has_linkage_name()) {
+      ret += absl::Substitute("linkage_name: $0\n", linkage_name());
+    }
+    if (has_location_string()) {
+      ret += absl::Substitute("location_string: $0\n", location_string());
+    }
+    if (has_low_pc()) {
+      ret += absl::Substitute("low_pc: $0\n", low_pc());
+    }
+    if (has_high_pc()) {
+      ret += absl::Substitute("high_pc: $0\n", high_pc());
+    }
+    if (has_location_uint64()) {
+      ret += absl::Substitute("location_uint64: $0\n", location_uint64());
+    }
+    if (has_stmt_list()) {
+      ret += absl::Substitute("stmt_list: $0\n", stmt_list());
+    }
+    if (has_ranges()) {
+      ret += absl::Substitute("ranges: $0\n", ranges());
+    }
+    if (has_start_scope()) {
+      ret += absl::Substitute("start_scope: $0\n", start_scope());
+    }
+    return ret;
+  }
+
+  string_view name() const { return name_; }
+  string_view linkage_name() const { return linkage_name_; }
+  string_view location_string() const { return location_string_; }
+  uint64_t low_pc() const { return low_pc_; }
+  uint64_t high_pc() const { return high_pc_; }
+  uint64_t location_uint64() const { return location_uint64_; }
+  uint64_t stmt_list() const { return stmt_list_; }
+  uint64_t ranges() const { return ranges_; }
+  uint64_t start_scope() const { return start_scope_; }
+
+  void set_name(string_view val) {
+    has_name_ = true;
+    name_ = val;
+  }
+  void set_linkage_name(string_view val) {
+    has_linkage_name_ = true;
+    // Must store into linkage_name_: writing location_string_ here would
+    // leave linkage_name() empty and clobber the location string.
+    linkage_name_ = val;
+  }
+  void set_location_string(string_view val) {
+    has_location_string_ = true;
+    location_string_ = val;
+  }
+  void set_low_pc(uint64_t val) {
+    has_low_pc_ = true;
+    low_pc_ = val;
+  }
+  void set_high_pc(uint64_t val) {
+    has_high_pc_ = true;
+    high_pc_ = val;
+  }
+  void set_location_uint64(uint64_t val) {
+    has_location_uint64_ = true;
+    location_uint64_ = val;
+  }
+  void set_stmt_list(uint64_t val) {
+    has_stmt_list_ = true;
+    stmt_list_ = val;
+  }
+  void set_ranges(uint64_t val) {
+    has_ranges_ = true;
+    ranges_ = val;
+  }
+  void set_start_scope(uint64_t val) {
+    has_start_scope_ = true;
+    start_scope_ = val;
+  }
+
+ private:
+  bool has_name_ = false;
+  bool has_linkage_name_ = false;
+  bool has_location_string_ = false;
+  bool has_low_pc_ = false;
+  bool has_high_pc_ = false;
+  bool has_location_uint64_ = false;
+  bool has_stmt_list_ = false;
+  bool has_ranges_ = false;
+  bool has_start_scope_ = false;
+
+  string_view name_;
+  string_view linkage_name_;
+  string_view location_string_;
+  uint64_t low_pc_ = 0;
+  uint64_t high_pc_ = 0;
+  uint64_t location_uint64_ = 0;
+  uint64_t stmt_list_ = 0;
+  uint64_t ranges_ = 0;
+  uint64_t start_scope_ = 0;
+};
+
+// Holder for a single optional stmt_list value: has_stmt_list() is false and
+// stmt_list() is 0 until set_stmt_list() has been called.
+class InlinesDIE {
+ public:
+ bool has_stmt_list() const { return has_stmt_list_; }
+
+ uint64_t stmt_list() const { return stmt_list_; }
+
+ // Stores `val` and marks the field as present.
+ void set_stmt_list(uint64_t val) {
+ has_stmt_list_ = true;
+ stmt_list_ = val;
+ }
+
+ private:
+ bool has_stmt_list_ = false;
+ uint64_t stmt_list_ = 0;
+};
+
+// To view DIEs for a given file, try:
+// readelf --debug-dump=info foo.bin
+// Attributes to `name` every range that can be derived from one DIE:
+//   - its low_pc/high_pc pair               ("dwarf_pcpair")
+//   - its linkage name, via the symbol table ("dwarf_linkagename")
+//   - a DW_OP_addr location expression       ("dwarf_location")
+//   - a location list offset into debug_loc  ("dwarf_locrange")
+//   - a range list offset into debug_ranges  ("dwarf_debugrange")
+// Out-of-range offsets and unresolvable locations are only reported when
+// verbose_level > 0.
+void AddDIE(const dwarf::File& file, const std::string& name,
+            const GeneralDIE& die, const SymbolTable& symtab,
+            const DualMap& symbol_map, const dwarf::CompilationUnitSizes& sizes,
+            RangeSink* sink) {
+  // Address range given as a low_pc/high_pc pair (typical for functions).
+  if (die.has_low_pc() && die.has_high_pc() &&
+      dwarf::IsValidDwarfAddress(die.low_pc(), sizes.address_size())) {
+    const uint64_t start = die.low_pc();
+    uint64_t extent = die.high_pc();
+
+    // Some compilers emit high_pc as a size and others as an end address;
+    // treat any value that is not below low_pc as an address.
+    if (extent >= start) {
+      extent -= start;
+    }
+    sink->AddVMRangeIgnoreDuplicate("dwarf_pcpair", start, extent, name);
+  }
+
+  // A linkage name can be resolved through the symbol table.
+  if (die.has_linkage_name()) {
+    auto symbol = symtab.find(die.linkage_name());
+    if (symbol != symtab.end()) {
+      sink->AddVMRangeIgnoreDuplicate("dwarf_linkagename",
+                                      symbol->second.first,
+                                      symbol->second.second, name);
+    }
+  }
+
+  // A location expression. Only the tiny subset "DW_OP_addr <address>" of
+  // the DWARF expression grammar is understood.
+  if (die.has_location_string()) {
+    string_view expr = die.location_string();
+    if (expr.size() == sizes.address_size() + 1 && expr[0] == DW_OP_addr) {
+      expr.remove_prefix(1);
+      uint64_t addr;
+      // TODO(haberman): endian?
+      switch (sizes.address_size()) {
+        case 4:
+          addr = dwarf::ReadMemcpy<uint32_t>(&expr);
+          break;
+        case 8:
+          addr = dwarf::ReadMemcpy<uint64_t>(&expr);
+          break;
+        default:
+          BLOATY_UNREACHABLE();
+      }
+
+      // The expression carries no size, so that comes from the symbol map.
+      uint64_t size;
+      if (symbol_map.vm_map.TryGetSize(addr, &size)) {
+        sink->AddVMRangeIgnoreDuplicate("dwarf_location", addr, size, name);
+      } else if (verbose_level > 0) {
+        fprintf(stderr,
+                "bloaty: warning: couldn't find DWARF location in symbol "
+                "table, address: %" PRIx64 ", name: %s\n",
+                addr, name.c_str());
+      }
+    }
+  }
+
+  // A location given as an offset into debug_loc.
+  if (die.has_location_uint64()) {
+    const uint64_t loc_offset = die.location_uint64();
+    if (loc_offset < file.debug_loc.size()) {
+      absl::string_view loc_list = file.debug_loc.substr(loc_offset);
+      loc_list = GetLocationListRange(sizes, loc_list);
+      sink->AddFileRange("dwarf_locrange", name, loc_list);
+    } else if (verbose_level > 0) {
+      fprintf(stderr,
+              "bloaty: warning: DWARF location out of range, location=%" PRIx64
+              "\n",
+              loc_offset);
+    }
+  }
+
+  // Two different attributes may hold an offset into debug_ranges; `ranges`
+  // wins over `start_scope`. UINT64_MAX stands for "neither".
+  const uint64_t ranges_offset = die.has_ranges()        ? die.ranges()
+                                 : die.has_start_scope() ? die.start_scope()
+                                                         : UINT64_MAX;
+  if (ranges_offset == UINT64_MAX) {
+    return;
+  }
+
+  if (ranges_offset < file.debug_ranges.size()) {
+    absl::string_view range_list = file.debug_ranges.substr(ranges_offset);
+    range_list = GetRangeListRange(sizes, range_list);
+    sink->AddFileRange("dwarf_debugrange", name, range_list);
+  } else if (verbose_level > 0) {
+    fprintf(stderr,
+            "bloaty: warning: DWARF debug range out of range, "
+            "ranges_offset=%" PRIx64 "\n",
+            ranges_offset);
+  }
+}
+
+// Walks the units of a pubnames-style `section`. For each unit, seeks to the
+// compilation unit it refers to in debug_info, reads that unit's DW_AT_name,
+// and (when the name is non-empty) attributes the whole unit, length field
+// included, to that name as "dwarf_pubnames". Throws if the compilation unit
+// cannot be found.
+static void ReadDWARFPubNames(const dwarf::File& file, string_view section,
+                              RangeSink* sink) {
+  dwarf::DIEReader die_reader(file);
+  dwarf::AttrReader<string_view> name_reader;
+
+  name_reader.OnAttribute(
+      DW_AT_name, [](string_view* out, dwarf::AttrValue value) {
+        if (value.type() != dwarf::AttrValue::Type::kString) {
+          return;
+        }
+        *out = value.GetString();
+      });
+
+  string_view rest = section;
+  while (!rest.empty()) {
+    dwarf::CompilationUnitSizes sizes;
+    string_view unit_with_length = rest;
+    string_view unit = sizes.ReadInitialLength(&rest);
+
+    // Widen `unit` backwards so that it also covers its own length field.
+    unit_with_length = unit_with_length.substr(
+        0, unit.size() + (unit.data() - unit_with_length.data()));
+
+    sizes.ReadDWARFVersion(&unit);
+    const uint64_t debug_info_offset = sizes.ReadDWARFOffset(&unit);
+    if (!die_reader.SeekToCompilationUnit(
+            dwarf::DIEReader::Section::kDebugInfo, debug_info_offset)) {
+      THROW("Couldn't seek to debug_info section");
+    }
+
+    string_view compileunit_name;
+    name_reader.ReadAttributes(&die_reader, &compileunit_name);
+    if (compileunit_name.empty()) {
+      continue;
+    }
+    sink->AddFileRange("dwarf_pubnames", compileunit_name, unit_with_length);
+  }
+}
+
+// Reads one pointer from *data according to a DW_EH_PE_* `encoding` byte and
+// returns it as a uint64_t.
+//
+// The encoding is handled in three steps:
+//   1. format bits (encoding & DW_EH_PE_FORMAT_MASK) select how the raw
+//      value is read;
+//   2. application bits (encoding & DW_EH_PE_APPLICATION_MASK) select a base
+//      that is added to it;
+//   3. if DW_EH_PE_indirect is set, the result is treated as a VM address
+//      and the final value is loaded from there.
+//
+// `is_64bit` picks the width for DW_EH_PE_absptr and for the indirect load.
+// `data_base` is only used for DW_EH_PE_datarel and may be nullptr otherwise.
+// Throws on an unknown format, on textrel/funcrel/aligned applications, and
+// on datarel without a data_base.
+uint64_t ReadEncodedPointer(uint8_t encoding, bool is_64bit, string_view* data,
+ const char* data_base, RangeSink* sink) {
+ uint64_t value;
+ // Captured before anything is read: pcrel is relative to where the encoded
+ // value itself starts.
+ const char* ptr = data->data();
+ uint8_t format = encoding & DW_EH_PE_FORMAT_MASK;
+
+ switch (format) {
+ case DW_EH_PE_omit:
+ return 0;
+ case DW_EH_PE_absptr:
+ if (is_64bit) {
+ value = dwarf::ReadMemcpy<uint64_t>(data);
+ } else {
+ value = dwarf::ReadMemcpy<uint32_t>(data);
+ }
+ break;
+ case DW_EH_PE_uleb128:
+ value = dwarf::ReadLEB128<uint64_t>(data);
+ break;
+ case DW_EH_PE_udata2:
+ value = dwarf::ReadMemcpy<uint16_t>(data);
+ break;
+ case DW_EH_PE_udata4:
+ value = dwarf::ReadMemcpy<uint32_t>(data);
+ break;
+ case DW_EH_PE_udata8:
+ value = dwarf::ReadMemcpy<uint64_t>(data);
+ break;
+ // The signed formats are read as signed types and converted to uint64_t
+ // by the assignment.
+ case DW_EH_PE_sleb128:
+ value = dwarf::ReadLEB128<int64_t>(data);
+ break;
+ case DW_EH_PE_sdata2:
+ value = dwarf::ReadMemcpy<int16_t>(data);
+ break;
+ case DW_EH_PE_sdata4:
+ value = dwarf::ReadMemcpy<int32_t>(data);
+ break;
+ case DW_EH_PE_sdata8:
+ value = dwarf::ReadMemcpy<int64_t>(data);
+ break;
+ default:
+ THROWF("Unexpected eh_frame format value: $0", format);
+ }
+
+ uint8_t application = encoding & DW_EH_PE_APPLICATION_MASK;
+
+ // NOTE(review): there is no default case, so application values not listed
+ // here leave `value` unchanged.
+ switch (application) {
+ case 0:
+ break;
+ case DW_EH_PE_pcrel:
+ value += sink->TranslateFileToVM(ptr);
+ break;
+ case DW_EH_PE_datarel:
+ if (data_base == nullptr) {
+ THROW("datarel requested but no data_base provided");
+ }
+ value += sink->TranslateFileToVM(data_base);
+ break;
+ case DW_EH_PE_textrel:
+ case DW_EH_PE_funcrel:
+ case DW_EH_PE_aligned:
+ THROWF("Unimplemented eh_frame application value: $0", application);
+ }
+
+ if (encoding & DW_EH_PE_indirect) {
+ // `value` is the VM address of the real pointer; load it from the file.
+ string_view location = sink->TranslateVMToFile(value);
+ if (is_64bit) {
+ value = dwarf::ReadMemcpy<uint64_t>(&location);
+ } else {
+ value = dwarf::ReadMemcpy<uint32_t>(&location);
+ }
+ }
+
+ return value;
+}
+
+// Code to read the .eh_frame section. This is not technically DWARF, but it
+// is similar to .debug_frame (which is DWARF) so it's convenient to put it
+// here.
+//
+// The best documentation I can find for this format comes from:
+//
+// *
+// http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+// * https://www.airs.com/blog/archives/460
+//
+// However these are both under-specified. Some details are not mentioned in
+// either of these (for example, the fact that the function length uses the FDE
+// encoding, but always absolute). libdwarf's implementation contains a comment
+// saying "It is not clear if this is entirely correct". Basically the only
+// thing you can trust for some of these details is the code that actually
+// implements unwinding in production:
+//
+// * libunwind http://www.nongnu.org/libunwind/
+// https://github.com/pathscale/libunwind/blob/master/src/dwarf/Gfde.c
+// * LLVM libunwind (a different project!!)
+// https://github.com/llvm-mirror/libunwind/blob/master/src/DwarfParser.hpp
+// * libgcc
+// https://github.com/gcc-mirror/gcc/blob/master/libgcc/unwind-dw2-fde.c
+// Walks the entries of an .eh_frame section held in `data`.
+//
+// An entry whose 32-bit id field is 0 is parsed as a CIE and remembered in
+// cie_map, keyed by the address at which the entry starts. Any other entry
+// is treated as an FDE: its CIE is looked up through the id field, its first
+// encoded pointer is read using the CIE's fde_encoding, and the whole entry
+// (length field included) is reported to `sink` as "dwarf_fde" for that
+// address.
+//
+// Throws on a CIE version other than 1 or 3, on an unknown augmentation
+// character, and on an FDE whose CIE has not been seen.
+void ReadEhFrame(string_view data, RangeSink* sink) {
+ string_view remaining = data;
+
+ struct CIEInfo {
+ int version = 0;
+ uint32_t code_align = 0;
+ int32_t data_align = 0;
+ uint8_t fde_encoding = 0;
+ uint8_t lsda_encoding = 0;
+ bool is_signal_handler = false;
+ bool has_augmentation_length = false;
+ uint64_t personality_function = 0;
+ uint32_t return_address_reg = 0;
+ };
+
+ // Keyed by the address of the first byte of each CIE (its length field).
+ std::unordered_map<const void*, CIEInfo> cie_map;
+
+ while (remaining.size() > 0) {
+ dwarf::CompilationUnitSizes sizes;
+ string_view full_entry = remaining;
+ string_view entry = sizes.ReadInitialLength(&remaining);
+ // A zero-length entry with nothing after it ends the walk.
+ if (entry.size() == 0 && remaining.size() == 0) {
+ return;
+ }
+ // Trim full_entry so it covers exactly this entry plus its length field.
+ full_entry =
+ full_entry.substr(0, entry.size() + (entry.data() - full_entry.data()));
+ uint32_t id = dwarf::ReadMemcpy<uint32_t>(&entry);
+ if (id == 0) {
+ // CIE, we don't attribute this yet.
+ CIEInfo& cie_info = cie_map[full_entry.data()];
+ cie_info.version = dwarf::ReadMemcpy<uint8_t>(&entry);
+ string_view aug_string = dwarf::ReadNullTerminated(&entry);
+ cie_info.code_align = dwarf::ReadLEB128<uint32_t>(&entry);
+ cie_info.data_align = dwarf::ReadLEB128<int32_t>(&entry);
+ // The return address register is one byte in version 1 and a LEB128 in
+ // version 3.
+ switch (cie_info.version) {
+ case 1:
+ cie_info.return_address_reg = dwarf::ReadMemcpy<uint8_t>(&entry);
+ break;
+ case 3:
+ cie_info.return_address_reg = dwarf::ReadLEB128<uint32_t>(&entry);
+ break;
+ default:
+ THROW("Unexpected eh_frame CIE version");
+ }
+ // Each augmentation character says which piece of augmentation data
+ // comes next in the entry, so they are consumed in string order.
+ while (aug_string.size() > 0) {
+ switch (aug_string[0]) {
+ case 'z':
+ // Length until the end of augmentation data.
+ cie_info.has_augmentation_length = true;
+ dwarf::ReadLEB128<uint32_t>(&entry);
+ break;
+ case 'L':
+ cie_info.lsda_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
+ break;
+ case 'R':
+ cie_info.fde_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
+ break;
+ case 'S':
+ cie_info.is_signal_handler = true;
+ break;
+ case 'P': {
+ uint8_t encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
+ cie_info.personality_function =
+ ReadEncodedPointer(encoding, true, &entry, nullptr, sink);
+ break;
+ }
+ default:
+ THROW("Unexepcted augmentation character");
+ }
+ aug_string.remove_prefix(1);
+ }
+ } else {
+ // FDE: `id` is the distance back from the id field to the start of the
+ // owning CIE. `entry` already points 4 bytes past the id field, hence
+ // the extra "- 4".
+ auto iter = cie_map.find(entry.data() - id - 4);
+ if (iter == cie_map.end()) {
+ THROW("Couldn't find CIE for FDE");
+ }
+ const CIEInfo& cie_info = iter->second;
+ // TODO(haberman): don't hard-code 64-bit.
+ uint64_t address = ReadEncodedPointer(cie_info.fde_encoding, true, &entry,
+ nullptr, sink);
+ // TODO(haberman); Technically the FDE addresses could span a
+ // function/compilation unit? They can certainly span inlines.
+ /*
+ uint64_t length =
+ ReadEncodedPointer(cie_info.fde_encoding & 0xf, true, &entry, sink);
+ (void)length;
+
+ if (cie_info.has_augmentation_length) {
+ uint32_t augmentation_length = dwarf::ReadLEB128<uint32_t>(&entry);
+ (void)augmentation_length;
+ }
+
+ uint64_t lsda =
+ ReadEncodedPointer(cie_info.lsda_encoding, true, &entry, sink);
+ if (lsda) {
+ }
+ */
+
+ sink->AddFileRangeForVMAddr("dwarf_fde", address, full_entry);
+ }
+ }
+}
+
+// See documentation here:
+// http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html#EHFRAME
+// Parses an .eh_frame_hdr section: a four-byte header (version plus three
+// pointer encodings), the eh_frame pointer, the FDE count, and then a table
+// of (initial_location, fde_address) pairs. Each table entry's bytes are
+// reported to `sink` as "dwarf_fde_table" for its initial location. Throws
+// if the version is not 1.
+void ReadEhFrameHdr(string_view data, RangeSink* sink) {
+  // datarel pointers are relative to the start of this section.
+  const char* const section_start = data.data();
+
+  const uint8_t version = dwarf::ReadMemcpy<uint8_t>(&data);
+  const uint8_t eh_frame_ptr_enc = dwarf::ReadMemcpy<uint8_t>(&data);
+  const uint8_t fde_count_enc = dwarf::ReadMemcpy<uint8_t>(&data);
+  const uint8_t table_enc = dwarf::ReadMemcpy<uint8_t>(&data);
+
+  if (version != 1) {
+    THROWF("Unknown eh_frame_hdr version: $0", version);
+  }
+
+  // TODO(haberman): don't hard-code 64-bit.
+  // The eh_frame pointer is read only to get past it.
+  const uint64_t eh_frame_ptr =
+      ReadEncodedPointer(eh_frame_ptr_enc, true, &data, section_start, sink);
+  (void)eh_frame_ptr;
+
+  const uint64_t fde_count =
+      ReadEncodedPointer(fde_count_enc, true, &data, section_start, sink);
+
+  for (uint64_t left = fde_count; left > 0; --left) {
+    string_view table_entry = data;
+    const uint64_t initial_location =
+        ReadEncodedPointer(table_enc, true, &data, section_start, sink);
+    const uint64_t fde_addr =
+        ReadEncodedPointer(table_enc, true, &data, section_start, sink);
+
+    // Drop everything that is still unread, leaving just this entry's bytes.
+    table_entry.remove_suffix(data.size());
+    sink->AddFileRangeForVMAddr("dwarf_fde_table", initial_location,
+                                table_entry);
+
+    // We could add fde_addr with an unknown length if we wanted to skip
+    // reading eh_frame. We can't count on this table being available though,
+    // so we don't want to remove the eh_frame reading code altogether.
+    (void)fde_addr;
+  }
+}
+
+// Attributes the line number program that starts `offset` bytes into
+// file.debug_line to `unit_name` as "dwarf_stmtlistrange". The reported
+// range covers the unit's initial length field as well as its contents.
+static void ReadDWARFStmtListRange(const dwarf::File& file, uint64_t offset,
+                                   string_view unit_name, RangeSink* sink) {
+  string_view cursor = file.debug_line;
+  dwarf::SkipBytes(offset, &cursor);
+
+  // Remember where the unit (including its length field) begins.
+  const string_view unit_start = cursor;
+
+  dwarf::CompilationUnitSizes sizes;
+  const string_view contents = sizes.ReadInitialLength(&cursor);
+
+  // Size of the length field plus the size of the contents it announces.
+  const string_view whole_unit = unit_start.substr(
+      0, contents.size() + (contents.data() - unit_start.data()));
+  sink->AddFileRange("dwarf_stmtlistrange", unit_name, whole_unit);
+}
+
+// Walks every compilation unit of `section` and reports what it finds to
+// `sink`. DIEs for compilation units, functions, and global variables often
+// carry attributes that resolve to addresses, which is what lets us attribute
+// ranges to a compile unit.
+//
+// `stmt_list_map` maps a DW_AT_stmt_list value to a compile unit name. A named
+// unit records its name there; an unnamed unit borrows whatever name was
+// recorded earlier for the same value. A unit that still has no name after
+// that is skipped entirely. Returns without doing anything if the reader
+// cannot seek to the start of `section`.
+static void ReadDWARFDebugInfo(
+    const dwarf::File& file, dwarf::DIEReader::Section section,
+    const SymbolTable& symtab, const DualMap& symbol_map, RangeSink* sink,
+    std::unordered_map<uint64_t, std::string>* stmt_list_map) {
+  dwarf::DIEReader die_reader(file);
+  die_reader.set_strp_sink(sink);
+  dwarf::AttrReader<GeneralDIE> attr_reader;
+
+  // String attributes: ignored unless the value really is a string.
+  attr_reader.OnAttribute(DW_AT_name,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            if (val.IsString()) {
+                              die->set_name(val.GetString());
+                            }
+                          });
+  attr_reader.OnAttribute(DW_AT_linkage_name,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            if (val.IsString()) {
+                              die->set_linkage_name(val.GetString());
+                            }
+                          });
+
+  // A location is stored either as a string or as an integer.
+  attr_reader.OnAttribute(DW_AT_location,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            if (!val.IsString()) {
+                              die->set_location_uint64(val.GetUint());
+                            } else {
+                              die->set_location_string(val.GetString());
+                            }
+                          });
+
+  // Integer attributes: ignored when the value cannot be read as an integer.
+  attr_reader.OnAttribute(DW_AT_low_pc,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            absl::optional<uint64_t> number = val.ToUint();
+                            if (number.has_value()) {
+                              die->set_low_pc(number.value());
+                            }
+                          });
+  attr_reader.OnAttribute(DW_AT_high_pc,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            absl::optional<uint64_t> number = val.ToUint();
+                            if (number.has_value()) {
+                              die->set_high_pc(number.value());
+                            }
+                          });
+  attr_reader.OnAttribute(DW_AT_stmt_list,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            absl::optional<uint64_t> number = val.ToUint();
+                            if (number.has_value()) {
+                              die->set_stmt_list(number.value());
+                            }
+                          });
+  attr_reader.OnAttribute(DW_AT_ranges,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            absl::optional<uint64_t> number = val.ToUint();
+                            if (number.has_value()) {
+                              die->set_ranges(number.value());
+                            }
+                          });
+  attr_reader.OnAttribute(DW_AT_start_scope,
+                          [](GeneralDIE* die, dwarf::AttrValue val) {
+                            absl::optional<uint64_t> number = val.ToUint();
+                            if (number.has_value()) {
+                              die->set_start_scope(number.value());
+                            }
+                          });
+
+  if (!die_reader.SeekToStart(section)) {
+    return;
+  }
+
+  do {
+    // The first DIE of a unit describes the unit itself.
+    GeneralDIE compileunit_die;
+    attr_reader.ReadAttributes(&die_reader, &compileunit_die);
+    std::string compileunit_name = std::string(compileunit_die.name());
+
+    if (compileunit_die.has_stmt_list()) {
+      const uint64_t stmt_list = compileunit_die.stmt_list();
+      if (!compileunit_name.empty()) {
+        // Remember the name so later unnamed units can reuse it.
+        (*stmt_list_map)[stmt_list] = compileunit_name;
+      } else {
+        auto known = stmt_list_map->find(stmt_list);
+        if (known != stmt_list_map->end()) {
+          compileunit_name = known->second;
+        }
+      }
+    }
+
+    if (compileunit_name.empty()) {
+      // Nothing to attribute this unit to; move on to the next one.
+      continue;
+    }
+
+    die_reader.set_compileunit_name(compileunit_name);
+    sink->AddFileRange("dwarf_debuginfo", compileunit_name,
+                       die_reader.unit_range());
+    AddDIE(file, compileunit_name, compileunit_die, symtab, symbol_map,
+           die_reader.unit_sizes(), sink);
+
+    if (compileunit_die.has_stmt_list()) {
+      const uint64_t line_table_offset = compileunit_die.stmt_list();
+      ReadDWARFStmtListRange(file, line_table_offset, compileunit_name, sink);
+    }
+
+    // Attribute this unit's abbreviation table to the unit as well.
+    string_view abbrev_data = file.debug_abbrev;
+    dwarf::SkipBytes(die_reader.debug_abbrev_offset(), &abbrev_data);
+    dwarf::AbbrevTable unit_abbrev;
+    abbrev_data = unit_abbrev.ReadAbbrevs(abbrev_data);
+    sink->AddFileRange("dwarf_abbrev", compileunit_name, abbrev_data);
+
+    while (die_reader.NextDIE()) {
+      GeneralDIE die;
+      attr_reader.ReadAttributes(&die_reader, &die);
+
+      // low_pc == 0 is a signal that this routine was stripped out of the
+      // final binary. Skip this DIE and all of its children.
+      const bool stripped = die.has_low_pc() && die.low_pc() == 0;
+      if (!stripped) {
+        AddDIE(file, compileunit_name, die, symtab, symbol_map,
+               die_reader.unit_sizes(), sink);
+      } else {
+        die_reader.SkipChildren();
+      }
+    }
+  } while (die_reader.NextCompilationUnit());
+}
+
+// Feeds `sink` with compile-unit information taken from the DWARF data in
+// `file`. Throws if `file.debug_info` is empty. Address ranges are read first
+// (only when `file.debug_aranges` is non-empty), then the debug-info and
+// debug-types sections, and finally pubnames and pubtypes. Both section passes
+// share one stmt_list -> name map, so names learned in the first pass are
+// available to the second.
+void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
+                           const DualMap& symbol_map, RangeSink* sink) {
+  if (file.debug_info.size() == 0) {
+    THROW("missing debug info");
+  }
+
+  if (file.debug_aranges.size() != 0) {
+    ReadDWARFAddressRanges(file, sink);
+  }
+
+  std::unordered_map<uint64_t, std::string> names_by_stmt_list;
+  ReadDWARFDebugInfo(file, dwarf::DIEReader::Section::kDebugInfo, symtab,
+                     symbol_map, sink, &names_by_stmt_list);
+  ReadDWARFDebugInfo(file, dwarf::DIEReader::Section::kDebugTypes, symtab,
+                     symbol_map, sink, &names_by_stmt_list);
+
+  ReadDWARFPubNames(file, file.debug_pubnames, sink);
+  ReadDWARFPubNames(file, file.debug_pubtypes, sink);
+}
+
+// Builds the label used for a line-table row: "<file>:<line>" when
+// `include_line` is set, otherwise just `file`.
+static std::string LineInfoKey(const std::string& file, uint32_t line,
+                               bool include_line) {
+  if (!include_line) {
+    return file;
+  }
+  std::string key = file;
+  key += ":";
+  key += std::to_string(line);
+  return key;
+}
+
+// Consumes rows from `line_info_reader` and reports each run of consecutive
+// rows that share a label (see LineInfoKey) to `sink` as one VM range.
+//
+// A span start address of 0 means "no span is open". A span is closed when
+// the label changes or when an end_sequence row is reached; end_sequence also
+// resets the start to 0 so that the next row opens a fresh span.
+static void ReadDWARFStmtList(bool include_line,
+                              dwarf::LineInfoReader* line_info_reader,
+                              RangeSink* sink) {
+  uint64_t span_startaddr = 0;
+  std::string last_source;
+
+  while (line_info_reader->ReadLineInfo()) {
+    const auto& row = line_info_reader->lineinfo();
+    const auto row_addr = row.address;
+    const auto row_line = row.line;
+
+    // An end_sequence row carries no source of its own; it reuses the label
+    // of the span it terminates.
+    std::string label;
+    if (row.end_sequence) {
+      label = last_source;
+    } else {
+      label = LineInfoKey(line_info_reader->GetExpandedFilename(row.file),
+                          row_line, include_line);
+    }
+
+    if (span_startaddr == 0) {
+      span_startaddr = row_addr;
+    } else {
+      const bool label_changed = !last_source.empty() && label != last_source;
+      if (row.end_sequence || label_changed) {
+        sink->AddVMRange("dwarf_stmtlist", span_startaddr,
+                         row_addr - span_startaddr, last_source);
+        span_startaddr = row.end_sequence ? 0 : row_addr;
+      }
+    }
+    last_source = label;
+  }
+}
+
+// Feeds `sink` with ranges derived from the line tables referenced by each
+// compilation unit of the debug-info section. When `include_line` is true the
+// labels include the line number, otherwise only the file name.
+//
+// Throws if `file.debug_info` or `file.debug_line` is empty, or if the reader
+// cannot seek to the start of the debug-info section.
+void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
+                      bool include_line) {
+  if (file.debug_info.size() == 0 || file.debug_line.size() == 0) {
+    THROW("no debug info");
+  }
+
+  dwarf::DIEReader die_reader(file);
+  dwarf::LineInfoReader line_info_reader(file);
+  dwarf::AttrReader<InlinesDIE> attr_reader;
+
+  // The only attribute of interest is the unit's DW_AT_stmt_list value.
+  attr_reader.OnAttribute(
+      DW_AT_stmt_list, [](InlinesDIE* die, dwarf::AttrValue val) {
+        absl::optional<uint64_t> number = val.ToUint();
+        if (number.has_value()) {
+          die->set_stmt_list(number.value());
+        }
+      });
+
+  if (!die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)) {
+    THROW("debug info is present, but empty");
+  }
+
+  // Only the first DIE of every unit is examined.
+  do {
+    InlinesDIE unit_die;
+    attr_reader.ReadAttributes(&die_reader, &unit_die);
+
+    if (unit_die.has_stmt_list()) {
+      const uint64_t line_table_offset = unit_die.stmt_list();
+      line_info_reader.SeekToOffset(line_table_offset,
+                                    die_reader.unit_sizes().address_size());
+      ReadDWARFStmtList(include_line, &line_info_reader, sink);
+    }
+  } while (die_reader.NextCompilationUnit());
+}
+
+} // namespace bloaty
diff --git a/src/dwarf_constants.h b/src/dwarf_constants.h
new file mode 100644
index 0000000..ba6eb2c
--- /dev/null
+++ b/src/dwarf_constants.h
@@ -0,0 +1,649 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: dannyb@google.com (Daniel Berlin)
+
+#ifndef UTIL_DEBUGINFO_DWARF2ENUMS_H__
+#define UTIL_DEBUGINFO_DWARF2ENUMS_H__
+
+namespace dwarf2reader {
+
+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Tag names and codes. Standard tags come first, grouped by the DWARF version
+// noted in the comments, followed by vendor extensions. Codes missing from the
+// sequence are simply not named by this table.
+enum DwarfTag {
+  DW_TAG_padding = 0x00,
+  DW_TAG_array_type = 0x01,
+  DW_TAG_class_type = 0x02,
+  DW_TAG_entry_point = 0x03,
+  DW_TAG_enumeration_type = 0x04,
+  DW_TAG_formal_parameter = 0x05,
+  DW_TAG_imported_declaration = 0x08,
+  DW_TAG_label = 0x0a,
+  DW_TAG_lexical_block = 0x0b,
+  DW_TAG_member = 0x0d,
+  DW_TAG_pointer_type = 0x0f,
+  DW_TAG_reference_type = 0x10,
+  DW_TAG_compile_unit = 0x11,
+  DW_TAG_string_type = 0x12,
+  DW_TAG_structure_type = 0x13,
+  DW_TAG_subroutine_type = 0x15,
+  DW_TAG_typedef = 0x16,
+  DW_TAG_union_type = 0x17,
+  DW_TAG_unspecified_parameters = 0x18,
+  DW_TAG_variant = 0x19,
+  DW_TAG_common_block = 0x1a,
+  DW_TAG_common_inclusion = 0x1b,
+  DW_TAG_inheritance = 0x1c,
+  DW_TAG_inlined_subroutine = 0x1d,
+  DW_TAG_module = 0x1e,
+  DW_TAG_ptr_to_member_type = 0x1f,
+  DW_TAG_set_type = 0x20,
+  DW_TAG_subrange_type = 0x21,
+  DW_TAG_with_stmt = 0x22,
+  DW_TAG_access_declaration = 0x23,
+  DW_TAG_base_type = 0x24,
+  DW_TAG_catch_block = 0x25,
+  DW_TAG_const_type = 0x26,
+  DW_TAG_constant = 0x27,
+  DW_TAG_enumerator = 0x28,
+  DW_TAG_file_type = 0x29,
+  DW_TAG_friend = 0x2a,
+  DW_TAG_namelist = 0x2b,
+  DW_TAG_namelist_item = 0x2c,
+  DW_TAG_packed_type = 0x2d,
+  DW_TAG_subprogram = 0x2e,
+  DW_TAG_template_type_param = 0x2f,
+  DW_TAG_template_value_param = 0x30,
+  DW_TAG_thrown_type = 0x31,
+  DW_TAG_try_block = 0x32,
+  DW_TAG_variant_part = 0x33,
+  DW_TAG_variable = 0x34,
+  DW_TAG_volatile_type = 0x35,
+
+  // DWARF 3.
+  DW_TAG_dwarf_procedure = 0x36,
+  DW_TAG_restrict_type = 0x37,
+  DW_TAG_interface_type = 0x38,
+  DW_TAG_namespace = 0x39,
+  DW_TAG_imported_module = 0x3a,
+  DW_TAG_unspecified_type = 0x3b,
+  DW_TAG_partial_unit = 0x3c,
+  DW_TAG_imported_unit = 0x3d,
+  DW_TAG_condition = 0x3f,
+  DW_TAG_shared_type = 0x40,
+
+  // DWARF 4.
+  DW_TAG_type_unit = 0x41,
+  DW_TAG_rvalue_reference_type = 0x42,
+  DW_TAG_template_alias = 0x43,
+
+  // Bounds of the range left to vendors.
+  DW_TAG_lo_user = 0x4080,
+  DW_TAG_hi_user = 0xffff,
+
+  // SGI/MIPS Extensions.
+  DW_TAG_MIPS_loop = 0x4081,
+
+  // HP extensions. See:
+  // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz
+  DW_TAG_HP_array_descriptor = 0x4090,
+
+  // GNU extensions.
+  DW_TAG_format_label = 0x4101,       // For FORTRAN 77 and Fortran 90.
+  DW_TAG_function_template = 0x4102,  // For C++.
+  DW_TAG_class_template = 0x4103,     // For C++.
+  DW_TAG_GNU_BINCL = 0x4104,
+  DW_TAG_GNU_EINCL = 0x4105,
+  // http://gcc.gnu.org/wiki/TemplateParmsDwarf
+  DW_TAG_GNU_template_template_param = 0x4106,
+  DW_TAG_GNU_template_parameter_pack = 0x4107,
+  DW_TAG_GNU_formal_parameter_pack = 0x4108,
+  // http://www.dwarfstd.org/ShowIssue.php?issue=100909.2&type=open
+  DW_TAG_GNU_call_site = 0x4109,
+  DW_TAG_GNU_call_site_parameter = 0x410a,
+
+  // Apple extensions.
+  DW_TAG_APPLE_property = 0x4200,
+
+  // Extensions for UPC. See: http://upc.gwu.edu/~upc.
+  DW_TAG_upc_shared_type = 0x8765,
+  DW_TAG_upc_strict_type = 0x8766,
+  DW_TAG_upc_relaxed_type = 0x8767,
+
+  // PGI (STMicroelectronics) extensions. No documentation available.
+  DW_TAG_PGI_kanji_type = 0xA000,
+  DW_TAG_PGI_interface_block = 0xA020
+};
+
+
+// Yes/no values for a "has children" flag.
+// NOTE(review): presumably the children byte of an abbreviation declaration;
+// confirm against the DWARF specification.
+enum DwarfHasChild {
+  DW_children_no = 0,
+  DW_children_yes = 1
+};
+
+// Form names and codes. Grouped by the DWARF version that introduced them,
+// with the Fission extension forms at the end.
+enum DwarfForm {
+  DW_FORM_addr = 0x01,
+  DW_FORM_block2 = 0x03,
+  DW_FORM_block4 = 0x04,
+  DW_FORM_data2 = 0x05,
+  DW_FORM_data4 = 0x06,
+  DW_FORM_data8 = 0x07,
+  DW_FORM_string = 0x08,
+  DW_FORM_block = 0x09,
+  DW_FORM_block1 = 0x0a,
+  DW_FORM_data1 = 0x0b,
+  DW_FORM_flag = 0x0c,
+  DW_FORM_sdata = 0x0d,
+  DW_FORM_strp = 0x0e,
+  DW_FORM_udata = 0x0f,
+  DW_FORM_ref_addr = 0x10,
+  DW_FORM_ref1 = 0x11,
+  DW_FORM_ref2 = 0x12,
+  DW_FORM_ref4 = 0x13,
+  DW_FORM_ref8 = 0x14,
+  DW_FORM_ref_udata = 0x15,
+  DW_FORM_indirect = 0x16,
+
+  // DWARF 4.
+  DW_FORM_sec_offset = 0x17,
+  DW_FORM_exprloc = 0x18,
+  DW_FORM_flag_present = 0x19,
+
+  // DWARF 5.
+  DW_FORM_line_strp = 0x1f,
+
+  // DWARF 4.
+  DW_FORM_ref_sig8 = 0x20,
+
+  // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+  DW_FORM_GNU_addr_index = 0x1f01,
+  DW_FORM_GNU_str_index = 0x1f02
+};
+
+// Attribute names and codes. Standard attributes come first, grouped by the
+// DWARF version noted in the comments, followed by vendor extensions. Several
+// vendor ranges overlap, so some enumerators intentionally share a value.
+enum DwarfAttribute {
+  DW_AT_sibling = 0x01,
+  DW_AT_location = 0x02,
+  DW_AT_name = 0x03,
+  DW_AT_ordering = 0x09,
+  DW_AT_subscr_data = 0x0a,
+  DW_AT_byte_size = 0x0b,
+  DW_AT_bit_offset = 0x0c,
+  DW_AT_bit_size = 0x0d,
+  DW_AT_element_list = 0x0f,
+  DW_AT_stmt_list = 0x10,
+  DW_AT_low_pc = 0x11,
+  DW_AT_high_pc = 0x12,
+  DW_AT_language = 0x13,
+  DW_AT_member = 0x14,
+  DW_AT_discr = 0x15,
+  DW_AT_discr_value = 0x16,
+  DW_AT_visibility = 0x17,
+  DW_AT_import = 0x18,
+  DW_AT_string_length = 0x19,
+  DW_AT_common_reference = 0x1a,
+  DW_AT_comp_dir = 0x1b,
+  DW_AT_const_value = 0x1c,
+  DW_AT_containing_type = 0x1d,
+  DW_AT_default_value = 0x1e,
+  DW_AT_inline = 0x20,
+  DW_AT_is_optional = 0x21,
+  DW_AT_lower_bound = 0x22,
+  DW_AT_producer = 0x25,
+  DW_AT_prototyped = 0x27,
+  DW_AT_return_addr = 0x2a,
+  DW_AT_start_scope = 0x2c,
+  DW_AT_stride_size = 0x2e,
+  DW_AT_upper_bound = 0x2f,
+  DW_AT_abstract_origin = 0x31,
+  DW_AT_accessibility = 0x32,
+  DW_AT_address_class = 0x33,
+  DW_AT_artificial = 0x34,
+  DW_AT_base_types = 0x35,
+  DW_AT_calling_convention = 0x36,
+  DW_AT_count = 0x37,
+  DW_AT_data_member_location = 0x38,
+  DW_AT_decl_column = 0x39,
+  DW_AT_decl_file = 0x3a,
+  DW_AT_decl_line = 0x3b,
+  DW_AT_declaration = 0x3c,
+  DW_AT_discr_list = 0x3d,
+  DW_AT_encoding = 0x3e,
+  DW_AT_external = 0x3f,
+  DW_AT_frame_base = 0x40,
+  DW_AT_friend = 0x41,
+  DW_AT_identifier_case = 0x42,
+  DW_AT_macro_info = 0x43,
+  DW_AT_namelist_items = 0x44,
+  DW_AT_priority = 0x45,
+  DW_AT_segment = 0x46,
+  DW_AT_specification = 0x47,
+  DW_AT_static_link = 0x48,
+  DW_AT_type = 0x49,
+  DW_AT_use_location = 0x4a,
+  DW_AT_variable_parameter = 0x4b,
+  DW_AT_virtuality = 0x4c,
+  DW_AT_vtable_elem_location = 0x4d,
+
+  // DWARF 3 values.
+  DW_AT_allocated = 0x4e,
+  DW_AT_associated = 0x4f,
+  DW_AT_data_location = 0x50,
+  DW_AT_stride = 0x51,
+  DW_AT_entry_pc = 0x52,
+  DW_AT_use_UTF8 = 0x53,
+  DW_AT_extension = 0x54,
+  DW_AT_ranges = 0x55,
+  DW_AT_trampoline = 0x56,
+  DW_AT_call_column = 0x57,
+  DW_AT_call_file = 0x58,
+  DW_AT_call_line = 0x59,
+  DW_AT_description = 0x5a,
+  DW_AT_binary_scale = 0x5b,
+  DW_AT_decimal_scale = 0x5c,
+  DW_AT_small = 0x5d,
+  DW_AT_decimal_sign = 0x5e,
+  // Historical misspelling of DW_AT_decimal_sign, kept so that existing users
+  // of this header keep compiling.
+  DW_AT_descimal_sign = 0x5e,
+  DW_AT_digit_count = 0x5f,
+  DW_AT_picture_string = 0x60,
+  DW_AT_mutable = 0x61,
+  DW_AT_threads_scaled = 0x62,
+  DW_AT_explicit = 0x63,
+  DW_AT_object_pointer = 0x64,
+  DW_AT_endianity = 0x65,
+  DW_AT_elemental = 0x66,
+  DW_AT_pure = 0x67,
+  DW_AT_recursive = 0x68,
+
+  // Bounds of the range left to vendors.
+  DW_AT_lo_user = 0x2000,
+  DW_AT_hi_user = 0x3fff,
+
+  // DWARF 4 values.
+  DW_AT_signature = 0x69,
+  DW_AT_main_subprogram = 0x6a,
+  DW_AT_data_bit_offset = 0x6b,
+  DW_AT_const_expr = 0x6c,
+  DW_AT_enum_class = 0x6d,
+  DW_AT_linkage_name = 0x6e,
+
+  // SGI/MIPS extensions.
+  DW_AT_MIPS_fde = 0x2001,
+  DW_AT_MIPS_loop_begin = 0x2002,
+  DW_AT_MIPS_tail_loop_begin = 0x2003,
+  DW_AT_MIPS_epilog_begin = 0x2004,
+  DW_AT_MIPS_loop_unroll_factor = 0x2005,
+  DW_AT_MIPS_software_pipeline_depth = 0x2006,
+  DW_AT_MIPS_linkage_name = 0x2007,
+  DW_AT_MIPS_stride = 0x2008,
+  DW_AT_MIPS_abstract_name = 0x2009,
+  DW_AT_MIPS_clone_origin = 0x200a,
+  DW_AT_MIPS_has_inlines = 0x200b,
+
+  // HP extensions.
+  DW_AT_HP_block_index = 0x2000,
+  DW_AT_HP_unmodifiable = 0x2001,  // Same as DW_AT_MIPS_fde.
+  DW_AT_HP_actuals_stmt_list = 0x2010,
+  DW_AT_HP_proc_per_section = 0x2011,
+  DW_AT_HP_raw_data_ptr = 0x2012,
+  DW_AT_HP_pass_by_reference = 0x2013,
+  DW_AT_HP_opt_level = 0x2014,
+  DW_AT_HP_prof_version_id = 0x2015,
+  DW_AT_HP_opt_flags = 0x2016,
+  DW_AT_HP_cold_region_low_pc = 0x2017,
+  DW_AT_HP_cold_region_high_pc = 0x2018,
+  DW_AT_HP_all_variables_modifiable = 0x2019,
+  DW_AT_HP_linkage_name = 0x201a,
+  DW_AT_HP_prof_flags = 0x201b,  // In comp unit of procs_info for -g.
+
+  // GNU extensions.
+  DW_AT_sf_names = 0x2101,
+  DW_AT_src_info = 0x2102,
+  DW_AT_mac_info = 0x2103,
+  DW_AT_src_coords = 0x2104,
+  DW_AT_body_begin = 0x2105,
+  DW_AT_body_end = 0x2106,
+  DW_AT_GNU_vector = 0x2107,
+  // http://gcc.gnu.org/wiki/ThreadSafetyAnnotation
+  DW_AT_GNU_guarded_by = 0x2108,
+  DW_AT_GNU_pt_guarded_by = 0x2109,
+  DW_AT_GNU_guarded = 0x210a,
+  DW_AT_GNU_pt_guarded = 0x210b,
+  DW_AT_GNU_locks_excluded = 0x210c,
+  DW_AT_GNU_exclusive_locks_required = 0x210d,
+  DW_AT_GNU_shared_locks_required = 0x210e,
+  // http://gcc.gnu.org/wiki/DwarfSeparateTypeInfo
+  DW_AT_GNU_odr_signature = 0x210f,
+  // http://gcc.gnu.org/wiki/TemplateParmsDwarf
+  DW_AT_GNU_template_name = 0x2110,
+  // http://www.dwarfstd.org/ShowIssue.php?issue=100909.2&type=open
+  DW_AT_GNU_call_site_value = 0x2111,
+  DW_AT_GNU_call_site_data_value = 0x2112,
+  DW_AT_GNU_call_site_target = 0x2113,
+  DW_AT_GNU_call_site_target_clobbered = 0x2114,
+  DW_AT_GNU_tail_call = 0x2115,
+  DW_AT_GNU_all_tail_call_sites = 0x2116,
+  DW_AT_GNU_all_call_sites = 0x2117,
+  DW_AT_GNU_all_source_call_sites = 0x2118,
+  DW_AT_GNU_macros = 0x2119,
+  // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+  DW_AT_GNU_dwo_name = 0x2130,
+  DW_AT_GNU_dwo_id = 0x2131,
+  DW_AT_GNU_ranges_base = 0x2132,
+  DW_AT_GNU_addr_base = 0x2133,
+  DW_AT_GNU_pubnames = 0x2134,
+  DW_AT_GNU_pubtypes = 0x2135,
+  // discriminator.
+  DW_AT_GNU_discriminator = 0x2136,
+
+  // VMS extensions.
+  DW_AT_VMS_rtnbeg_pd_address = 0x2201,
+
+  // UPC extension.
+  DW_AT_upc_threads_scaled = 0x3210,
+
+  // PGI (STMicroelectronics) extensions.
+  DW_AT_PGI_lbase = 0x3a00,
+  DW_AT_PGI_soffset = 0x3a01,
+  DW_AT_PGI_lstride = 0x3a02,
+
+  // Apple extensions.
+  DW_AT_APPLE_optimized = 0x3fe1,
+  DW_AT_APPLE_flags = 0x3fe2,
+  DW_AT_APPLE_isa = 0x3fe3,
+  DW_AT_APPLE_block = 0x3fe4,
+  DW_AT_APPLE_major_runtime_vers = 0x3fe5,
+  DW_AT_APPLE_runtime_class = 0x3fe6,
+  DW_AT_APPLE_omit_frame_ptr = 0x3fe7,
+  DW_AT_APPLE_property_name = 0x3fe8,
+  DW_AT_APPLE_property_getter = 0x3fe9,
+  DW_AT_APPLE_property_setter = 0x3fea,
+  DW_AT_APPLE_property_attribute = 0x3feb,
+  DW_AT_APPLE_objc_complete_type = 0x3fec
+};
+
+
+// Line number opcodes (the standard opcodes of the line number program).
+// Value 13 is shared by two experimental opcodes that apply to different
+// tables.
+enum DwarfLineNumberOps {
+  DW_LNS_extended_op = 0,
+  DW_LNS_copy = 1,
+  DW_LNS_advance_pc = 2,
+  DW_LNS_advance_line = 3,
+  DW_LNS_set_file = 4,
+  DW_LNS_set_column = 5,
+  DW_LNS_negate_stmt = 6,
+  DW_LNS_set_basic_block = 7,
+  DW_LNS_const_add_pc = 8,
+  DW_LNS_fixed_advance_pc = 9,
+
+  // DWARF 3.
+  DW_LNS_set_prologue_end = 10,
+  DW_LNS_set_epilogue_begin = 11,
+  DW_LNS_set_isa = 12,
+
+  // Experimental DWARF 5 extensions.
+  // See http://wiki.dwarfstd.org/index.php?title=TwoLevelLineTables.
+  DW_LNS_set_address_from_logical = 13,  // Actuals table only.
+  DW_LNS_set_subprogram = 13,            // Logicals table only.
+  DW_LNS_inlined_call = 14,              // Logicals table only.
+  DW_LNS_pop_context = 15                // Logicals table only.
+};
+
+// Line number extended opcodes: the standard ones, the bounds of the vendor
+// range, and the HP extensions.
+enum DwarfLineNumberExtendedOps {
+  DW_LNE_end_sequence = 1,
+  DW_LNE_set_address = 2,
+  DW_LNE_define_file = 3,
+  DW_LNE_set_discriminator = 4,
+
+  // Bounds of the range left to vendors.
+  DW_LNE_lo_user = 0x80,
+  DW_LNE_hi_user = 0xff,
+
+  // HP extensions.
+  DW_LNE_HP_negate_is_UV_update = 0x11,
+  DW_LNE_HP_push_context = 0x12,
+  DW_LNE_HP_pop_context = 0x13,
+  DW_LNE_HP_set_file_line_column = 0x14,
+  DW_LNE_HP_set_routine_name = 0x15,
+  DW_LNE_HP_set_sequence = 0x16,
+  DW_LNE_HP_negate_post_semantics = 0x17,
+  DW_LNE_HP_negate_function_exit = 0x18,
+  DW_LNE_HP_negate_front_end_logical = 0x19,
+  DW_LNE_HP_define_proc = 0x20
+};
+
+// Line number content type codes (DWARF 5), plus the experimental two-level
+// line table additions.
+enum DwarfLineNumberContentType {
+  DW_LNCT_path = 1,
+  DW_LNCT_directory_index = 2,
+  DW_LNCT_timestamp = 3,
+  DW_LNCT_size = 4,
+  DW_LNCT_MD5 = 5,
+
+  // Experimental DWARF 5 extensions.
+  // See http://wiki.dwarfstd.org/index.php?title=TwoLevelLineTables.
+  DW_LNCT_subprogram_name = 6,
+  DW_LNCT_decl_file = 7,
+  DW_LNCT_decl_line = 8
+};
+
+// Type encoding names and codes (the DW_ATE_* values), grouped by the DWARF
+// version that introduced them, followed by the bounds of the vendor range.
+enum DwarfEncoding {
+  DW_ATE_address = 0x1,
+  DW_ATE_boolean = 0x2,
+  DW_ATE_complex_float = 0x3,
+  DW_ATE_float = 0x4,
+  DW_ATE_signed = 0x5,
+  DW_ATE_signed_char = 0x6,
+  DW_ATE_unsigned = 0x7,
+  DW_ATE_unsigned_char = 0x8,
+
+  // DWARF3/DWARF3f
+  DW_ATE_imaginary_float = 0x9,
+  DW_ATE_packed_decimal = 0xa,
+  DW_ATE_numeric_string = 0xb,
+  DW_ATE_edited = 0xc,
+  DW_ATE_signed_fixed = 0xd,
+  DW_ATE_unsigned_fixed = 0xe,
+  DW_ATE_decimal_float = 0xf,
+
+  // DWARF4
+  DW_ATE_UTF = 0x10,
+  // Historical misspelling of DW_ATE_UTF (wrong prefix), kept so that
+  // existing users of this header keep compiling.
+  DW_ATR_UTF = 0x10,
+
+  // Bounds of the range left to vendors.
+  DW_ATE_lo_user = 0x80,
+  DW_ATE_hi_user = 0xff
+};
+
+// Location virtual machine opcodes. The literal (lit0..lit31), register
+// (reg0..reg31) and base-register (breg0..breg31) families each occupy 32
+// consecutive codes.
+enum DwarfOpcode {
+  DW_OP_addr = 0x03,
+  DW_OP_deref = 0x06,
+  DW_OP_const1u = 0x08,
+  DW_OP_const1s = 0x09,
+  DW_OP_const2u = 0x0a,
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu = 0x10,
+  DW_OP_consts = 0x11,
+  DW_OP_dup = 0x12,
+  DW_OP_drop = 0x13,
+  DW_OP_over = 0x14,
+  DW_OP_pick = 0x15,
+  DW_OP_swap = 0x16,
+  DW_OP_rot = 0x17,
+  DW_OP_xderef = 0x18,
+  DW_OP_abs = 0x19,
+  DW_OP_and = 0x1a,
+  DW_OP_div = 0x1b,
+  DW_OP_minus = 0x1c,
+  DW_OP_mod = 0x1d,
+  DW_OP_mul = 0x1e,
+  DW_OP_neg = 0x1f,
+  DW_OP_not = 0x20,
+  DW_OP_or = 0x21,
+  DW_OP_plus = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl = 0x24,
+  DW_OP_shr = 0x25,
+  DW_OP_shra = 0x26,
+  DW_OP_xor = 0x27,
+  DW_OP_bra = 0x28,
+  DW_OP_eq = 0x29,
+  DW_OP_ge = 0x2a,
+  DW_OP_gt = 0x2b,
+  DW_OP_le = 0x2c,
+  DW_OP_lt = 0x2d,
+  DW_OP_ne = 0x2e,
+  DW_OP_skip = 0x2f,
+
+  DW_OP_lit0 = 0x30,
+  DW_OP_lit1 = 0x31,
+  DW_OP_lit2 = 0x32,
+  DW_OP_lit3 = 0x33,
+  DW_OP_lit4 = 0x34,
+  DW_OP_lit5 = 0x35,
+  DW_OP_lit6 = 0x36,
+  DW_OP_lit7 = 0x37,
+  DW_OP_lit8 = 0x38,
+  DW_OP_lit9 = 0x39,
+  DW_OP_lit10 = 0x3a,
+  DW_OP_lit11 = 0x3b,
+  DW_OP_lit12 = 0x3c,
+  DW_OP_lit13 = 0x3d,
+  DW_OP_lit14 = 0x3e,
+  DW_OP_lit15 = 0x3f,
+  DW_OP_lit16 = 0x40,
+  DW_OP_lit17 = 0x41,
+  DW_OP_lit18 = 0x42,
+  DW_OP_lit19 = 0x43,
+  DW_OP_lit20 = 0x44,
+  DW_OP_lit21 = 0x45,
+  DW_OP_lit22 = 0x46,
+  DW_OP_lit23 = 0x47,
+  DW_OP_lit24 = 0x48,
+  DW_OP_lit25 = 0x49,
+  DW_OP_lit26 = 0x4a,
+  DW_OP_lit27 = 0x4b,
+  DW_OP_lit28 = 0x4c,
+  DW_OP_lit29 = 0x4d,
+  DW_OP_lit30 = 0x4e,
+  DW_OP_lit31 = 0x4f,
+
+  DW_OP_reg0 = 0x50,
+  DW_OP_reg1 = 0x51,
+  DW_OP_reg2 = 0x52,
+  DW_OP_reg3 = 0x53,
+  DW_OP_reg4 = 0x54,
+  DW_OP_reg5 = 0x55,
+  DW_OP_reg6 = 0x56,
+  DW_OP_reg7 = 0x57,
+  DW_OP_reg8 = 0x58,
+  DW_OP_reg9 = 0x59,
+  DW_OP_reg10 = 0x5a,
+  DW_OP_reg11 = 0x5b,
+  DW_OP_reg12 = 0x5c,
+  DW_OP_reg13 = 0x5d,
+  DW_OP_reg14 = 0x5e,
+  DW_OP_reg15 = 0x5f,
+  DW_OP_reg16 = 0x60,
+  DW_OP_reg17 = 0x61,
+  DW_OP_reg18 = 0x62,
+  DW_OP_reg19 = 0x63,
+  DW_OP_reg20 = 0x64,
+  DW_OP_reg21 = 0x65,
+  DW_OP_reg22 = 0x66,
+  DW_OP_reg23 = 0x67,
+  DW_OP_reg24 = 0x68,
+  DW_OP_reg25 = 0x69,
+  DW_OP_reg26 = 0x6a,
+  DW_OP_reg27 = 0x6b,
+  DW_OP_reg28 = 0x6c,
+  DW_OP_reg29 = 0x6d,
+  DW_OP_reg30 = 0x6e,
+  DW_OP_reg31 = 0x6f,
+
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg1 = 0x71,
+  DW_OP_breg2 = 0x72,
+  DW_OP_breg3 = 0x73,
+  DW_OP_breg4 = 0x74,
+  DW_OP_breg5 = 0x75,
+  DW_OP_breg6 = 0x76,
+  DW_OP_breg7 = 0x77,
+  DW_OP_breg8 = 0x78,
+  DW_OP_breg9 = 0x79,
+  DW_OP_breg10 = 0x7a,
+  DW_OP_breg11 = 0x7b,
+  DW_OP_breg12 = 0x7c,
+  DW_OP_breg13 = 0x7d,
+  DW_OP_breg14 = 0x7e,
+  DW_OP_breg15 = 0x7f,
+  DW_OP_breg16 = 0x80,
+  DW_OP_breg17 = 0x81,
+  DW_OP_breg18 = 0x82,
+  DW_OP_breg19 = 0x83,
+  DW_OP_breg20 = 0x84,
+  DW_OP_breg21 = 0x85,
+  DW_OP_breg22 = 0x86,
+  DW_OP_breg23 = 0x87,
+  DW_OP_breg24 = 0x88,
+  DW_OP_breg25 = 0x89,
+  DW_OP_breg26 = 0x8a,
+  DW_OP_breg27 = 0x8b,
+  DW_OP_breg28 = 0x8c,
+  DW_OP_breg29 = 0x8d,
+  DW_OP_breg30 = 0x8e,
+  DW_OP_breg31 = 0x8f,
+
+  DW_OP_regX = 0x90,
+  DW_OP_fbreg = 0x91,
+  DW_OP_bregX = 0x92,
+  DW_OP_piece = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop = 0x96,
+
+  // DWARF3/DWARF3f
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2 = 0x98,
+  DW_OP_call4 = 0x99,
+  DW_OP_call_ref = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+
+  // DWARF4
+  DW_OP_implicit_value = 0x9e,
+  DW_OP_stack_value = 0x9f,
+
+  // Bounds of the range left to vendors.
+  DW_OP_lo_user = 0xe0,
+  DW_OP_hi_user = 0xff,
+
+  // GNU extensions
+  DW_OP_GNU_push_tls_address = 0xe0,
+  DW_OP_GNU_uninit = 0xf0,
+  DW_OP_GNU_encoded_addr = 0xf1,
+  // http://www.dwarfstd.org/ShowIssue.php?issue=100831.1&type=open
+  DW_OP_GNU_implicit_pointer = 0xf2,
+  // http://www.dwarfstd.org/ShowIssue.php?issue=100909.1&type=open
+  DW_OP_GNU_entry_value = 0xf3,
+  // Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
+  DW_OP_GNU_addr_index = 0xfb,
+  DW_OP_GNU_const_index = 0xfc
+};
+
+// Section identifiers for DWP files. Numbering starts at 1.
+enum DwarfSectionId {
+  DW_SECT_INFO = 1,
+  DW_SECT_TYPES = 2,
+  DW_SECT_ABBREV = 3,
+  DW_SECT_LINE = 4,
+  DW_SECT_LOC = 5,
+  DW_SECT_STR_OFFSETS = 6,
+  DW_SECT_MACINFO = 7,
+  DW_SECT_MACRO = 8
+};
+
+// Pointer encodings used by .eh_frame, see:
+// http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+//
+// The two *_MASK enumerators select different bit groups of an encoding byte:
+// the low nibble holds the value format and bits 0x70 hold how it is applied.
+enum PointerEncoding {
+  // Value format (low nibble).
+  DW_EH_PE_absptr = 0x00,
+  DW_EH_PE_uleb128 = 0x01,
+  DW_EH_PE_udata2 = 0x02,
+  DW_EH_PE_udata4 = 0x03,
+  DW_EH_PE_udata8 = 0x04,
+  DW_EH_PE_sleb128 = 0x09,
+  DW_EH_PE_sdata2 = 0x0A,
+  DW_EH_PE_sdata4 = 0x0B,
+  DW_EH_PE_sdata8 = 0x0C,
+  DW_EH_PE_FORMAT_MASK = 0x0f,
+
+  // Application (bits 0x70).
+  DW_EH_PE_pcrel = 0x10,
+  DW_EH_PE_textrel = 0x20,
+  DW_EH_PE_datarel = 0x30,
+  DW_EH_PE_funcrel = 0x40,
+  DW_EH_PE_aligned = 0x50,
+  DW_EH_PE_APPLICATION_MASK = 0x70,
+
+  // This acts as a flag.
+  DW_EH_PE_indirect = 0x80,
+
+  DW_EH_PE_omit = 0xff
+};
+
+} // namespace dwarf2reader
+#endif // UTIL_DEBUGINFO_DWARF2ENUMS_H__
diff --git a/src/elf.cc b/src/elf.cc
new file mode 100644
index 0000000..1f9efa5
--- /dev/null
+++ b/src/elf.cc
@@ -0,0 +1,1432 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <string>
+#include <iostream>
+#include "absl/numeric/int128.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "third_party/freebsd_elf/elf.h"
+#include "bloaty.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+
+// Not present in the FreeBSD ELF headers.
+#define NT_GNU_BUILD_ID 3
+
+using absl::string_view;
+
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {  // Never returns: always throws a bloaty::Error.
+ throw bloaty::Error(str, __FILE__, line);  // __FILE__ names this source file; `line` is supplied by the caller.
+}
+
+#define THROW(msg) Throw(msg, __LINE__)  // Throw `msg`, tagged with the line where THROW is written.
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)  // Like THROW, with the message built by absl::Substitute.
+#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);  // Prints `x` to stderr. NOTE(review): the expansion already ends in ';'.
+
+namespace bloaty {
+
+namespace {
+
+// Returns a + b. Throws (via THROW) instead of wrapping when the sum does not
+// fit in 64 bits.
+uint64_t CheckedAdd(uint64_t a, uint64_t b) {
+  // a + b exceeds UINT64_MAX exactly when b exceeds the headroom left above a.
+  const uint64_t headroom = UINT64_MAX - a;
+  if (b > headroom) {
+    THROW("integer overflow in addition");
+  }
+  return a + b;
+}
+
+// Returns a * b. Throws (via THROW) instead of wrapping when the product does
+// not fit in 64 bits.
+uint64_t CheckedMul(uint64_t a, uint64_t b) {
+  // Widen both operands *before* multiplying. Multiplying the uint64_t values
+  // directly wraps modulo 2^64 first, so the result could never exceed
+  // UINT64_MAX and the overflow check below would never fire.
+  absl::uint128 a_128(a), b_128(b);
+  absl::uint128 c_128 = a_128 * b_128;
+  if (c_128 > UINT64_MAX) {
+    THROW("integer overflow in multiply");
+  }
+  return static_cast<uint64_t>(c_128);
+}
+
+// Function object that returns its argument passed through ByteSwap().
+struct ByteSwapFunc {
+  template <typename Value>
+  Value operator()(Value val) {
+    return ByteSwap(val);
+  }
+};
+
+// Function object that returns its argument unchanged. Has the same call
+// shape as ByteSwapFunc.
+struct NullFunc {
+  template <typename Value>
+  Value operator()(Value val) {
+    return val;
+  }
+};
+
+// Parses `str` as an integer with absl::SimpleAtoi and returns it as a
+// size_t. Throws (via THROWF) when the text cannot be converted.
+size_t StringViewToSize(string_view str) {
+  size_t parsed;
+  const bool ok = absl::SimpleAtoi(str, &parsed);
+  if (!ok) {
+    THROWF("couldn't convert string '$0' to integer.", str);
+  }
+  return parsed;
+}
+
+// Returns the start of `data` viewed as a `T`. Throws (via THROW) when `data`
+// holds fewer than sizeof(T) bytes. Nothing is copied; the result points into
+// `data`.
+template <class T>
+const T* GetStructPointer(string_view data) {
+  if (data.size() < sizeof(T)) {
+    THROW("Premature EOF reading ELF data.");
+  }
+  const char* start = data.data();
+  return reinterpret_cast<const T*>(start);
+}
+
+// Drops the first sizeof(T) bytes from `*data`.
+template <class T>
+void AdvancePastStruct(string_view* data) {
+  const string_view rest = data->substr(sizeof(T));
+  *data = rest;
+}
+
+// Returns the `n` bytes of `data` starting at `off`. Unlike plain substr(),
+// which would silently shorten the result, this throws (via THROW) when the
+// requested region reaches past the end of `data`. The end offset is computed
+// with CheckedAdd, so an overflowing off + n also throws.
+static string_view StrictSubstr(string_view data, size_t off, size_t n) {
+  if (CheckedAdd(off, n) > data.size()) {
+    THROW("ELF region out-of-bounds");
+  }
+  return data.substr(off, n);
+}
+
+// Returns everything in `data` from `off` to the end. Throws (via THROW) when
+// `off` lies past the end of `data`; `off == data.size()` yields an empty
+// view.
+static string_view StrictSubstr(string_view data, size_t off) {
+  const bool in_bounds = off <= data.size();
+  if (!in_bounds) {
+    THROW("ELF region out-of-bounds");
+  }
+  return data.substr(off);
+}
+
+// Rounds `offset` up to the next multiple of `granularity`, leaving it
+// unchanged when it is already a multiple. Granularity must be a power of two.
+static size_t AlignUp(size_t offset, size_t granularity) {
+  const size_t low_bits = granularity - 1;
+  return (offset + low_bits) & ~low_bits;
+}
+
+// ElfFile /////////////////////////////////////////////////////////////////////
+
+// For parsing the pieces we need out of an ELF file (.o, .so, and binaries).
+//
+// The constructor runs Initialize() on the supplied bytes and records whether
+// it succeeded; query IsOpen() before relying on anything else. Headers are
+// always exposed in their 64-bit (Elf64_*) form.
+class ElfFile {
+ public:
+  ElfFile(string_view data) : data_(data) {
+    ok_ = Initialize();
+  }
+
+  // True when Initialize() succeeded.
+  bool IsOpen() { return ok_; }
+
+  // Regions of the file where different headers live.
+  string_view entire_file() const { return data_; }
+  string_view header_region() const { return header_region_; }
+  string_view section_headers() const { return section_headers_; }
+  string_view segment_headers() const { return segment_headers_; }
+
+  const Elf64_Ehdr& header() const { return header_; }
+  Elf64_Xword section_count() const { return section_count_; }
+  Elf64_Xword section_string_index() const { return section_string_index_; }
+
+  // Represents an ELF segment (data used by the loader / dynamic linker).
+  // Filled in by ElfFile::ReadSegment().
+  class Segment {
+   public:
+    const Elf64_Phdr& header() const { return header_; }
+    string_view contents() const { return contents_; }
+    string_view range() const { return range_; }
+
+   private:
+    friend class ElfFile;
+    Elf64_Phdr header_;
+    string_view contents_;
+    string_view range_;
+  };
+
+  // Represents an ELF section (.text, .data, .bss, etc.)
+  // Filled in by ElfFile::ReadSection().
+  class Section {
+   public:
+    const Elf64_Shdr& header() const { return header_; }
+    string_view contents() const { return contents_; }
+    string_view range() const { return range_; }
+
+    // For SHN_UNDEF (undefined name), returns [nullptr, 0].
+    string_view GetName() const;
+
+    // Requires: this is a section with fixed-width entries (symbol table,
+    // relocation table, etc).
+    Elf64_Word GetEntryCount() const;
+
+    // Requires: header().sh_type == SHT_STRTAB.
+    string_view ReadString(Elf64_Word index) const;
+
+    // Requires: header().sh_type == SHT_SYMTAB || header().sh_type ==
+    // SHT_DYNSYM
+    void ReadSymbol(Elf64_Word index, Elf64_Sym* sym,
+                    string_view* file_range) const;
+
+    // Requires: header().sh_type == SHT_REL
+    void ReadRelocation(Elf64_Word index, Elf64_Rel* rel,
+                        string_view* file_range) const;
+
+    // Requires: header().sh_type == SHT_RELA
+    void ReadRelocationWithAddend(Elf64_Word index, Elf64_Rela* rel,
+                                  string_view* file_range) const;
+
+    const ElfFile& elf() const { return *elf_; }
+
+   private:
+    friend class ElfFile;
+    const ElfFile* elf_;
+    Elf64_Shdr header_;
+    string_view contents_;
+    string_view range_;
+  };
+
+  // Steps through the notes stored in a section's contents. The constructor
+  // already advances to the first note, so check IsDone() before reading the
+  // accessors.
+  class NoteIter {
+   public:
+    NoteIter(const Section& section)
+        : elf_(&section.elf()), remaining_(section.contents()) {
+      Next();
+    }
+
+    bool IsDone() const { return done_; }
+    uint32_t type() const { return type_; }
+    string_view name() const { return name_; }
+    string_view descriptor() const { return descriptor_; }
+
+    void Next();
+
+   // NOTE(review): the state below is public in the original; it looks like
+   // it was meant to be private -- confirm before changing.
+   public:
+    const ElfFile* elf_;
+    string_view name_;
+    string_view descriptor_;
+    string_view remaining_;
+    uint32_t type_;
+    bool done_ = false;
+  };
+
+  void ReadSegment(Elf64_Word index, Segment* segment) const;
+  void ReadSection(Elf64_Word index, Section* section) const;
+
+  bool FindSectionByName(absl::string_view name, Section* section) const;
+
+  bool is_64bit() const { return is_64bit_; }
+  bool is_native_endian() const { return is_native_endian_; }
+
+ private:
+  friend class Section;
+
+  bool Initialize();
+
+  // Bounds-checked view of `n` bytes of the file starting at `start`.
+  string_view GetRegion(uint64_t start, uint64_t n) const {
+    return StrictSubstr(data_, start, n);
+  }
+
+  // Shared code for reading various ELF structures. Handles endianness
+  // conversion and 32->64 bit conversion, when necessary.
+  class StructReader {
+   public:
+    StructReader(const ElfFile& elf, string_view data)
+        : elf_(elf), data_(data) {}
+
+    // Reads the structure at `offset` within the reader's data into `*out`.
+    // Only a 64-bit, native-endian file can be copied verbatim; every other
+    // combination goes through ReadFallback().
+    template <class T32, class T64, class Munger>
+    void Read(uint64_t offset, Munger /*munger*/, absl::string_view* range,
+              T64* out) const {
+      if (!elf_.is_64bit() || !elf_.is_native_endian()) {
+        ReadFallback<T32, T64, Munger>(offset, range, out);
+        return;
+      }
+      Memcpy(offset, range, out);
+    }
+
+   private:
+    const ElfFile& elf_;
+    string_view data_;
+
+    template <class T32, class T64, class Munger>
+    void ReadFallback(uint64_t offset, absl::string_view* range,
+                      T64* out) const;
+
+    // Copies sizeof(T) bytes at `offset` into `*out` after checking that they
+    // lie inside the data. When `range` is non-null it receives the span of
+    // bytes that was read.
+    template <class T>
+    void Memcpy(uint64_t offset, absl::string_view* range, T* out) const {
+      const uint64_t limit = CheckedAdd(offset, sizeof(T));
+      if (limit > data_.size()) {
+        THROW("out-of-bounds read to ELF file");
+      }
+      const char* src = data_.data() + offset;
+      if (range) {
+        *range = absl::string_view(src, sizeof(*out));
+      }
+      memcpy(out, src, sizeof(*out));
+    }
+  };
+
+  // Convenience wrapper: reads one structure out of `contents` through a
+  // temporary StructReader.
+  template <class T32, class T64, class Munger>
+  void ReadStruct(absl::string_view contents, uint64_t offset, Munger munger,
+                  absl::string_view* range, T64* out) const {
+    StructReader(*this, contents).Read<T32>(offset, munger, range, out);
+  }
+
+  bool ok_;
+  bool is_64bit_;
+  bool is_native_endian_;
+  string_view data_;
+  Elf64_Ehdr header_;
+  Elf64_Xword section_count_;
+  Elf64_Xword section_string_index_;
+  string_view header_region_;
+  string_view section_headers_;
+  string_view segment_headers_;
+  Section section_name_table_;
+};
+
+// ELF uses different structure definitions for 32/64 bit files. The sizes of
+// members are different, and members are even in a different order!
+//
+// These mungers can convert 32 bit structures to 64-bit ones. They can also
+// handle converting endianness. We use templates so a single template function
+// can handle all three patterns:
+//
+// 32 native -> 64 native
+// 32 swapped -> 64 native
+// 64 swapped -> 64 native
+
+// Fills an Elf64_Ehdr from an ELF file header of type `Src`. The e_ident
+// bytes are copied as they are; every other field is passed through `func`.
+struct EhdrMunger {
+  template <class Src, class Conv>
+  void operator()(const Src& from, Elf64_Ehdr* to, Conv func) {
+    memmove(&to->e_ident[0], &from.e_ident[0], EI_NIDENT);
+
+    to->e_type = func(from.e_type);
+    to->e_machine = func(from.e_machine);
+    to->e_version = func(from.e_version);
+    to->e_entry = func(from.e_entry);
+    to->e_phoff = func(from.e_phoff);
+    to->e_shoff = func(from.e_shoff);
+    to->e_flags = func(from.e_flags);
+    to->e_ehsize = func(from.e_ehsize);
+    to->e_phentsize = func(from.e_phentsize);
+    to->e_phnum = func(from.e_phnum);
+    to->e_shentsize = func(from.e_shentsize);
+    to->e_shnum = func(from.e_shnum);
+    to->e_shstrndx = func(from.e_shstrndx);
+  }
+};
+
+// Field-by-field conversion of a section header into Elf64_Shdr, passing
+// every member through `conv`.
+struct ShdrMunger {
+  template <class From, class Func>
+  void operator()(const From& src, Elf64_Shdr* dst, Func conv) {
+    dst->sh_name = conv(src.sh_name);
+    dst->sh_type = conv(src.sh_type);
+    dst->sh_flags = conv(src.sh_flags);
+    dst->sh_addr = conv(src.sh_addr);
+    dst->sh_offset = conv(src.sh_offset);
+    dst->sh_size = conv(src.sh_size);
+    dst->sh_link = conv(src.sh_link);
+    dst->sh_info = conv(src.sh_info);
+    dst->sh_addralign = conv(src.sh_addralign);
+    dst->sh_entsize = conv(src.sh_entsize);
+  }
+};
+
+// Field-by-field conversion of a program (segment) header into Elf64_Phdr,
+// passing every member through `conv`.
+struct PhdrMunger {
+  template <class From, class Func>
+  void operator()(const From& src, Elf64_Phdr* dst, Func conv) {
+    dst->p_type = conv(src.p_type);
+    dst->p_flags = conv(src.p_flags);
+    dst->p_offset = conv(src.p_offset);
+    dst->p_vaddr = conv(src.p_vaddr);
+    dst->p_paddr = conv(src.p_paddr);
+    dst->p_filesz = conv(src.p_filesz);
+    dst->p_memsz = conv(src.p_memsz);
+    dst->p_align = conv(src.p_align);
+  }
+};
+
+// Field-by-field conversion of a symbol table entry into Elf64_Sym, passing
+// every member through `conv`.
+struct SymMunger {
+  template <class From, class Func>
+  void operator()(const From& src, Elf64_Sym* dst, Func conv) {
+    dst->st_name = conv(src.st_name);
+    dst->st_info = conv(src.st_info);
+    dst->st_other = conv(src.st_other);
+    dst->st_shndx = conv(src.st_shndx);
+    dst->st_value = conv(src.st_value);
+    dst->st_size = conv(src.st_size);
+  }
+};
+
+// Field-by-field conversion of a relocation entry (no addend) into Elf64_Rel.
+struct RelMunger {
+  template <class From, class Func>
+  void operator()(const From& src, Elf64_Rel* dst, Func conv) {
+    dst->r_offset = conv(src.r_offset);
+    dst->r_info = conv(src.r_info);
+  }
+};
+
+// Field-by-field conversion of a relocation entry with addend into Elf64_Rela.
+struct RelaMunger {
+  template <class From, class Func>
+  void operator()(const From& src, Elf64_Rela* dst, Func conv) {
+    dst->r_offset = conv(src.r_offset);
+    dst->r_info = conv(src.r_info);
+    dst->r_addend = conv(src.r_addend);
+  }
+};
+
+// Field-by-field conversion of a note header into Elf64_Nhdr.
+struct NoteMunger {
+  template <class From, class Func>
+  void operator()(const From& src, Elf64_Nhdr* dst, Func conv) {
+    dst->n_namesz = conv(src.n_namesz);
+    dst->n_descsz = conv(src.n_descsz);
+    dst->n_type = conv(src.n_type);
+  }
+};
+
+// Reads a structure that is not already in 64-bit native-endian form and
+// converts it into *out with the Munger:
+//   - 32-bit file: read the T32 layout, then widen (byte-swapping if needed);
+//   - 64-bit file: read the T64 layout and byte-swap it in place.
+template <class T32, class T64, class Munger>
+void ElfFile::StructReader::ReadFallback(uint64_t offset,
+                                         absl::string_view* range,
+                                         T64* out) const {
+  Munger munge;
+
+  if (!elf_.is_64bit()) {
+    T32 narrow;
+    Memcpy(offset, range, &narrow);
+    if (elf_.is_native_endian()) {
+      munge(narrow, out, NullFunc());
+    } else {
+      munge(narrow, out, ByteSwapFunc());
+    }
+    return;
+  }
+
+  // A 64-bit file only reaches the fallback when its byte order is foreign.
+  assert(!elf_.is_native_endian());
+  Memcpy(offset, range, out);
+  munge(*out, out, ByteSwapFunc());
+}
+
+// Looks up this section's name in the file's section name string table.
+// A zero sh_name yields an empty (null) view instead of a table lookup.
+string_view ElfFile::Section::GetName() const {
+  const bool has_name = header_.sh_name != SHN_UNDEF;
+  return has_name ? elf_->section_name_table_.ReadString(header_.sh_name)
+                  : string_view(nullptr, 0);
+}
+
+// Returns the NUL-terminated string that starts at byte `index` of this
+// string table section (the terminator itself is not part of the result).
+// Throws if `index` is zero, lies outside the table, or no terminator exists.
+string_view ElfFile::Section::ReadString(Elf64_Word index) const {
+  assert(header().sh_type == SHT_STRTAB);
+
+  const bool in_range = index != SHN_UNDEF && index < contents_.size();
+  if (!in_range) {
+    THROWF("can't read index $0 from strtab, total size is $1", index,
+           contents_.size());
+  }
+
+  // Everything from `index` to the end of the table; the wanted string is the
+  // prefix of this up to the first NUL byte.
+  const string_view tail = StrictSubstr(contents_, index);
+  const void* terminator = memchr(tail.data(), '\0', tail.size());
+  if (terminator == NULL) {
+    THROW("no NULL terminator found");
+  }
+
+  const size_t length = static_cast<const char*>(terminator) - tail.data();
+  return tail.substr(0, length);
+}
+
+// Number of fixed-size entries in this section: contents size divided by
+// sh_entsize.  Throws when sh_entsize is zero.
+Elf64_Word ElfFile::Section::GetEntryCount() const {
+  const auto entry_size = header_.sh_entsize;
+  if (entry_size == 0) {
+    THROW("sh_entsize is zero");
+  }
+  return contents_.size() / entry_size;
+}
+
+// Reads symbol table entry `index` of this section into *sym (64-bit form).
+// If `file_range` is non-null it receives the bytes the entry occupies.
+void ElfFile::Section::ReadSymbol(Elf64_Word index, Elf64_Sym* sym,
+                                  string_view* file_range) const {
+  assert(header().sh_type == SHT_SYMTAB || header().sh_type == SHT_DYNSYM);
+  // Overflow-checked multiply (as ReadSection/ReadSegment use) so that a
+  // hostile sh_entsize cannot wrap the offset back into bounds and make us
+  // read the wrong entry.
+  uint64_t offset = CheckedMul(header_.sh_entsize, index);
+  elf_->ReadStruct<Elf32_Sym>(contents(), offset, SymMunger(), file_range, sym);
+}
+
+// Reads relocation entry `index` (no addend) of this section into *rel.
+// If `file_range` is non-null it receives the bytes the entry occupies.
+void ElfFile::Section::ReadRelocation(Elf64_Word index, Elf64_Rel* rel,
+                                      string_view* file_range) const {
+  assert(header().sh_type == SHT_REL);
+  // Overflow-checked multiply (as ReadSection/ReadSegment use) so that a
+  // hostile sh_entsize cannot wrap the offset back into bounds.
+  uint64_t offset = CheckedMul(header_.sh_entsize, index);
+  elf_->ReadStruct<Elf32_Rel>(contents(), offset, RelMunger(), file_range, rel);
+}
+
+// Reads relocation-with-addend entry `index` of this section into *rela.
+// If `file_range` is non-null it receives the bytes the entry occupies.
+void ElfFile::Section::ReadRelocationWithAddend(Elf64_Word index,
+                                                Elf64_Rela* rela,
+                                                string_view* file_range) const {
+  assert(header().sh_type == SHT_RELA);
+  // Overflow-checked multiply (as ReadSection/ReadSegment use) so that a
+  // hostile sh_entsize cannot wrap the offset back into bounds.
+  uint64_t offset = CheckedMul(header_.sh_entsize, index);
+  elf_->ReadStruct<Elf32_Rela>(contents(), offset, RelaMunger(), file_range,
+                               rela);
+}
+
+// Advances to the next note in remaining_, filling in type_, name_ and
+// descriptor_.  Sets done_ once no bytes are left.  Throws if the note header
+// or its (4-byte aligned) name/descriptor run past the available data.
+void ElfFile::NoteIter::Next() {
+  if (remaining_.empty()) {
+    done_ = true;
+    return;
+  }
+
+  Elf_Note note;
+  elf_->ReadStruct<Elf_Note>(remaining_, 0, NoteMunger(), nullptr, &note);
+
+  // 32-bit and 64-bit note are the same size, so we don't have to treat
+  // them separately when advancing.
+  AdvancePastStruct<Elf_Note>(&remaining_);
+
+  type_ = note.n_type;
+  name_ = StrictSubstr(remaining_, 0, note.n_namesz);
+
+  // Size might include NULL terminator.  A note may also have an empty name
+  // (n_namesz == 0); guard so we never index one before the start of name_.
+  if (!name_.empty() && name_[name_.size() - 1] == 0) {
+    name_ = name_.substr(0, name_.size() - 1);
+  }
+
+  // Name and descriptor are each padded to a multiple of 4 bytes.
+  remaining_ = StrictSubstr(remaining_, AlignUp(note.n_namesz, 4));
+  descriptor_ = StrictSubstr(remaining_, 0, note.n_descsz);
+  remaining_ = StrictSubstr(remaining_, AlignUp(note.n_descsz, 4));
+}
+
+// Parses the identification bytes and file header of data_, then caches the
+// header / section-header / segment-header regions and the section name
+// string table.
+//
+// Returns false when data_ is not an ELF file (shorter than EI_NIDENT bytes or
+// wrong magic).  Throws for an ELF file with an unrecognised class or data
+// encoding, and when the section string index names a non-strtab section.
+bool ElfFile::Initialize() {
+ if (data_.size() < EI_NIDENT) {
+ return false;
+ }
+
+ unsigned char ident[EI_NIDENT];
+ memcpy(ident, data_.data(), EI_NIDENT);
+
+ if (memcmp(ident, "\177ELF", 4) != 0) {
+ // Not an ELF file.
+ return false;
+ }
+
+ // is_64bit_ / is_native_endian_ must be set before the first ReadStruct()
+ // call below, because StructReader consults them to pick the layout.
+ switch (ident[EI_CLASS]) {
+ case ELFCLASS32:
+ is_64bit_ = false;
+ break;
+ case ELFCLASS64:
+ is_64bit_ = true;
+ break;
+ default:
+ THROWF("unexpected ELF class: $0", ident[EI_CLASS]);
+ }
+
+ switch (ident[EI_DATA]) {
+ case ELFDATA2LSB:
+ is_native_endian_ = IsLittleEndian();
+ break;
+ case ELFDATA2MSB:
+ is_native_endian_ = !IsLittleEndian();
+ break;
+ default:
+ THROWF("unexpected ELF data: $0", ident[EI_DATA]);
+ }
+
+ absl::string_view range;
+ ReadStruct<Elf32_Ehdr>(entire_file(), 0, EhdrMunger(), &range, &header_);
+
+ Section section0;
+ bool has_section0 = 0;
+
+ // ELF extensions: if certain fields overflow, we have to find their true data
+ // from elsewhere. For more info see:
+ // https://docs.oracle.com/cd/E19683-01/817-3677/chapter6-94076/index.html
+ if (header_.e_shoff > 0 &&
+ data_.size() > (header_.e_shoff + header_.e_shentsize)) {
+ // Temporarily pretend there is exactly one section so that ReadSection(0)
+ // passes its index bounds check; the real count is assigned just below.
+ section_count_ = 1;
+ ReadSection(0, &section0);
+ has_section0 = true;
+ }
+
+ section_count_ = header_.e_shnum;
+ section_string_index_ = header_.e_shstrndx;
+
+ // Extended numbering: the true values live in section header 0.
+ if (section_count_ == 0 && has_section0) {
+ section_count_ = section0.header().sh_size;
+ }
+
+ if (section_string_index_ == SHN_XINDEX && has_section0) {
+ section_string_index_ = section0.header().sh_link;
+ }
+
+ header_region_ = GetRegion(0, header_.e_ehsize);
+ section_headers_ = GetRegion(header_.e_shoff,
+ CheckedMul(header_.e_shentsize, section_count_));
+ segment_headers_ = GetRegion(
+ header_.e_phoff, CheckedMul(header_.e_phentsize, header_.e_phnum));
+
+ // Load the table that Section::GetName() reads section names from.
+ if (section_count_ > 0) {
+ ReadSection(section_string_index_, &section_name_table_);
+ if (section_name_table_.header().sh_type != SHT_STRTAB) {
+ THROW("section string index pointed to non-strtab");
+ }
+ }
+
+ return true;
+}
+
+// Loads program header `index` into *segment: the converted header, the file
+// bytes the header occupies, and the segment's file contents
+// (p_offset .. p_offset + p_filesz).  Throws if `index` is out of range.
+void ElfFile::ReadSegment(Elf64_Word index, Segment* segment) const {
+  const bool exists = index < header_.e_phnum;
+  if (!exists) {
+    THROWF("segment $0 doesn't exist, only $1 segments", index,
+           header_.e_phnum);
+  }
+
+  const uint64_t header_offset =
+      CheckedAdd(header_.e_phoff, CheckedMul(header_.e_phentsize, index));
+  Elf64_Phdr& phdr = segment->header_;
+  ReadStruct<Elf32_Phdr>(entire_file(), header_offset, PhdrMunger(),
+                         &segment->range_, &phdr);
+  segment->contents_ = GetRegion(phdr.p_offset, phdr.p_filesz);
+}
+
+// Loads section header `index` into *section: the converted header, the file
+// bytes the header occupies, the section's file contents (empty for
+// SHT_NOBITS) and a back-pointer to this file.  Throws if `index` is not
+// below section_count_.
+void ElfFile::ReadSection(Elf64_Word index, Section* section) const {
+  const bool exists = index < section_count_;
+  if (!exists) {
+    THROWF("tried to read section $0, but there are only $1", index,
+           section_count_);
+  }
+
+  const uint64_t header_offset =
+      CheckedAdd(header_.e_shoff, CheckedMul(header_.e_shentsize, index));
+  Elf64_Shdr& shdr = section->header_;
+  ReadStruct<Elf32_Shdr>(entire_file(), header_offset, ShdrMunger(),
+                         &section->range_, &shdr);
+
+  // SHT_NOBITS sections occupy no bytes in the file.
+  section->contents_ = (shdr.sh_type == SHT_NOBITS)
+                           ? string_view()
+                           : GetRegion(shdr.sh_offset, shdr.sh_size);
+  section->elf_ = this;
+}
+
+// Linear search over all sections for one called `name`.  On success returns
+// true with *section holding the match; otherwise returns false (and *section
+// holds whichever section was read last, if any).
+bool ElfFile::FindSectionByName(absl::string_view name, Section* section) const {
+  Elf64_Word index = 0;
+  while (index < section_count_) {
+    ReadSection(index, section);
+    const bool matches = section->GetName() == name;
+    if (matches) {
+      return true;
+    }
+    ++index;
+  }
+  return false;
+}
+
+
+// ArFile //////////////////////////////////////////////////////////////////////
+
+// For parsing .a files (static libraries).
+//
+// The best documentation I've been able to find for this file format is
+// Wikipedia: https://en.wikipedia.org/wiki/Ar_(Unix)
+//
+// So far we only parse the System V / GNU variant.
+
+class ArFile {
+ public:
+  // Splits `data` into the leading magic string and everything after it.
+  // Both pieces are clamped to the data actually available, so constructing
+  // an ArFile over a very short (or empty) buffer never throws; IsOpen()
+  // simply reports false for it.
+  ArFile(string_view data)
+      : magic_(data.substr(0, kMagicSize)),
+        contents_(data.substr(std::min<size_t>(data.size(), kMagicSize))) {}
+
+  // True when the data starts with the archive magic.
+  bool IsOpen() const { return magic() == string_view(kMagic); }
+
+  string_view magic() const { return magic_; }
+  string_view contents() const { return contents_; }
+
+  // One entry of the archive, as produced by MemberReader::ReadMember().
+  struct MemberFile {
+    enum {
+      kSymbolTable,        // Stores a symbol table.
+      kLongFilenameTable,  // Stores long filenames, users should ignore.
+      kNormal,             // Regular data file.
+    } file_type;
+    string_view filename;  // Only when file_type == kNormal
+    size_t size;
+    string_view header;
+    string_view contents;
+  };
+
+  // Sequentially walks the members stored after the magic.
+  class MemberReader {
+   public:
+    MemberReader(const ArFile& ar) : remaining_(ar.contents()) {}
+    bool ReadMember(MemberFile* file);
+    bool IsEof() const { return remaining_.size() == 0; }
+
+   private:
+    // Removes and returns the next n bytes; throws if fewer remain.
+    string_view Consume(size_t n) {
+      if (remaining_.size() < n) {
+        THROW("premature end of file");
+      }
+      string_view ret = remaining_.substr(0, n);
+      remaining_.remove_prefix(n);
+      return ret;
+    }
+
+    // Contents of the most recent long-filename table member.
+    string_view long_filenames_;
+    string_view remaining_;
+  };
+
+ private:
+  const string_view magic_;
+  const string_view contents_;
+
+  static constexpr const char* kMagic = "!<arch>\n";
+  static constexpr int kMagicSize = 8;
+};
+
+// Reads the next archive member into *file.  Returns false at end of data,
+// true otherwise.  Throws on truncated data, an unterminated long filename,
+// an unknown special member name, or a BSD-style (non slash-terminated) name.
+bool ArFile::MemberReader::ReadMember(MemberFile* file) {
+  // Fixed-size, all-text member header.
+  struct Header {
+    char file_id[16];
+    char modified_timestamp[12];
+    char owner_id[6];
+    char group_id[6];
+    char mode[8];
+    char size[10];
+    char end[2];
+  };
+
+  if (remaining_.size() == 0) {
+    return false;
+  } else if (remaining_.size() < sizeof(Header)) {
+    THROW("Premature EOF in AR data");
+  }
+
+  const Header* header = reinterpret_cast<const Header*>(remaining_.data());
+  file->header = Consume(sizeof(Header));
+
+  string_view file_id(&header->file_id[0], sizeof(header->file_id));
+  string_view size_str(&header->size[0], sizeof(header->size));
+  file->size = StringViewToSize(size_str);
+  file->contents = Consume(file->size);
+  file->file_type = MemberFile::kNormal;
+
+  // Members start on even offsets: an odd-sized member is followed by one
+  // padding byte that is not counted in its size.  Skip it so the next header
+  // is read from the right place (tolerating its absence at end of file).
+  if (file->size % 2 != 0 && !remaining_.empty()) {
+    Consume(1);
+  }
+
+  if (file_id[0] == '/') {
+    // Special filename, internal to the format.
+    if (file_id[1] == ' ') {
+      file->file_type = MemberFile::kSymbolTable;
+    } else if (file_id[1] == '/') {
+      file->file_type = MemberFile::kLongFilenameTable;
+      long_filenames_ = file->contents;
+    } else if (isdigit(static_cast<unsigned char>(file_id[1]))) {
+      // "/<offset>": the real name lives in the long filename table.
+      size_t offset = StringViewToSize(file_id.substr(1));
+      size_t end = long_filenames_.find('/', offset);
+
+      if (end == std::string::npos) {
+        THROW("Unterminated long filename");
+      }
+
+      file->filename = long_filenames_.substr(offset, end - offset);
+    } else {
+      THROW("Unexpected special filename in AR archive");
+    }
+  } else {
+    // Normal filename, slash-terminated.
+    size_t slash = file_id.find('/');
+
+    if (slash == std::string::npos) {
+      THROW("BSD-style AR not yet implemented");
+    }
+
+    file->filename = file_id.substr(0, slash);
+  }
+
+  return true;
+}
+
+// Forwards to sink->AddFileRange() unless `sink` is null, in which case the
+// call is a no-op.
+void MaybeAddFileRange(const char* analyzer, RangeSink* sink, string_view label,
+                       string_view range) {
+  if (sink == nullptr) {
+    return;
+  }
+  sink->AddFileRange(analyzer, label, range);
+}
+
+// Calls func(elf, filename, index_base) for every ELF image in `file`,
+// whether `file` is a bare ELF file or an AR archive of them.
+//
+// For archives, index_base is the running total of the section counts of the
+// ELF members already visited, and the archive's own structures (magic, member
+// headers, symbol table, non-ELF members) are reported to `sink` when it is
+// non-null.  Throws if `file` is neither an ELF file nor an archive.
+template <class Func>
+void ForEachElf(const InputFile& file, RangeSink* sink, Func func) {
+  ArFile ar_file(file.data());
+  unsigned long index_base = 0;
+
+  if (!ar_file.IsOpen()) {
+    // Not an archive, so the input itself has to be an ELF file.
+    ElfFile elf(file.data());
+    if (!elf.IsOpen()) {
+      THROWF("Not an ELF or Archive file: $0", file.filename());
+    }
+
+    func(elf, file.filename(), index_base);
+    return;
+  }
+
+  const char* const kAnalyzer = "ar_archive";
+  const char* const kHeaders = "[AR Headers]";
+
+  ArFile::MemberReader reader(ar_file);
+  ArFile::MemberFile member;
+
+  MaybeAddFileRange(kAnalyzer, sink, kHeaders, ar_file.magic());
+
+  while (reader.ReadMember(&member)) {
+    MaybeAddFileRange(kAnalyzer, sink, kHeaders, member.header);
+
+    if (member.file_type == ArFile::MemberFile::kNormal) {
+      ElfFile elf(member.contents);
+      if (elf.IsOpen()) {
+        func(elf, member.filename, index_base);
+        index_base += elf.section_count();
+      } else {
+        MaybeAddFileRange(kAnalyzer, sink, "[AR Non-ELF Member File]",
+                          member.contents);
+      }
+    } else if (member.file_type == ArFile::MemberFile::kSymbolTable) {
+      MaybeAddFileRange(kAnalyzer, sink, "[AR Symbol Table]", member.contents);
+    } else if (member.file_type == ArFile::MemberFile::kLongFilenameTable) {
+      MaybeAddFileRange(kAnalyzer, sink, kHeaders, member.contents);
+    }
+  }
+}
+
+// For object files, addresses are relative to the section they live in, which
+// is indicated by ndx. We split this into:
+//
+// - 24 bits for index (up to 16M symbols with -ffunction-sections)
+// - 40 bits for address (up to 1TB section)
+//
+// For anything that is not an object file the address is returned unchanged.
+// Throws if ndx or addr does not fit in its share of the bits.
+static uint64_t ToVMAddr(size_t addr, long ndx, bool is_object) {
+  if (!is_object) {
+    return addr;
+  }
+
+  // All shifts are done on 64-bit unsigned values: shifting a (possibly
+  // 32-bit, possibly negative) long by 40 would be undefined behaviour.
+  constexpr unsigned kIndexBits = 24;
+  constexpr unsigned kAddrBits = 40;
+  const uint64_t addr64 = addr;
+
+  if (ndx < 0 || static_cast<uint64_t>(ndx) >= (uint64_t{1} << kIndexBits)) {
+    THROW("ndx overflow: too many sections");
+  }
+  if (addr64 >= (uint64_t{1} << kAddrBits)) {
+    THROW("address overflow: section too big");
+  }
+  return (static_cast<uint64_t>(ndx) << kAddrBits) | addr64;
+}
+
+// True when `data` starts with the AR archive magic.
+static bool IsArchiveFile(string_view data) {
+  return ArFile(data).IsOpen();
+}
+
+// True when `data` is an AR archive or a relocatable (ET_REL) ELF file.
+static bool IsObjectFile(string_view data) {
+  ElfFile elf(data);
+  if (IsArchiveFile(data)) {
+    return true;
+  }
+  return elf.IsOpen() && elf.header().e_type == ET_REL;
+}
+
+// Throws if the sink's input file is an object file or archive, naming
+// `source` as the data source that cannot be used on it.
+static void CheckNotObject(const char* source, RangeSink* sink) {
+  const bool is_object = IsObjectFile(sink->input_file().data());
+  if (!is_object) {
+    return;
+  }
+  THROWF(
+      "can't use data source '$0' on object files (only binaries and shared "
+      "libraries)",
+      source);
+}
+
+// Maps an ELF e_machine value onto the Capstone architecture/mode pair used
+// for disassembly.  Both *arch and *mode are written for every recognised
+// machine; an unrecognised machine throws.
+static void ElfMachineToCapstone(Elf64_Half e_machine, cs_arch* arch,
+                                 cs_mode* mode) {
+  switch (e_machine) {
+    case EM_386:
+      *arch = CS_ARCH_X86;
+      *mode = CS_MODE_32;
+      break;
+    case EM_X86_64:
+      *arch = CS_ARCH_X86;
+      *mode = CS_MODE_64;
+      break;
+
+    // These aren't tested, but we include them on the off-chance
+    // that it will work.
+    case EM_ARM:
+      *arch = CS_ARCH_ARM;
+      *mode = CS_MODE_LITTLE_ENDIAN;
+      break;
+    case EM_AARCH64:
+      *arch = CS_ARCH_ARM64;
+      *mode = CS_MODE_ARM;
+      break;
+    case EM_MIPS:
+      *arch = CS_ARCH_MIPS;
+      // This case used to leave *mode untouched, handing callers whatever
+      // value happened to be there.  e_machine alone does not reveal the
+      // word size or byte order, so pick a deterministic best-effort default.
+      // NOTE(review): 32-bit is a guess; refine from EI_CLASS/EI_DATA if MIPS
+      // disassembly ever matters.
+      *mode = CS_MODE_32;
+      break;
+    case EM_PPC:
+      *arch = CS_ARCH_PPC;
+      *mode = CS_MODE_32;
+      break;
+    case EM_PPC64:
+      *arch = CS_ARCH_PPC;
+      *mode = CS_MODE_64;
+      break;
+    case EM_SPARC:
+      *arch = CS_ARCH_SPARC;
+      *mode = CS_MODE_BIG_ENDIAN;
+      break;
+    case EM_SPARCV9:
+      *arch = CS_ARCH_SPARC;
+      *mode = CS_MODE_V9;
+      break;
+    default:
+      THROWF("Unknown ELF machine value: $0'", e_machine);
+  }
+}
+
+// Determines the Capstone arch/mode for `file` from the e_machine field of
+// each ELF image it contains.
+static void ReadElfArchMode(const InputFile& file, cs_arch* arch, cs_mode* mode) {
+  // Last .o file wins? (For .a files)? It's kind of arbitrary,
+  // but a single .a file shouldn't have multiple archs in it.
+  auto record_arch = [=](const ElfFile& elf, string_view /*filename*/,
+                         uint32_t /*index_base*/) {
+    ElfMachineToCapstone(elf.header().e_machine, arch, mode);
+  };
+  ForEachElf(file, nullptr, record_arch);
+}
+
+// Walks every SHT_SYMTAB section of every ELF image in `file`.  For each
+// defined, non-section symbol with a non-zero size:
+//   - unless `disassemble` is set, adds its (demangled) VM range to `sink`
+//     (if non-null);
+//   - records name -> (address, size) in `table` (if non-null);
+//   - when `disassemble` is set and the symbol is a function, disassembles it
+//     to discover references.  `sink` must be non-null in that case.
+static void ReadELFSymbols(const InputFile& file, RangeSink* sink,
+                           SymbolTable* table, bool disassemble) {
+  bool is_object = IsObjectFile(file.data());
+  DisassemblyInfo info;
+  DisassemblyInfo* infop = &info;
+  // The architecture is only needed for disassembly.  Detecting it
+  // unconditionally made plain symbol reads fail on machines the
+  // disassembler does not know about.
+  if (disassemble) {
+    ReadElfArchMode(file, &info.arch, &info.mode);
+  }
+
+  ForEachElf(
+      file, sink,
+      [=](const ElfFile& elf, string_view /*filename*/, uint32_t index_base) {
+        for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
+          ElfFile::Section section;
+          elf.ReadSection(i, &section);
+
+          if (section.header().sh_type != SHT_SYMTAB) {
+            continue;
+          }
+
+          Elf64_Word symbol_count = section.GetEntryCount();
+
+          // Find the corresponding section where the strings for the symbol
+          // table can be found.
+          ElfFile::Section strtab_section;
+          elf.ReadSection(section.header().sh_link, &strtab_section);
+          if (strtab_section.header().sh_type != SHT_STRTAB) {
+            THROW("symtab section pointed to non-strtab section");
+          }
+
+          // Entry 0 of a symbol table is reserved, so start at 1.
+          for (Elf64_Word sym_index = 1; sym_index < symbol_count;
+               sym_index++) {
+            Elf64_Sym sym;
+
+            section.ReadSymbol(sym_index, &sym, nullptr);
+
+            if (ELF64_ST_TYPE(sym.st_info) == STT_SECTION) {
+              continue;
+            }
+
+            if (sym.st_shndx == STN_UNDEF) {
+              continue;
+            }
+
+            if (sym.st_size == 0) {
+              // Maybe try to refine? See ReadELFSectionsRefineSymbols below.
+              continue;
+            }
+
+            string_view name = strtab_section.ReadString(sym.st_name);
+            uint64_t full_addr =
+                ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object);
+            if (sink && !disassemble) {
+              sink->AddVMRangeAllowAlias(
+                  "elf_symbols", full_addr, sym.st_size,
+                  ItaniumDemangle(name, sink->data_source()));
+            }
+            if (table) {
+              table->insert(
+                  std::make_pair(name, std::make_pair(full_addr, sym.st_size)));
+            }
+            if (disassemble && ELF64_ST_TYPE(sym.st_info) == STT_FUNC) {
+              if (verbose_level > 1) {
+                // name points into the string table and is followed by the
+                // NUL that ReadString() located, so %s is safe here.
+                printf("Disassembling function: %s\n", name.data());
+              }
+              infop->text =
+                  sink->TranslateVMToFile(full_addr).substr(0, sym.st_size);
+              infop->start_address = full_addr;
+              DisassembleFindReferences(*infop, sink);
+            }
+          }
+        }
+      });
+}
+
+// Attributes the file bytes of each symbol table entry in `section`, and of
+// the symbol's name in the linked string table, to the VM address of the
+// symbol.  Section symbols, undefined symbols and unnamed symbols are skipped.
+static void ReadELFSymbolTableEntries(const ElfFile& elf,
+                                      const ElfFile::Section& section,
+                                      uint64_t index_base, bool is_object,
+                                      RangeSink* sink) {
+  const Elf64_Word symbol_count = section.GetEntryCount();
+
+  // sh_link of a symbol table names the string table holding its names.
+  ElfFile::Section strtab_section;
+  elf.ReadSection(section.header().sh_link, &strtab_section);
+  if (strtab_section.header().sh_type != SHT_STRTAB) {
+    THROW("symtab section pointed to non-strtab section");
+  }
+
+  for (Elf64_Word idx = 1; idx < symbol_count; ++idx) {
+    Elf64_Sym sym;
+    string_view sym_range;
+    section.ReadSymbol(idx, &sym, &sym_range);
+
+    const bool is_section_symbol = ELF64_ST_TYPE(sym.st_info) == STT_SECTION;
+    const bool is_undefined = sym.st_shndx == STN_UNDEF;
+    const bool is_unnamed = sym.st_name == SHN_UNDEF;
+    if (is_section_symbol || is_undefined || is_unnamed) {
+      continue;
+    }
+
+    const string_view name = strtab_section.ReadString(sym.st_name);
+    const uint64_t full_addr =
+        ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object);
+    // Capture the trailing NULL.
+    const string_view name_with_nul(name.data(), name.size() + 1);
+    sink->AddFileRangeForVMAddr("elf_symtab_name", full_addr, name_with_nul);
+    sink->AddFileRangeForVMAddr("elf_symtab_sym", full_addr, sym_range);
+  }
+}
+
+// Attributes the file bytes of every relocation-with-addend entry in
+// `section` to the VM address the relocation applies to (r_offset within the
+// section named by sh_info).
+static void ReadELFRelaEntries(const ElfFile::Section& section,
+                               uint64_t index_base, bool is_object,
+                               RangeSink* sink) {
+  Elf64_Word rela_count = section.GetEntryCount();
+  Elf64_Word sh_info = section.header().sh_info;
+  // Unlike symbol tables, relocation tables have no reserved entry 0, so the
+  // walk has to start at index 0 or the first relocation is never attributed.
+  for (Elf64_Word i = 0; i < rela_count; i++) {
+    Elf64_Rela rela;
+    string_view rela_range;
+    section.ReadRelocationWithAddend(i, &rela, &rela_range);
+    uint64_t full_addr =
+        ToVMAddr(rela.r_offset, index_base + sh_info, is_object);
+    sink->AddFileRangeForVMAddr("elf_rela", full_addr, rela_range);
+  }
+}
+
+// Attributes the bytes of the ELF bookkeeping tables themselves (i.e. the
+// space they take up in the file) to the entities they describe:
+//   symbol tables and their name strings (.symtab/.strtab/.dynsym/.dynstr),
+//   relocation-with-addend tables, and
+//   .eh_frame / .eh_frame_hdr.
+static void ReadELFTables(const InputFile& file, RangeSink* sink) {
+  const bool is_object = IsObjectFile(file.data());
+
+  // Disassemble first, because sometimes other tables will refer to things we
+  // discovered through disassembling.
+  ReadELFSymbols(file, sink, nullptr, true);
+
+  // Now scan other tables.
+  auto scan_tables = [sink, is_object](const ElfFile& elf,
+                                       string_view /*filename*/,
+                                       uint32_t index_base) {
+    for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
+      ElfFile::Section section;
+      elf.ReadSection(i, &section);
+
+      const auto type = section.header().sh_type;
+      if (type == SHT_SYMTAB || type == SHT_DYNSYM) {
+        ReadELFSymbolTableEntries(elf, section, index_base, is_object, sink);
+      } else if (type == SHT_RELA) {
+        ReadELFRelaEntries(section, index_base, is_object, sink);
+      }
+
+      // We are looking by section name, which is a little different
+      // than what the loader actually does (which is find
+      // eh_frame_hdr from the program headers and then find eh_frame
+      // fde entries from there). But these section names should be
+      // standard enough that this approach works also.
+      const string_view section_name = section.GetName();
+      if (section_name == ".eh_frame") {
+        ReadEhFrame(section.contents(), sink);
+      } else if (section_name == ".eh_frame_hdr") {
+        ReadEhFrameHdr(section.contents(), sink);
+      }
+    }
+  };
+  ForEachElf(file, sink, scan_tables);
+}
+
+// Selects the label DoReadELFSections() gives to each section's range.
+enum ReportSectionsBy {
+ kReportBySectionName, // The section name as-is.
+ kReportByEscapedSectionName, // "[section <name>]".
+ kReportByFlags, // "Section [...]" built from the A/W/X section flags.
+ kReportByArchiveMember, // The filename handed over by ForEachElf().
+};
+
+// Adds one range per section (index 1 and up) of every ELF image in the
+// sink's input file, labelled according to `report_by`.  The VM size is the
+// section size when SHF_ALLOC is set (else 0); the file size is the section
+// size unless the section is SHT_NOBITS (then 0).
+static void DoReadELFSections(RangeSink* sink, enum ReportSectionsBy report_by) {
+  bool is_object = IsObjectFile(sink->input_file().data());
+  ForEachElf(
+      sink->input_file(), sink,
+      [=](const ElfFile& elf, string_view filename, uint32_t index_base) {
+        std::string name_from_flags;
+        for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
+          ElfFile::Section section;
+          elf.ReadSection(i, &section);
+          string_view name = section.GetName();
+
+          // NOTE(review): this abandons the whole ELF image (including the
+          // archive-member coverage below) at the first unnamed section;
+          // `continue` may have been intended -- confirm before changing.
+          if (name.size() == 0) {
+            return;
+          }
+
+          const auto& header = section.header();
+          auto addr = header.sh_addr;
+          auto size = header.sh_size;
+          auto filesize = (header.sh_type == SHT_NOBITS) ? 0 : size;
+          auto vmsize = (header.sh_flags & SHF_ALLOC) ? size : 0;
+
+          string_view contents = StrictSubstr(section.contents(), 0, filesize);
+
+          uint64_t full_addr = ToVMAddr(addr, index_base + i, is_object);
+
+          if (report_by == kReportByFlags) {
+            // Build e.g. "Section [AX]" from the flags that are set.
+            name_from_flags = "Section [";
+
+            if (header.sh_flags & SHF_ALLOC) {
+              name_from_flags += 'A';
+            }
+
+            if (header.sh_flags & SHF_WRITE) {
+              name_from_flags += 'W';
+            }
+
+            if (header.sh_flags & SHF_EXECINSTR) {
+              name_from_flags += 'X';
+            }
+
+            name_from_flags += ']';
+            sink->AddRange("elf_section", name_from_flags, full_addr, vmsize,
+                           contents);
+          } else if (report_by == kReportBySectionName) {
+            sink->AddRange("elf_section", name, full_addr, vmsize, contents);
+          } else if (report_by == kReportByEscapedSectionName) {
+            if (!sink->IsBaseMap()) {
+              // Attribute the section header's own bytes to whatever the
+              // section contents are attributed to.
+              sink->AddFileRangeForFileRange("elf_section", contents,
+                                             section.range());
+            }
+            sink->AddRange("elf_section",
+                           std::string("[section ") + std::string(name) + "]",
+                           full_addr, vmsize, contents);
+          } else if (report_by == kReportByArchiveMember) {
+            sink->AddRange("elf_section", filename, full_addr, vmsize,
+                           contents);
+          }
+        }
+
+        if (report_by == kReportByArchiveMember) {
+          // Cover unmapped parts of the file.
+          sink->AddFileRange("unmapped_armember", filename, elf.entire_file());
+        }
+      });
+}
+
+// Selects the label DoReadELFSegments() gives to each PT_LOAD segment.
+enum ReportSegmentsBy {
+ kReportBySegmentName, // "LOAD #<index> [<RWX flags>]".
+ kReportByEscapedSegmentName, // The same label wrapped in an extra "[...]".
+};
+
+// Adds one range per PT_LOAD segment (labelled according to `report_by`) and
+// then, in a second pass, one range labelled "TLS" per PT_TLS segment, for
+// every ELF image in the sink's input file.
+static void DoReadELFSegments(RangeSink* sink, ReportSegmentsBy report_by) {
+  ForEachElf(sink->input_file(), sink,
+             [=](const ElfFile& elf, string_view /*filename*/,
+                 uint32_t /*index_base*/) {
+               for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) {
+                 ElfFile::Segment segment;
+                 elf.ReadSegment(i, &segment);
+                 const auto& header = segment.header();
+
+                 if (header.p_type != PT_LOAD) {
+                   continue;
+                 }
+
+                 // Include the segment index in the label, to support embedded.
+                 //
+                 // Including the index in the segment label differentiates
+                 // segments with the same access control (e.g. RWX vs RW). In
+                 // ELF files built for embedded microcontroller projects, a
+                 // segment is used for each distinct type of memory. In simple
+                 // cases, there is a segment for the flash (which will store
+                 // code and read-only data) and a segment for RAM (which
+                 // usually stores globals, stacks, and maybe a heap). In more
+                 // involved projects, there may be special segments for faster
+                 // RAM (e.g. core coupled RAM or CCRAM), or there may even be
+                 // memory overlays to support manual paging of code from flash
+                 // (which may be slow) into RAM.
+                 std::string name(absl::StrCat("LOAD #", i, " ["));
+
+                 if (header.p_flags & PF_R) {
+                   name += 'R';
+                 }
+
+                 if (header.p_flags & PF_W) {
+                   name += 'W';
+                 }
+
+                 if (header.p_flags & PF_X) {
+                   name += 'X';
+                 }
+
+                 name += ']';
+
+                 if (report_by == kReportByEscapedSegmentName) {
+                   name = absl::StrCat("[", name, "]");
+                 }
+
+                 sink->AddRange("elf_segment", name, header.p_vaddr,
+                                header.p_memsz, segment.contents());
+               }
+             });
+  // Second pass: thread-local storage segments.
+  ForEachElf(sink->input_file(), sink,
+             [=](const ElfFile& elf, string_view /*filename*/,
+                 uint32_t /*index_base*/) {
+               for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) {
+                 ElfFile::Segment segment;
+                 elf.ReadSegment(i, &segment);
+                 const auto& header = segment.header();
+                 if (header.p_type != PT_TLS) {
+                   continue;
+                 }
+                 sink->AddRange("elf_segment", "TLS", header.p_vaddr,
+                                header.p_memsz, segment.contents());
+               }
+             });
+}
+
+// Implements the "segments" data source: real program segments for linked
+// binaries, flag-derived pseudo-segments for object files and archives.
+static void ReadELFSegments(RangeSink* sink) {
+  const bool is_object = IsObjectFile(sink->input_file().data());
+  if (!is_object) {
+    DoReadELFSegments(sink, kReportBySegmentName);
+    return;
+  }
+
+  // Object files don't actually have segments. But we can cheat a little bit
+  // and make up "segments" based on section flags. This can be really useful
+  // when you are compiling with -ffunction-sections and -fdata-sections,
+  // because in those cases the actual "sections" report becomes pretty
+  // useless (since every function/data has its own section, it's like the
+  // "symbols" report except less readable).
+  DoReadELFSections(sink, kReportByFlags);
+}
+
+// ELF files put debug info directly into the binary, so we call the DWARF
+// reader directly on them. At the moment we don't attempt to make these
+// work with object files.
+
+// Points the fields of *dwarf at the contents of the correspondingly named
+// .debug_* sections of `file`.  Fields whose section is absent are untouched.
+static void ReadDWARFSections(const InputFile& file, dwarf::File* dwarf) {
+  ElfFile elf(file.data());
+  assert(elf.IsOpen());
+  for (Elf64_Xword idx = 1; idx < elf.section_count(); ++idx) {
+    ElfFile::Section section;
+    elf.ReadSection(idx, &section);
+    const string_view name = section.GetName();
+    const string_view contents = section.contents();
+
+    if (name == ".debug_aranges") {
+      dwarf->debug_aranges = contents;
+    } else if (name == ".debug_str") {
+      dwarf->debug_str = contents;
+    } else if (name == ".debug_info") {
+      dwarf->debug_info = contents;
+    } else if (name == ".debug_types") {
+      dwarf->debug_types = contents;
+    } else if (name == ".debug_abbrev") {
+      dwarf->debug_abbrev = contents;
+    } else if (name == ".debug_line") {
+      dwarf->debug_line = contents;
+    } else if (name == ".debug_loc") {
+      dwarf->debug_loc = contents;
+    } else if (name == ".debug_pubnames") {
+      dwarf->debug_pubnames = contents;
+    } else if (name == ".debug_pubtypes") {
+      dwarf->debug_pubtypes = contents;
+    } else if (name == ".debug_ranges") {
+      dwarf->debug_ranges = contents;
+    }
+  }
+}
+
+// Adds fallback ranges after the real data source has run: escaped section
+// and segment labels, the ELF header tables, and finally "[Unmapped]" over the
+// whole input file.
+void AddCatchAll(RangeSink* sink) {
+  // The last-line fallback to make sure we cover the entire VM space.
+  const bool is_segments_source = sink->data_source() == DataSource::kSegments;
+  if (!is_segments_source) {
+    DoReadELFSections(sink, kReportByEscapedSectionName);
+  }
+  DoReadELFSegments(sink, kReportByEscapedSegmentName);
+
+  auto add_header_ranges = [sink](const ElfFile& elf, string_view /*filename*/,
+                                  uint32_t /*index_base*/) {
+    const char* const kAnalyzer = "elf_catchall";
+    const char* const kLabel = "[ELF Headers]";
+    sink->AddFileRange(kAnalyzer, kLabel, elf.header_region());
+    sink->AddFileRange(kAnalyzer, kLabel, elf.section_headers());
+    sink->AddFileRange(kAnalyzer, kLabel, elf.segment_headers());
+  };
+  ForEachElf(sink->input_file(), sink, add_header_ranges);
+
+  // The last-line fallback to make sure we cover the entire file.
+  sink->AddFileRange("elf_catchall", "[Unmapped]", sink->input_file().data());
+}
+
+// ObjectFile implementation for ELF binaries, ELF object files and AR
+// archives of them.
+class ElfObjectFile : public ObjectFile {
+ public:
+  ElfObjectFile(std::unique_ptr<InputFile> file)
+      : ObjectFile(std::move(file)) {}
+
+  // Returns the descriptor of the first GNU build-id note found in an
+  // SHT_NOTE section, or an empty string for object files/archives and for
+  // files without such a note.
+  std::string GetBuildId() const override {
+    if (IsObjectFile(file_data().data())) {
+      // Object files don't have a build ID.
+      return std::string();
+    }
+
+    ElfFile elf(file_data().data());
+    assert(elf.IsOpen());
+    for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
+      ElfFile::Section section;
+      elf.ReadSection(i, &section);
+      if (section.header().sh_type != SHT_NOTE) {
+        continue;
+      }
+
+      for (ElfFile::NoteIter notes(section); !notes.IsDone(); notes.Next()) {
+        if (notes.name() == "GNU" && notes.type() == NT_GNU_BUILD_ID) {
+          return std::string(notes.descriptor());
+        }
+      }
+    }
+
+    // No build id section found.
+    return std::string();
+  }
+
+  // Runs, for every sink, the reader matching the sink's data source, then
+  // (except for segments/sections/armembers) the table reader, and finally
+  // the catch-all.  Throws for a data source this file type does not know.
+  void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
+    for (auto sink : sinks) {
+      switch (sink->data_source()) {
+        case DataSource::kSegments:
+          ReadELFSegments(sink);
+          break;
+        case DataSource::kSections:
+          DoReadELFSections(sink, kReportBySectionName);
+          break;
+        case DataSource::kRawSymbols:
+        case DataSource::kShortSymbols:
+        case DataSource::kFullSymbols:
+          ReadELFSymbols(debug_file().file_data(), sink, nullptr, false);
+          break;
+        case DataSource::kArchiveMembers:
+          DoReadELFSections(sink, kReportByArchiveMember);
+          break;
+        case DataSource::kCompileUnits: {
+          CheckNotObject("compileunits", sink);
+          SymbolTable symtab;
+          DualMap symbol_map;
+          NameMunger empty_munger;
+          // Build a raw symbol map first; the DWARF reader uses it together
+          // with the symbol table.
+          RangeSink symbol_sink(&debug_file().file_data(),
+                                sink->options(),
+                                DataSource::kRawSymbols,
+                                &sinks[0]->MapAtIndex(0));
+          symbol_sink.AddOutput(&symbol_map, &empty_munger);
+          ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symtab,
+                         false);
+          dwarf::File dwarf;
+          ReadDWARFSections(debug_file().file_data(), &dwarf);
+          ReadDWARFCompileUnits(dwarf, symtab, symbol_map, sink);
+          break;
+        }
+        case DataSource::kInlines: {
+          CheckNotObject("lineinfo", sink);
+          dwarf::File dwarf;
+          ReadDWARFSections(debug_file().file_data(), &dwarf);
+          ReadDWARFInlines(dwarf, sink, true);
+          DoReadELFSections(sink, kReportByEscapedSectionName);
+          break;
+        }
+        default:
+          THROW("unknown data source");
+      }
+
+      switch (sink->data_source()) {
+        case DataSource::kSegments:
+        case DataSource::kSections:
+        case DataSource::kArchiveMembers:
+          break;
+        default:
+          // Add these *after* processing all other data sources.
+          ReadELFTables(sink->input_file(), sink);
+          break;
+      }
+
+      AddCatchAll(sink);
+    }
+  }
+
+  // Fills *info for disassembling `symbol`.  Returns false if the symbol is
+  // not found (neither verbatim nor demangled).
+  bool GetDisassemblyInfo(const absl::string_view symbol,
+                          DataSource symbol_source,
+                          DisassemblyInfo* info) const override {
+    return DoGetDisassemblyInfo(&symbol, symbol_source, info);
+  }
+
+  // Worker for GetDisassemblyInfo().  Always fills info->symbol_map and the
+  // arch/mode; when `symbol` is non-null it additionally locates that symbol
+  // and fills info->text / info->start_address, returning false if the symbol
+  // is unknown and throwing if its address cannot be mapped to the file.
+  bool DoGetDisassemblyInfo(const absl::string_view* symbol,
+                            DataSource symbol_source,
+                            DisassemblyInfo* info) const {
+    // Find the corresponding file range.  This also could be optimized not to
+    // build the entire map.
+    DualMap base_map;
+    NameMunger empty_munger;
+    RangeSink base_sink(&file_data(), bloaty::Options(), DataSource::kSegments,
+                        nullptr);
+    base_sink.AddOutput(&base_map, &empty_munger);
+    std::vector<RangeSink*> sink_ptrs{&base_sink};
+    ProcessFile(sink_ptrs);
+
+    // Could optimize this not to build the whole table if necessary.
+    SymbolTable symbol_table;
+    RangeSink symbol_sink(&file_data(), bloaty::Options(), symbol_source,
+                          &base_map);
+    symbol_sink.AddOutput(&info->symbol_map, &empty_munger);
+    ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symbol_table,
+                   false);
+
+    if (symbol) {
+      auto entry = symbol_table.find(*symbol);
+      if (entry == symbol_table.end()) {
+        entry = symbol_table.find(ItaniumDemangle(*symbol, symbol_source));
+        if (entry == symbol_table.end()) {
+          return false;
+        }
+      }
+      uint64_t vmaddr = entry->second.first;
+      uint64_t size = entry->second.second;
+
+      // TODO(haberman); Add PLT entries to symbol map, so call <plt stub> gets
+      // symbolized.
+
+      uint64_t fileoff;
+      if (!base_map.vm_map.Translate(vmaddr, &fileoff)) {
+        // Dereference: passing the pointer itself would put a pointer value
+        // rather than the symbol name in the message.
+        THROWF("Couldn't translate VM address for function $0", *symbol);
+      }
+
+      info->text = StrictSubstr(file_data().data(), fileoff, size);
+      info->start_address = vmaddr;
+    }
+
+    ReadElfArchMode(file_data(), &info->arch, &info->mode);
+    return true;
+  }
+};
+
+} // namespace
+
+// Returns an ElfObjectFile that takes ownership of `file` when its data is an
+// ELF file or an AR archive; otherwise returns nullptr and leaves `file`
+// untouched.
+std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file) {
+  // A few functions that have been defined but are not yet used.
+  (void)&ElfFile::FindSectionByName;
+  (void)&ElfFile::Section::ReadRelocation;
+
+  ElfFile elf(file->data());
+  ArFile ar(file->data());
+  const bool recognized = elf.IsOpen() || ar.IsOpen();
+  if (!recognized) {
+    return nullptr;
+  }
+  return std::unique_ptr<ObjectFile>(new ElfObjectFile(std::move(file)));
+}
+
+} // namespace bloaty
diff --git a/src/macho.cc b/src/macho.cc
new file mode 100644
index 0000000..64a5250
--- /dev/null
+++ b/src/macho.cc
@@ -0,0 +1,666 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iostream>
+#include "string.h"
+#include "bloaty.h"
+
+#include <cassert>
+
+#include "absl/strings/str_join.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "third_party/darwin_xnu_macho/mach-o/loader.h"
+#include "third_party/darwin_xnu_macho/mach-o/fat.h"
+#include "third_party/darwin_xnu_macho/mach-o/nlist.h"
+#include "third_party/darwin_xnu_macho/mach-o/reloc.h"
+
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+using absl::string_view;
+
+#define THROW(msg) Throw(msg, __LINE__)
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
+
+namespace bloaty {
+namespace macho {
+
+ // Mach-O segname/sectname fields are fixed-size char arrays that are NOT
+ // guaranteed to be NUL-terminated: a name such as '__gcc_except_tab' uses all
+ // 16 chars and leaves no room for '\0'. The returned view therefore ends at
+ // the first NUL or after |maxlen| chars, whichever comes first.
+ static string_view ArrayToStr(const char* s, size_t maxlen) {
+   const size_t len = strnlen(s, maxlen);
+   return string_view(s, len);
+ }
+
+ // Returns a + b, throwing if the sum does not fit in 64 bits.
+ static uint64_t CheckedAdd(uint64_t a, uint64_t b) {
+   // Unsigned addition wraps modulo 2^64, so the wrapped sum is smaller than an
+   // operand exactly when the true result exceeds UINT64_MAX.
+   const uint64_t sum = a + b;
+   if (sum < a) {
+     THROW("integer overflow in addition");
+   }
+   return sum;
+ }
+
+ // Returns the |n| bytes of |data| starting at |off|. Unlike a plain substr(),
+ // which would silently shorten the result, this throws when the requested
+ // region is not entirely inside |data| (or when off + n overflows).
+ static string_view StrictSubstr(string_view data, size_t off, size_t n) {
+   const uint64_t limit = data.size();
+   if (CheckedAdd(off, n) > limit) {
+     THROW("Mach-O region out-of-bounds");
+   }
+   return data.substr(off, n);
+ }
+
+ // Returns the first four bytes of |data| interpreted as a uint32_t in host
+ // byte order. Throws if |data| is shorter than four bytes.
+ uint32_t ReadMagic(string_view data) {
+   uint32_t magic;
+   if (data.size() < sizeof(magic)) {
+     THROW("Malformed Mach-O file");
+   }
+   // Copy the bytes out rather than dereferencing a cast pointer.
+   memcpy(&magic, data.data(), sizeof(magic));
+   return magic;
+ }
+
+ // Reinterprets the front of |data| as a T and returns a pointer to it without
+ // copying. Throws if |data| holds fewer than sizeof(T) bytes.
+ template <class T>
+ const T* GetStructPointer(string_view data) {
+   const bool fits = data.size() >= sizeof(T);
+   if (!fits) {
+     THROW("Premature EOF reading Mach-O data.");
+   }
+   return reinterpret_cast<const T*>(data.data());
+ }
+
+ // Drops the leading sizeof(T) bytes from |*data|.
+ template <class T>
+ void AdvancePastStruct(string_view* data) {
+   const string_view rest = data->substr(sizeof(T));
+   *data = rest;
+ }
+
+ // Returns the NUL-terminated string that starts |offset| bytes into |data|
+ // (typically a string table). The terminator itself is not part of the
+ // returned view. Throws if |offset| is past the end of |data| or if no NUL
+ // byte follows it.
+ string_view ReadNullTerminated(string_view data, size_t offset) {
+   if (offset >= data.size()) {
+     THROW("Invalid Mach-O string table offset.");
+   }
+
+   const string_view tail = data.substr(offset);
+   const size_t len = tail.find('\0');
+
+   // An unterminated string is an error, not an empty result.
+   if (len == string_view::npos) {
+     THROW("Mach-O string was not NULL-terminated");
+   }
+
+   return tail.substr(0, len);
+ }
+
+ // Returns a pointer to the T at the front of |*data| and then moves |*data|
+ // past it. Throws (via GetStructPointer) if fewer than sizeof(T) bytes remain.
+ template <class T>
+ const T* GetStructPointerAndAdvance(string_view* data) {
+   const T* front = GetStructPointer<T>(*data);
+   *data = data->substr(sizeof(T));
+   return front;
+ }
+
+ // Reports |data| to |sink| as a file range named |label|, attributed to the
+ // "macho_overhead" analyzer. A null |sink| makes this a no-op.
+ void MaybeAddOverhead(RangeSink* sink, const char* label, string_view data) {
+   if (sink == nullptr) {
+     return;
+   }
+   sink->AddFileRange("macho_overhead", label, data);
+ }
+
+ // One Mach-O load command, as passed to the callbacks of ForEachLoadCommand().
+ struct LoadCommand {
+ // True when the enclosing header was parsed as a mach_header_64.
+ bool is64bit;
+ // The load_command::cmd value of this command.
+ uint32_t cmd;
+ // The bytes of this command: cmdsize bytes, starting at its load_command.
+ string_view command_data;
+ // The whole Mach-O image the command belongs to. Handlers use it as the base
+ // for the file offsets stored in the command.
+ string_view file_data;
+ };
+
+ // Reports whether |Struct| is the 64-bit Mach-O header type. The primary
+ // template answers false; only mach_header_64 is specialized to answer true.
+ template <class Struct>
+ bool Is64Bit() {
+   return false;
+ }
+
+ template <>
+ bool Is64Bit<mach_header_64>() {
+   return true;
+ }
+
+ // Walks the load commands that follow a Mach-O header of type |Struct|
+ // (mach_header or mach_header_64) at the start of |macho_data|, calling
+ // |loadcmd_func| once per command with a populated LoadCommand.
+ //
+ // If |overhead_sink| is non-null, the header and each load command are also
+ // reported to it as "[Mach-O Headers]" file ranges.
+ //
+ // Throws if the header or any command runs past the end of |macho_data|, or
+ // if a command declares a size of zero.
+ template <class Struct, class Func>
+ void ParseMachOHeaderImpl(string_view macho_data, RangeSink* overhead_sink,
+                           Func&& loadcmd_func) {
+   string_view header_data = macho_data;
+   auto header = GetStructPointerAndAdvance<Struct>(&header_data);
+   MaybeAddOverhead(overhead_sink,
+                    "[Mach-O Headers]",
+                    macho_data.substr(0, sizeof(Struct)));
+   uint32_t ncmds = header->ncmds;
+
+   for (uint32_t i = 0; i < ncmds; i++) {
+     auto command = GetStructPointer<load_command>(header_data);
+
+     // We test for this because otherwise a large ncmds can make bloaty hang for
+     // a while, even on a small file. Hopefully there are no real cases where a
+     // zero-size loadcmd exists.
+     if (command->cmdsize == 0) {
+       THROW("Mach-O load command had zero size.");
+     }
+
+     LoadCommand data;
+     data.is64bit = Is64Bit<Struct>();
+     data.cmd = command->cmd;
+     data.command_data = StrictSubstr(header_data, 0, command->cmdsize);
+     data.file_data = macho_data;
+
+     // The callback runs once per command, so it is invoked as an lvalue.
+     // Forwarding it here would treat an rvalue callable as consumable on
+     // every iteration, i.e. potentially use it after it has been moved from.
+     loadcmd_func(data);
+
+     MaybeAddOverhead(overhead_sink, "[Mach-O Headers]", data.command_data);
+     header_data = header_data.substr(command->cmdsize);
+   }
+ }
+
+ // Dispatches on the magic number of a thin (single-architecture) Mach-O image
+ // and walks its load commands with |loadcmd_func|. Throws for byte-swapped
+ // images and for anything that is not a Mach-O magic.
+ template <class Func>
+ void ParseMachOHeader(string_view macho_file, RangeSink* overhead_sink,
+                       Func&& loadcmd_func) {
+   const uint32_t magic = ReadMagic(macho_file);
+
+   if (magic == MH_MAGIC) {
+     // 32-bit image. We don't expect to see many of these in the wild, since
+     // Apple is aggressively phasing out support for 32-bit binaries:
+     //   https://www.macrumors.com/2017/06/06/apple-to-phase-out-32-bit-mac-apps/
+     // Still, they can be built and existing ones are floating around, so we
+     // might as well support them.
+     ParseMachOHeaderImpl<mach_header>(macho_file, overhead_sink,
+                                       std::forward<Func>(loadcmd_func));
+     return;
+   }
+
+   if (magic == MH_MAGIC_64) {
+     ParseMachOHeaderImpl<mach_header_64>(macho_file, overhead_sink,
+                                          std::forward<Func>(loadcmd_func));
+     return;
+   }
+
+   if (magic == MH_CIGAM || magic == MH_CIGAM_64) {
+     // The magic reads byte-swapped, so the file's endianness differs from the
+     // host's. We expect basically all Mach-O files, and pretty much all hosts
+     // Bloaty runs on, to be little-endian, so this should only happen on a
+     // big-endian host. That is uncommon enough (and hard enough to test) that
+     // it stays unsupported until there is a demonstrated need.
+     THROW("We don't support cross-endian Mach-O files.");
+   }
+
+   THROW("Corrupt Mach-O file");
+ }
+
+ // Walks a fat (multi-architecture) file: reads the fat_header, then for each
+ // fat_arch entry slices the embedded Mach-O image out of |fat_file| and walks
+ // its load commands with |loadcmd_func|.
+ //
+ // Fat header fields are byte-swapped before use. If |overhead_sink| is
+ // non-null the fat_header is reported to it as "[Mach-O Headers]".
+ //
+ // Throws if the header, an arch entry, or an embedded image lies outside
+ // |fat_file|.
+ template <class Func>
+ void ParseFatHeader(string_view fat_file, RangeSink* overhead_sink,
+                     Func&& loadcmd_func) {
+   string_view header_data = fat_file;
+   auto header = GetStructPointerAndAdvance<fat_header>(&header_data);
+   MaybeAddOverhead(overhead_sink, "[Mach-O Headers]",
+                    fat_file.substr(0, sizeof(fat_header)));
+   assert(ByteSwap(header->magic) == FAT_MAGIC);
+   uint32_t nfat_arch = ByteSwap(header->nfat_arch);
+   for (uint32_t i = 0; i < nfat_arch; i++) {
+     auto arch = GetStructPointerAndAdvance<fat_arch>(&header_data);
+     string_view macho_data = StrictSubstr(
+         fat_file, ByteSwap(arch->offset), ByteSwap(arch->size));
+     // The same callback serves every architecture, so it is passed on as an
+     // lvalue. Forwarding it inside the loop could hand a moved-from callable
+     // to the second and later images.
+     ParseMachOHeader(macho_data, overhead_sink, loadcmd_func);
+   }
+ }
+
+ // Entry point for iterating load commands: accepts either a thin Mach-O image
+ // or a fat file and calls |loadcmd_func| for every load command found.
+ // A file whose magic is none of the recognized values is silently ignored:
+ // no callback is made and nothing is thrown here.
+ template <class Func>
+ void ForEachLoadCommand(string_view maybe_fat_file, RangeSink* overhead_sink,
+                         Func&& loadcmd_func) {
+   const uint32_t magic = ReadMagic(maybe_fat_file);
+   const bool is_thin = magic == MH_MAGIC || magic == MH_MAGIC_64 ||
+                        magic == MH_CIGAM || magic == MH_CIGAM_64;
+
+   if (is_thin) {
+     ParseMachOHeader(maybe_fat_file, overhead_sink,
+                      std::forward<Func>(loadcmd_func));
+   } else if (magic == FAT_CIGAM) {
+     ParseFatHeader(maybe_fat_file, overhead_sink,
+                    std::forward<Func>(loadcmd_func));
+   }
+ }
+
+ // Adds bracketed fallback labels for one segment load command: first one
+ // "[segname,sectname]" range per section, then one "[segname]" range for the
+ // segment as a whole. Segments whose maxprot is VM_PROT_NONE are skipped.
+ template <class Segment, class Section>
+ void AddSegmentAsFallback(string_view command_data, string_view file_data,
+                           RangeSink* sink) {
+   auto segment = GetStructPointerAndAdvance<Segment>(&command_data);
+   if (segment->maxprot == VM_PROT_NONE) {
+     return;
+   }
+
+   const string_view segname = ArrayToStr(segment->segname, 16);
+
+   for (uint32_t remaining = segment->nsects; remaining > 0; remaining--) {
+     auto section = GetStructPointerAndAdvance<Section>(&command_data);
+
+     // Zerofill sections have a VM size but occupy no bytes in the file; for
+     // every other section type the file size equals the VM size.
+     const auto type = section->flags & SECTION_TYPE;
+     const bool zerofill = type == S_ZEROFILL || type == S_GB_ZEROFILL ||
+                           type == S_THREAD_LOCAL_ZEROFILL;
+     const uint64_t filesize = zerofill ? 0 : section->size;
+
+     const string_view sectname = ArrayToStr(section->sectname, 16);
+     const std::string label =
+         "[" + absl::StrJoin(std::make_tuple(segname, sectname), ",") + "]";
+     sink->AddRange("macho_fallback", label, section->addr, section->size,
+                    StrictSubstr(file_data, section->offset, filesize));
+   }
+
+   const std::string segment_label = "[" + std::string(segname) + "]";
+   sink->AddRange("macho_fallback", segment_label, segment->vmaddr,
+                  segment->vmsize,
+                  StrictSubstr(file_data, segment->fileoff, segment->filesize));
+ }
+
+ // Reports one segment load command to |sink|. For the kSegments data source
+ // the whole segment is added as a single range named after the segment; for
+ // kSections each section is added as "segname,sectname". Segments whose
+ // maxprot is VM_PROT_NONE are skipped. Any other data source is a caller bug.
+ template <class Segment, class Section>
+ void ParseSegment(LoadCommand cmd, RangeSink* sink) {
+   auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
+   if (segment->maxprot == VM_PROT_NONE) {
+     return;
+   }
+
+   const string_view segname = ArrayToStr(segment->segname, 16);
+   const DataSource source = sink->data_source();
+
+   if (source == DataSource::kSegments) {
+     sink->AddRange(
+         "macho_segment", segname, segment->vmaddr, segment->vmsize,
+         StrictSubstr(cmd.file_data, segment->fileoff, segment->filesize));
+   } else if (source == DataSource::kSections) {
+     for (uint32_t remaining = segment->nsects; remaining > 0; remaining--) {
+       auto section = GetStructPointerAndAdvance<Section>(&cmd.command_data);
+
+       // Zerofill sections have a VM size but occupy no bytes in the file; for
+       // every other section type the file size equals the VM size.
+       const auto type = section->flags & SECTION_TYPE;
+       const bool zerofill = type == S_ZEROFILL || type == S_GB_ZEROFILL ||
+                             type == S_THREAD_LOCAL_ZEROFILL;
+       const uint64_t filesize = zerofill ? 0 : section->size;
+
+       const string_view sectname = ArrayToStr(section->sectname, 16);
+       const std::string label =
+           absl::StrJoin(std::make_tuple(segname, sectname), ",");
+       sink->AddRange("macho_section", label, section->addr, section->size,
+                      StrictSubstr(cmd.file_data, section->offset, filesize));
+     }
+   } else {
+     BLOATY_UNREACHABLE();
+   }
+ }
+
+ // Reports the five data blobs referenced by a dyld_info_command (rebase, bind,
+ // weak bind, lazy bind and export info) as "macho_dyld" file ranges. Throws if
+ // any blob lies outside the file.
+ static void ParseDyldInfo(const LoadCommand& cmd, RangeSink* sink) {
+   auto info = GetStructPointer<dyld_info_command>(cmd.command_data);
+
+   auto add_blob = [&cmd, sink](const char* label, uint32_t off,
+                                uint32_t size) {
+     sink->AddFileRange("macho_dyld", label,
+                        StrictSubstr(cmd.file_data, off, size));
+   };
+
+   add_blob("Rebase Info", info->rebase_off, info->rebase_size);
+   add_blob("Binding Info", info->bind_off, info->bind_size);
+   add_blob("Weak Binding Info", info->weak_bind_off, info->weak_bind_size);
+   add_blob("Lazy Binding Info", info->lazy_bind_off, info->lazy_bind_size);
+   add_blob("Export Info", info->export_off, info->export_size);
+ }
+
+ // Reports the symbol table and the string table referenced by a
+ // symtab_command as "macho_symtab" file ranges. The symbol table's byte size
+ // depends on whether entries are nlist or nlist_64.
+ static void ParseSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+   auto symtab = GetStructPointer<symtab_command>(cmd.command_data);
+   const size_t entry_size =
+       cmd.is64bit ? sizeof(nlist_64) : sizeof(struct nlist);
+
+   const string_view symbols =
+       StrictSubstr(cmd.file_data, symtab->symoff, symtab->nsyms * entry_size);
+   sink->AddFileRange("macho_symtab", "Symbol Table", symbols);
+
+   const string_view strings =
+       StrictSubstr(cmd.file_data, symtab->stroff, symtab->strsize);
+   sink->AddFileRange("macho_symtab", "String Table", strings);
+ }
+
+ // Reports the tables referenced by a dysymtab_command (table of contents,
+ // module table, referenced symbol table, indirect symbol table, and external
+ // and local relocation entries) as "macho_dynsymtab" file ranges. Throws if
+ // any table lies outside the file.
+ static void ParseDynamicSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+   auto dysymtab = GetStructPointer<dysymtab_command>(cmd.command_data);
+
+   // Module table entries are dylib_module in 32-bit files and dylib_module_64
+   // in 64-bit files, and the two structs differ in size. Pick the entry size
+   // the same way ParseSymbolTable() picks between nlist and nlist_64.
+   const size_t module_size =
+       cmd.is64bit ? sizeof(dylib_module_64) : sizeof(struct dylib_module);
+
+   sink->AddFileRange(
+       "macho_dynsymtab", "Table of Contents",
+       StrictSubstr(cmd.file_data, dysymtab->tocoff,
+                    dysymtab->ntoc * sizeof(dylib_table_of_contents)));
+   sink->AddFileRange("macho_dynsymtab", "Module Table",
+                      StrictSubstr(cmd.file_data, dysymtab->modtaboff,
+                                   dysymtab->nmodtab * module_size));
+   sink->AddFileRange(
+       "macho_dynsymtab", "Referenced Symbol Table",
+       StrictSubstr(cmd.file_data, dysymtab->extrefsymoff,
+                    dysymtab->nextrefsyms * sizeof(dylib_reference)));
+   sink->AddFileRange("macho_dynsymtab", "Indirect Symbol Table",
+                      StrictSubstr(cmd.file_data, dysymtab->indirectsymoff,
+                                   dysymtab->nindirectsyms * sizeof(uint32_t)));
+   sink->AddFileRange("macho_dynsymtab", "External Relocation Entries",
+                      StrictSubstr(cmd.file_data, dysymtab->extreloff,
+                                   dysymtab->nextrel * sizeof(relocation_info)));
+   sink->AddFileRange(
+       "macho_dynsymtab", "Local Relocation Entries",
+       StrictSubstr(cmd.file_data, dysymtab->locreloff,
+                    dysymtab->nlocrel * sizeof(struct relocation_info)));
+ }
+
+ // Reports the data blob referenced by a linkedit_data_command as a
+ // "macho_linkedit" file range named |label|. Throws if the blob lies outside
+ // the file.
+ static void ParseLinkeditCommand(string_view label, const LoadCommand& cmd,
+                                  RangeSink* sink) {
+   auto linkedit = GetStructPointer<linkedit_data_command>(cmd.command_data);
+   const string_view blob =
+       StrictSubstr(cmd.file_data, linkedit->dataoff, linkedit->datasize);
+   sink->AddFileRange("macho_linkedit", label, blob);
+ }
+
+ // Routes one load command to the parser for its type. Command types that are
+ // not listed here are ignored.
+ void ParseLoadCommand(const LoadCommand& cmd, RangeSink* sink) {
+   // Commands that have a dedicated parser.
+   switch (cmd.cmd) {
+     case LC_SEGMENT_64:
+       ParseSegment<segment_command_64, section_64>(cmd, sink);
+       return;
+     case LC_SEGMENT:
+       ParseSegment<segment_command, section>(cmd, sink);
+       return;
+     case LC_DYLD_INFO:
+     case LC_DYLD_INFO_ONLY:
+       ParseDyldInfo(cmd, sink);
+       return;
+     case LC_SYMTAB:
+       ParseSymbolTable(cmd, sink);
+       return;
+     case LC_DYSYMTAB:
+       ParseDynamicSymbolTable(cmd, sink);
+       return;
+     default:
+       break;
+   }
+
+   // The remaining supported commands all share the linkedit_data_command
+   // layout and differ only in the label they are reported under.
+   const char* linkedit_label = nullptr;
+   switch (cmd.cmd) {
+     case LC_CODE_SIGNATURE:
+       linkedit_label = "Code Signature";
+       break;
+     case LC_SEGMENT_SPLIT_INFO:
+       linkedit_label = "Segment Split Info";
+       break;
+     case LC_FUNCTION_STARTS:
+       linkedit_label = "Function Start Addresses";
+       break;
+     case LC_DATA_IN_CODE:
+       linkedit_label = "Table of Non-instructions";
+       break;
+     case LC_DYLIB_CODE_SIGN_DRS:
+       linkedit_label = "Code Signing DRs";
+       break;
+     case LC_LINKER_OPTIMIZATION_HINT:
+       linkedit_label = "Optimization Hints";
+       break;
+     default:
+       return;
+   }
+   ParseLinkeditCommand(linkedit_label, cmd, sink);
+ }
+
+ // Runs ParseLoadCommand() over every load command of |sink|'s input file.
+ // |sink| also serves as the overhead sink for the headers themselves.
+ void ParseLoadCommands(RangeSink* sink) {
+   auto visit = [sink](const LoadCommand& cmd) { ParseLoadCommand(cmd, sink); };
+   ForEachLoadCommand(sink->input_file().data(), sink, visit);
+ }
+
+ // Iterates the symbol table referenced by an LC_SYMTAB command, whose entries
+ // have type |NList| (nlist or nlist_64).
+ //
+ // Entries with any N_STAB bit set, or with a value of zero, are skipped. For
+ // every other entry:
+ //   - if |sink|'s data source is a symbol source, a VM range of unknown size
+ //     is added at the symbol's address under its demangled name;
+ //   - if |table| is non-null, the raw name is recorded in it together with
+ //     the address and an unknown size;
+ //   - the bytes of the name (including its NUL) and of the entry itself are
+ //     attributed to whatever label covers the symbol's address.
+ template <class NList>
+ void ParseSymbolsFromSymbolTable(const LoadCommand& cmd, SymbolTable* table,
+                                  RangeSink* sink) {
+   auto symtab_cmd = GetStructPointer<symtab_command>(cmd.command_data);
+
+   string_view entries = StrictSubstr(cmd.file_data, symtab_cmd->symoff,
+                                      symtab_cmd->nsyms * sizeof(NList));
+   const string_view strings =
+       StrictSubstr(cmd.file_data, symtab_cmd->stroff, symtab_cmd->strsize);
+
+   for (uint32_t remaining = symtab_cmd->nsyms; remaining > 0; remaining--) {
+     auto sym = GetStructPointerAndAdvance<NList>(&entries);
+
+     const bool has_stab_bits = (sym->n_type & N_STAB) != 0;
+     if (has_stab_bits || sym->n_value == 0) {
+       continue;
+     }
+
+     const string_view name = ReadNullTerminated(strings, sym->n_un.n_strx);
+
+     if (sink->data_source() >= DataSource::kSymbols) {
+       sink->AddVMRange("macho_symbols", sym->n_value, RangeSink::kUnknownSize,
+                        ItaniumDemangle(name, sink->data_source()));
+     }
+
+     if (table != nullptr) {
+       table->insert(std::make_pair(
+           name, std::make_pair(sym->n_value, RangeSink::kUnknownSize)));
+     }
+
+     // Widen the name by one byte so that its trailing NUL is accounted for.
+     const string_view name_with_nul(name.data(), name.size() + 1);
+     const string_view sym_bytes(reinterpret_cast<const char*>(sym),
+                                 sizeof(NList));
+     sink->AddFileRangeForVMAddr("macho_symtab_name", sym->n_value,
+                                 name_with_nul);
+     sink->AddFileRangeForVMAddr("macho_symtab_sym", sym->n_value, sym_bytes);
+   }
+ }
+
+ // Feeds the symbols of every LC_SYMTAB command in |file_data| to |sink| and,
+ // if |symtab| is non-null, records them in |symtab| as well. All other load
+ // commands are ignored; in particular LC_DYSYMTAB is not parsed for symbols.
+ void ParseSymbols(string_view file_data, SymbolTable* symtab, RangeSink* sink) {
+   auto on_command = [symtab, sink](const LoadCommand& cmd) {
+     if (cmd.cmd != LC_SYMTAB) {
+       return;
+     }
+     if (cmd.is64bit) {
+       ParseSymbolsFromSymbolTable<nlist_64>(cmd, symtab, sink);
+     } else {
+       ParseSymbolsFromSymbolTable<struct nlist>(cmd, symtab, sink);
+     }
+   };
+   ForEachLoadCommand(file_data, sink, on_command);
+ }
+
+ // Adds the fallback labels for |sink|'s input file: bracketed section and
+ // segment names for every segment load command, followed by "[Unmapped]" for
+ // the file as a whole.
+ static void AddMachOFallback(RangeSink* sink) {
+   auto add_segment = [sink](const LoadCommand& cmd) {
+     if (cmd.cmd == LC_SEGMENT_64) {
+       AddSegmentAsFallback<segment_command_64, section_64>(
+           cmd.command_data, cmd.file_data, sink);
+     } else if (cmd.cmd == LC_SEGMENT) {
+       AddSegmentAsFallback<segment_command, section>(cmd.command_data,
+                                                      cmd.file_data, sink);
+     }
+   };
+   ForEachLoadCommand(sink->input_file().data(), sink, add_segment);
+
+   sink->AddFileRange("macho_fallback", "[Unmapped]", sink->input_file().data());
+ }
+
+ // If the segment described by |cmd| is named "__DWARF", stores the contents
+ // of each recognized __debug_* section in the matching field of |dwarf|.
+ // Other segments, segments whose maxprot is VM_PROT_NONE, and unrecognized
+ // section names are ignored.
+ template <class Segment, class Section>
+ void ReadDebugSectionsFromSegment(LoadCommand cmd, dwarf::File* dwarf) {
+   auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
+   if (segment->maxprot == VM_PROT_NONE) {
+     return;
+   }
+
+   const string_view segname = ArrayToStr(segment->segname, 16);
+   if (segname != "__DWARF") {
+     return;
+   }
+
+   for (uint32_t remaining = segment->nsects; remaining > 0; remaining--) {
+     auto section = GetStructPointerAndAdvance<Section>(&cmd.command_data);
+     const string_view sectname = ArrayToStr(section->sectname, 16);
+
+     // Zerofill sections have a VM size but occupy no bytes in the file; for
+     // every other section type the file size equals the VM size.
+     const auto type = section->flags & SECTION_TYPE;
+     const bool zerofill = type == S_ZEROFILL || type == S_GB_ZEROFILL ||
+                           type == S_THREAD_LOCAL_ZEROFILL;
+     const uint64_t filesize = zerofill ? 0 : section->size;
+
+     const string_view contents =
+         StrictSubstr(cmd.file_data, section->offset, filesize);
+
+     if (sectname == "__debug_aranges") {
+       dwarf->debug_aranges = contents;
+     } else if (sectname == "__debug_str") {
+       dwarf->debug_str = contents;
+     } else if (sectname == "__debug_info") {
+       dwarf->debug_info = contents;
+     } else if (sectname == "__debug_types") {
+       dwarf->debug_types = contents;
+     } else if (sectname == "__debug_abbrev") {
+       dwarf->debug_abbrev = contents;
+     } else if (sectname == "__debug_line") {
+       dwarf->debug_line = contents;
+     } else if (sectname == "__debug_loc") {
+       dwarf->debug_loc = contents;
+     } else if (sectname == "__debug_pubnames") {
+       dwarf->debug_pubnames = contents;
+     } else if (sectname == "__debug_pubtypes") {
+       dwarf->debug_pubtypes = contents;
+     } else if (sectname == "__debug_ranges") {
+       dwarf->debug_ranges = contents;
+     }
+   }
+ }
+
+ // Fills |dwarf| with the debug sections found in the segment load commands of
+ // |file|. No overhead is reported while walking the headers.
+ static void ReadDebugSectionsFromMachO(const InputFile& file,
+                                        dwarf::File* dwarf) {
+   auto collect = [dwarf](const LoadCommand& cmd) {
+     if (cmd.cmd == LC_SEGMENT_64) {
+       ReadDebugSectionsFromSegment<segment_command_64, section_64>(cmd, dwarf);
+     } else if (cmd.cmd == LC_SEGMENT) {
+       ReadDebugSectionsFromSegment<segment_command, section>(cmd, dwarf);
+     }
+   };
+   ForEachLoadCommand(file.data(), nullptr, collect);
+ }
+
+ // ObjectFile implementation for Mach-O inputs (thin images and fat files).
+ class MachOObjectFile : public ObjectFile {
+  public:
+   MachOObjectFile(std::unique_ptr<InputFile> file_data)
+       : ObjectFile(std::move(file_data)) {}
+
+   // Returns the raw uuid bytes of the file's LC_UUID command, or an empty
+   // string if there is none. If several LC_UUID commands are visited, the
+   // last one wins. Throws if an LC_UUID command is larger than uuid_command.
+   std::string GetBuildId() const override {
+     std::string build_id;
+
+     auto capture_uuid = [&build_id](LoadCommand cmd) {
+       if (cmd.cmd != LC_UUID) {
+         return;
+       }
+       auto uuid_cmd =
+           GetStructPointerAndAdvance<uuid_command>(&cmd.command_data);
+       if (!cmd.command_data.empty()) {
+         THROWF("Unexpected excess uuid data: $0", cmd.command_data.size());
+       }
+       build_id.resize(sizeof(uuid_cmd->uuid));
+       memcpy(&build_id[0], &uuid_cmd->uuid[0], sizeof(uuid_cmd->uuid));
+     };
+     ForEachLoadCommand(file_data().data(), nullptr, capture_uuid);
+
+     return build_id;
+   }
+
+   // Populates every sink in |sinks| according to its data source, then adds
+   // the Mach-O fallback labels to it. Throws for data sources that Mach-O
+   // does not support.
+   void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
+     for (RangeSink* sink : sinks) {
+       switch (sink->data_source()) {
+         case DataSource::kSegments:
+         case DataSource::kSections:
+           ParseLoadCommands(sink);
+           break;
+
+         case DataSource::kSymbols:
+         case DataSource::kRawSymbols:
+         case DataSource::kShortSymbols:
+         case DataSource::kFullSymbols:
+           ParseSymbols(debug_file().file_data().data(), nullptr, sink);
+           break;
+
+         case DataSource::kCompileUnits: {
+           // Build a raw symbol table and symbol range map from the debug
+           // file first; the DWARF compile-unit reader consumes both.
+           SymbolTable symbols;
+           DualMap symbol_ranges;
+           NameMunger no_munging;
+           RangeSink raw_symbol_sink(&debug_file().file_data(),
+                                     sink->options(),
+                                     DataSource::kRawSymbols,
+                                     &sinks[0]->MapAtIndex(0));
+           raw_symbol_sink.AddOutput(&symbol_ranges, &no_munging);
+           ParseSymbols(debug_file().file_data().data(), &symbols,
+                        &raw_symbol_sink);
+
+           dwarf::File dwarf;
+           ReadDebugSectionsFromMachO(debug_file().file_data(), &dwarf);
+           ReadDWARFCompileUnits(dwarf, symbols, symbol_ranges, sink);
+           ParseSymbols(sink->input_file().data(), nullptr, sink);
+           break;
+         }
+
+         case DataSource::kArchiveMembers:
+         case DataSource::kInlines:
+         default:
+           THROW("Mach-O doesn't support this data source");
+       }
+       AddMachOFallback(sink);
+     }
+   }
+
+   // Disassembly is not implemented for Mach-O: prints a warning and reports
+   // failure without touching the output parameter.
+   bool GetDisassemblyInfo(absl::string_view /*symbol*/,
+                           DataSource /*symbol_source*/,
+                           DisassemblyInfo* /*info*/) const override {
+     WARN("Mach-O files do not support disassembly yet");
+     return false;
+   }
+ };
+
+} // namespace macho
+
+ // Returns a Mach-O ObjectFile if |file| starts with a supported magic number,
+ // taking ownership of |file| in that case. Otherwise returns nullptr and does
+ // not move from |file|. Throws (via ReadMagic) if the file is too short to
+ // hold a magic number.
+ std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile> &file) {
+   const uint32_t magic = macho::ReadMagic(file->data());
+
+   // We only support little-endian host and little endian binaries (see
+   // ParseMachOHeader() for more rationale). Fat headers are always on disk as
+   // big-endian, hence the byte-swapped FAT_CIGAM.
+   const bool supported =
+       magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_CIGAM;
+   if (!supported) {
+     return nullptr;
+   }
+
+   return std::unique_ptr<ObjectFile>(
+       new macho::MachOObjectFile(std::move(file)));
+ }
+
+} // namespace bloaty
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 0000000..f2c5ec2
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,45 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+#include "bloaty.pb.h"
+
+#include <iostream>
+
+ // Command-line entry point: parses options, runs the analysis over
+ // memory-mapped input files, and prints the rollup to stdout.
+ // Exit status is 0 on success (or when option parsing stops without an error,
+ // e.g. --help) and 1 on any reported failure.
+ int main(int argc, char *argv[]) {
+   bloaty::Options options;
+   bloaty::OutputOptions output_options;
+   std::string error;
+
+   const bool parsed = bloaty::ParseOptions(false, &argc, &argv, &options,
+                                            &output_options, &error);
+   if (!parsed) {
+     if (error.empty()) {
+       return 0;  // --help or similar.
+     }
+     fprintf(stderr, "bloaty: %s\n", error.c_str());
+     return 1;
+   }
+
+   bloaty::RollupOutput output;
+   bloaty::MmapInputFileFactory mmap_factory;
+   const bool ok = bloaty::BloatyMain(options, mmap_factory, &output, &error);
+   if (!ok) {
+     if (!error.empty()) {
+       fprintf(stderr, "bloaty: %s\n", error.c_str());
+     }
+     return 1;
+   }
+
+   output.Print(output_options, &std::cout);
+   return 0;
+ }
diff --git a/src/range_map.cc b/src/range_map.cc
new file mode 100644
index 0000000..08e1f18
--- /dev/null
+++ b/src/range_map.cc
@@ -0,0 +1,332 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "range_map.h"
+
+#include "bloaty.h"
+
+namespace bloaty {
+
+ // Namespace-scope definition of the constexpr static member, so that it has
+ // storage when it is odr-used (e.g. bound to a reference).
+ constexpr uint64_t RangeMap::kUnknownSize;
+
+ // Maps |addr| into the other domain using the entry at |iter|: the offset of
+ // |addr| within the entry is applied to the entry's other_start. The entry
+ // must contain |addr| and must have a translation.
+ template <class T>
+ uint64_t RangeMap::TranslateWithEntry(T iter, uint64_t addr) const {
+   assert(EntryContains(iter, addr));
+   assert(iter->second.HasTranslation());
+   const uint64_t offset_in_entry = addr - iter->first;
+   return offset_in_entry + iter->second.other_start;
+ }
+
+ // Intersects the range [addr, addr + size) with the entry at |iter| and, if
+ // that entry has a translation, maps the start of the intersection into the
+ // other domain.
+ //
+ // Outputs:
+ //   *trimmed_addr    - start of the intersection (always written).
+ //   *trimmed_size    - size of the intersection; kUnknownSize is passed
+ //                      through unchanged, and 0 is written when the
+ //                      intersection is empty (always written).
+ //   *translated_addr - *trimmed_addr in the other domain (written only when
+ //                      the function returns true).
+ //
+ // Returns false when the intersection is empty or the entry has no
+ // translation.
+ template <class T>
+ bool RangeMap::TranslateAndTrimRangeWithEntry(T iter, uint64_t addr,
+                                               uint64_t size,
+                                               uint64_t* trimmed_addr,
+                                               uint64_t* translated_addr,
+                                               uint64_t* trimmed_size) const {
+   const uint64_t start = std::max(addr, iter->first);
+   *trimmed_addr = start;
+
+   if (size == kUnknownSize) {
+     *trimmed_size = kUnknownSize;
+   } else {
+     // The end of the request is derived from the caller's original |addr|.
+     // Deriving it from the clamped start would push the end out by
+     // (iter->first - addr) whenever the request begins before this entry,
+     // making the "trimmed" range extend past what was asked for.
+     const uint64_t end =
+         std::min(addr + size, iter->first + iter->second.size);
+     if (start >= end) {
+       *trimmed_size = 0;
+       return false;
+     }
+     *trimmed_size = end - start;
+   }
+
+   if (!iter->second.HasTranslation()) {
+     return false;
+   }
+
+   *translated_addr = TranslateWithEntry(iter, start);
+   return true;
+ }
+
+ // Returns the entry that contains |addr|, or mappings_.end() if there is none.
+ RangeMap::Map::const_iterator RangeMap::FindContaining(uint64_t addr) const {
+   // upper_bound() yields the first entry that starts after |addr|, so the only
+   // entry that could contain |addr| is the one just before it.
+   auto candidate = mappings_.upper_bound(addr);
+   if (candidate == mappings_.begin()) {
+     return mappings_.end();
+   }
+   --candidate;
+   if (!EntryContains(candidate, addr)) {
+     return mappings_.end();
+   }
+   return candidate;
+ }
+
+ // Returns the entry that contains |addr| if there is one; otherwise the first
+ // entry that starts after |addr|, which may be mappings_.end().
+ RangeMap::Map::iterator RangeMap::FindContainingOrAfter(uint64_t addr) {
+   const auto after = mappings_.upper_bound(addr);
+   if (after == mappings_.begin()) {
+     return after;  // Nothing starts at or before |addr|.
+   }
+   auto before = after;
+   --before;
+   if (EntryContains(before, addr)) {
+     return before;  // Containing
+   }
+   return after;  // May be end().
+ }
+
+ // Const overload: returns the entry that contains |addr| if there is one;
+ // otherwise the first entry that starts after |addr|, which may be
+ // mappings_.end().
+ RangeMap::Map::const_iterator RangeMap::FindContainingOrAfter(
+     uint64_t addr) const {
+   const auto after = mappings_.upper_bound(addr);
+   if (after == mappings_.begin()) {
+     return after;  // Nothing starts at or before |addr|.
+   }
+   auto before = after;
+   --before;
+   if (EntryContains(before, addr)) {
+     return before;  // Containing
+   }
+   return after;  // May be end().
+ }
+
+ // Translates |addr| into the other domain. On success stores the result in
+ // |*translated| and returns true. Returns false, leaving |*translated|
+ // untouched, if no entry contains |addr| or the containing entry has no
+ // translation.
+ bool RangeMap::Translate(uint64_t addr, uint64_t* translated) const {
+   const auto entry = FindContaining(addr);
+   if (entry != mappings_.end() && entry->second.HasTranslation()) {
+     *translated = TranslateWithEntry(entry, addr);
+     return true;
+   }
+   return false;
+ }
+
+ // Looks up the label of the entry that contains |addr|. On success stores it
+ // in |*label| and returns true; returns false, leaving |*label| untouched, if
+ // no entry contains |addr|.
+ bool RangeMap::TryGetLabel(uint64_t addr, std::string* label) const {
+   const auto entry = FindContaining(addr);
+   if (entry != mappings_.end()) {
+     *label = entry->second.label;
+     return true;
+   }
+   return false;
+ }
+
+ // Looks up a single label for the range [addr, addr + size).
+ //
+ // |*label| is set to the label of the entry that contains |addr|. Entries are
+ // then walked in order until one is found whose end reaches addr + size;
+ // every entry walked past before that one must carry the same label.
+ //
+ // Returns false if addr + size overflows, if no entry contains |addr|, if an
+ // entry walked past has a different label, or if the map ends before an entry
+ // reaching addr + size is found. |*label| may have been written even when
+ // false is returned.
+ //
+ // NOTE(review): the label of the final entry (the one that reaches the end of
+ // the range) is not compared, and gaps between entries are not detected.
+ // Confirm whether callers rely on that.
+ bool RangeMap::TryGetLabelForRange(uint64_t addr, uint64_t size,
+                                    std::string* label) const {
+   const uint64_t end = addr + size;
+   if (end < addr) {
+     return false;  // addr + size wrapped around.
+   }
+
+   auto iter = FindContaining(addr);
+   if (iter == mappings_.end()) {
+     return false;
+   }
+
+   *label = iter->second.label;
+   for (; iter != mappings_.end(); ++iter) {
+     if (iter->first + iter->second.size >= end) {
+       return true;
+     }
+     if (iter->second.label != *label) {
+       return false;
+     }
+   }
+   return false;
+ }
+
+ // Looks up the size of the entry that starts exactly at |addr|. On success
+ // stores it in |*size| and returns true; returns false, leaving |*size|
+ // untouched, if no entry starts at |addr|.
+ bool RangeMap::TryGetSize(uint64_t addr, uint64_t* size) const {
+   const auto entry = mappings_.find(addr);
+   if (entry != mappings_.end()) {
+     *size = entry->second.size;
+     return true;
+   }
+   return false;
+ }
+
+ // Returns a human-readable dump of the map: one line per entry, in address
+ // order, each terminated by a newline.
+ std::string RangeMap::DebugString() const {
+   std::string out;
+   auto it = mappings_.begin();
+   while (it != mappings_.end()) {
+     absl::StrAppend(&out, EntryDebugString(it), "\n");
+     ++it;
+   }
+   return out;
+ }
+
+ // Adds [addr, addr + size) labelled |val|, with no translation into the other
+ // domain. Shorthand for AddDualRange() with kNoTranslation.
+ void RangeMap::AddRange(uint64_t addr, uint64_t size, const std::string& val) {
+   const uint64_t no_other_addr = kNoTranslation;
+   AddDualRange(addr, size, no_other_addr, val);
+ }
+
+ // Called when a range [addr, addr + size) being added overlaps the existing
+ // entry at |iter| (which must contain |addr|).
+ //
+ // The existing entry's label is never changed; |label| is only used for
+ // verbose output. The one update made here is to the size: if the existing
+ // entry has unknown size, the new range has a known size, and both start at
+ // the same address, the entry's size is set to the new range's extent, capped
+ // at the start of the next entry.
+ //
+ // If the existing entry already has a known size, or the new range's size is
+ // unknown, nothing is modified (a "skipping" message is printed when
+ // verbose_level > 1).
+ template <class T>
+ void RangeMap::MaybeSetLabel(T iter, const std::string& label, uint64_t addr,
+ uint64_t size) {
+ assert(EntryContains(iter, addr));
+ if (iter->second.size == kUnknownSize && size != kUnknownSize) {
+ assert(addr + size >= addr);
+ assert(addr + size >= iter->first);
+ assert(addr >= iter->first);
+ if (addr == iter->first) {
+ T next = std::next(iter);
+ uint64_t end = addr + size;
+ // Never let the resized entry run into the entry that follows it.
+ if (!IterIsEnd(next)) {
+ end = std::min(end, next->first);
+ }
+ uint64_t new_size = end - iter->first;
+ if (verbose_level > 2) {
+ printf(" updating mapping (%s) with new size %" PRIx64 "\n",
+ EntryDebugString(addr, size, UINT64_MAX, label).c_str(),
+ new_size);
+ }
+ // This new defined range encompasses all of the unknown-length range, so
+ // just define the range to have our end.
+ iter->second.size = new_size;
+ CheckConsistency(iter);
+ }
+ } else if (verbose_level > 1) {
+ printf(" skipping existing mapping (%s)\n",
+ EntryDebugString(iter).c_str());
+ }
+ }
+
+ // Adds the range [addr, addr + size) with |label|, optionally recording that
+ // it corresponds to the range starting at |otheraddr| in the other domain
+ // (pass kNoTranslation for none).
+ //
+ // Existing entries are never overwritten: the new range only fills the gaps
+ // between them, so it may be stored as several entries. Each existing entry
+ // that the range overlaps is offered to MaybeSetLabel(), which can fill in the
+ // size of an unknown-size entry.
+ //
+ // A size of 0 is a no-op. A size of kUnknownSize records only a start address
+ // and must not carry a translation.
+ void RangeMap::AddDualRange(uint64_t addr, uint64_t size, uint64_t otheraddr,
+ const std::string& label) {
+ if (verbose_level > 2) {
+ printf("%p AddDualRange([%" PRIx64 ", %" PRIx64 "], %" PRIx64 ", %s)\n",
+ this, addr, size, otheraddr, label.c_str());
+ }
+
+ if (size == 0) return;
+
+ auto it = FindContainingOrAfter(addr);
+
+ // Unknown-size ranges: add a new entry at |addr| unless EntryContainsStrict()
+ // reports that an existing entry already covers it.
+ if (size == kUnknownSize) {
+ assert(otheraddr == kNoTranslation);
+ if (it != mappings_.end() && EntryContainsStrict(it, addr)) {
+ MaybeSetLabel(it, label, addr, kUnknownSize);
+ } else {
+ auto iter = mappings_.emplace_hint(
+ it, std::make_pair(addr, Entry(label, kUnknownSize, kNoTranslation)));
+ if (verbose_level > 2) {
+ printf(" added entry: %s\n", EntryDebugString(iter).c_str());
+ }
+ }
+ return;
+ }
+
+ // |base| remembers where the range started so that the translation of each
+ // piece can be offset by the piece's distance from the start.
+ const uint64_t base = addr;
+ uint64_t end = addr + size;
+ assert(end >= addr);
+
+ // Each pass skips the existing entries at |addr|, then fills the gap that
+ // follows (if any part of the range is left) with a new entry.
+ while (1) {
+ // Advance past existing entries that intersect this range until we find a
+ // gap.
+ while (addr < end && !IterIsEnd(it) && EntryContains(it, addr)) {
+ assert(end >= addr);
+ MaybeSetLabel(it, label, addr, end - addr);
+ addr = RangeEndUnknownLimit(it, addr);
+ ++it;
+ }
+
+ if (addr >= end) {
+ return;
+ }
+
+ // We found a gap and need to create an entry. Need to make sure the new
+ // entry doesn't extend into a range that was previously defined.
+ uint64_t this_end = end;
+ if (it != mappings_.end() && end > it->first) {
+ assert(it->first >= addr);
+ this_end = std::min(end, it->first);
+ }
+
+ uint64_t other = (otheraddr == kNoTranslation) ? kNoTranslation
+ : addr - base + otheraddr;
+ assert(this_end >= addr);
+ auto iter = mappings_.emplace_hint(
+ it, std::make_pair(addr, Entry(label, this_end - addr, other)));
+ if (verbose_level > 2) {
+ printf(" added entry: %s\n", EntryDebugString(iter).c_str());
+ }
+ CheckConsistency(iter);
+ addr = this_end;
+ }
+ }
+
+ // Adds [addr, addr + size) labelled |val| to this map and mirrors it into
+ // |other| through |translator|.
+ //
+ // The range is processed one |translator| entry at a time, starting with the
+ // entry that contains |addr|. For each entry visited, the piece of the range
+ // that falls inside it is added to this map, and, if the entry has a
+ // translation, the translated piece is added to |other|. If no |translator|
+ // entry contains |addr|, nothing is added to either map.
+ //
+ // Returns true iff the sizes of the pieces add up to |size|.
+ //
+ // In most cases we don't expect the range we're translating to span mappings
+ // in the translator. For example, we would never expect a symbol to span
+ // sections.
+ //
+ // However there are some examples. An archive member (in the file domain) can
+ // span several section mappings. If we really wanted to get particular here,
+ // we could pass a parameter indicating whether such spanning is expected, and
+ // warn if not.
+ bool RangeMap::AddRangeWithTranslation(uint64_t addr, uint64_t size,
+ const std::string& val,
+ const RangeMap& translator,
+ bool verbose,
+ RangeMap* other) {
+ auto it = translator.FindContaining(addr);
+ uint64_t end;
+ if (size == kUnknownSize) {
+ // With no known extent, only the entry containing |addr| is considered.
+ end = addr + 1;
+ } else {
+ end = addr + size;
+ assert(end >= addr);
+ }
+ uint64_t total_size = 0;
+
+ // TODO: optionally warn about when we span ranges of the translator. In some
+ // cases this would be a bug (ie. symbols VM->file). In other cases it's
+ // totally normal (ie. archive members file->VM).
+ while (!translator.IterIsEnd(it) && it->first < end) {
+ uint64_t translated_addr;
+ uint64_t trimmed_addr;
+ uint64_t trimmed_size;
+ if (translator.TranslateAndTrimRangeWithEntry(
+ it, addr, size, &trimmed_addr, &translated_addr, &trimmed_size)) {
+ if (verbose_level > 2 || verbose) {
+ printf(" -> translates to: [%" PRIx64 " %" PRIx64 "]\n", translated_addr,
+ trimmed_size);
+ }
+ other->AddRange(translated_addr, trimmed_size, val);
+ }
+ // The piece is recorded in this map whether or not it could be translated.
+ AddRange(trimmed_addr, trimmed_size, val);
+ total_size += trimmed_size;
+ ++it;
+ }
+
+ return total_size == size;
+ }
+
+ // Merges neighbouring entries in place. An entry is folded into the one
+ // before it when the two are exactly adjacent and either carry the same label
+ // or the earlier one has no fallback label while the later one is a short
+ // fallback. The merged entry keeps the earlier entry's label and start.
+ void RangeMap::Compress() {
+   auto prev = mappings_.begin();
+   auto it = prev;
+   while (it != mappings_.end()) {
+     const bool adjacent = prev->first + prev->second.size == it->first;
+     const bool mergeable =
+         adjacent &&
+         (prev->second.label == it->second.label ||
+          (!prev->second.HasFallbackLabel() && it->second.IsShortFallback()));
+
+     if (!mergeable) {
+       prev = it;
+       ++it;
+       continue;
+     }
+
+     prev->second.size += it->second.size;
+     // Step past the entry before erasing it so that |it| stays valid.
+     auto doomed = it++;
+     mappings_.erase(doomed);
+   }
+ }
+
+ // Returns true iff every address in [addr, addr + size) is covered by an
+ // entry, with no gaps between consecutive entries. An empty range is
+ // trivially covered.
+ bool RangeMap::CoversRange(uint64_t addr, uint64_t size) const {
+   auto it = FindContaining(addr);
+   const uint64_t end = addr + size;
+   assert(end >= addr);
+
+   // |cursor| is the first address not yet known to be covered.
+   uint64_t cursor = addr;
+   while (cursor < end) {
+     if (it == mappings_.end() || !EntryContains(it, cursor)) {
+       return false;
+     }
+     cursor = RangeEnd(it);
+     it++;
+   }
+   return true;
+ }
+
+ // Returns the end address (start + size) of the last entry in the map, or 0
+ // if the map is empty.
+ uint64_t RangeMap::GetMaxAddress() const {
+   if (mappings_.empty()) {
+     return 0;
+   }
+   const auto& last = *mappings_.rbegin();
+   return last.first + last.second.size;
+ }
+
+} // namespace bloaty
diff --git a/src/range_map.h b/src/range_map.h
new file mode 100644
index 0000000..89c58da
--- /dev/null
+++ b/src/range_map.h
@@ -0,0 +1,391 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// RangeMap maps
+//
+// [uint64_t, uint64_t) -> std::string, [optional other range base]
+//
+// where ranges must be non-overlapping.
+//
+// This is used to map the address space (either pointer offsets or file
+// offsets).
+//
+// The other range base allows us to use this RangeMap to translate addresses
+// from this domain to another one (like vm_addr -> file_addr or vice versa).
+//
+// This type is only exposed in the .h file for unit testing purposes.
+
+#ifndef BLOATY_RANGE_MAP_H_
+#define BLOATY_RANGE_MAP_H_
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <exception>
+#include <iterator>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+
+namespace bloaty {
+
+class RangeMapTest;
+
+// Ordered map from non-overlapping address ranges to string labels.  Each
+// entry may also record the start of a corresponding range in another domain,
+// which lets one RangeMap act as a translator for another.
+class RangeMap {
+ public:
+  RangeMap() = default;
+  RangeMap(RangeMap&& other) = default;
+  RangeMap& operator=(RangeMap&& other) = default;
+  // RangeMap is move-only.
+  RangeMap(const RangeMap& other) = delete;
+  RangeMap& operator=(const RangeMap& other) = delete;
+
+  // Adds a range to this map.
+  void AddRange(uint64_t addr, uint64_t size, const std::string& val);
+
+  // Adds a range to this map (in domain D1) that also corresponds to a
+  // different range in a different map (in domain D2).  The correspondence
+  // will be noted to allow us to translate into the other domain later.
+  void AddDualRange(uint64_t addr, uint64_t size, uint64_t otheraddr,
+                    const std::string& val);
+
+  // Adds a range to this map (in domain D1), and also adds corresponding ranges
+  // to |other| (in domain D2), using |translator| (in domain D1) to translate
+  // D1->D2.  The translation is performed using information from previous
+  // AddDualRange() calls on |translator|.
+  //
+  // Returns true if the entire range [addr, addr + size) was present in the
+  // |translator| map.  (This does not necessarily mean that every part of the
+  // range was actually translated).  If the return value is false, then the
+  // contents of |this| and |other| are undefined (Bloaty will bail in this
+  // case).
+  bool AddRangeWithTranslation(uint64_t addr, uint64_t size,
+                               const std::string& val,
+                               const RangeMap& translator, bool verbose,
+                               RangeMap* other);
+
+  // Collapses adjacent ranges with the same label.  This reduces memory usage
+  // and removes redundant noise from the output when dumping a full memory map
+  // (in normal Bloaty output it makes no difference, because all labels with
+  // the same name are added together).
+  //
+  // TODO(haberman): see if we can do this at insertion time instead, so it
+  // doesn't require a second pass.
+  void Compress();
+
+  // Returns whether this RangeMap fully covers the given range.
+  bool CoversRange(uint64_t addr, uint64_t size) const;
+
+  // Returns the end address (start + size) of the last range in this map, or
+  // 0 if the map is empty.
+  uint64_t GetMaxAddress() const;
+
+  // Translates |addr| into the other domain, returning |true| if this was
+  // successful.
+  bool Translate(uint64_t addr, uint64_t* translated) const;
+
+  // Looks for a range within this map that contains |addr|.  If found, returns
+  // true and sets |label| to the corresponding label.
+  bool TryGetLabel(uint64_t addr, std::string* label) const;
+  // Range-based variant of TryGetLabel(); see the definition in range_map.cc
+  // for the exact matching rule.
+  bool TryGetLabelForRange(uint64_t addr, uint64_t size,
+                           std::string* label) const;
+
+  // Looks for a range that starts exactly on |addr|.  If it exists, returns
+  // true and sets |size| to its size.
+  bool TryGetSize(uint64_t addr, uint64_t* size) const;
+
+  std::string DebugString() const;
+
+  // Formats one entry as "[start, end] (size=...): label", with "?" as the end
+  // when |size| is kUnknownSize, and ", other_start=..." appended when the
+  // entry has a translation.  All numbers are printed in hex.
+  static std::string EntryDebugString(uint64_t addr, uint64_t size,
+                                      uint64_t other_start,
+                                      const std::string& label) {
+    std::string end =
+        size == kUnknownSize ? "?" : absl::StrCat(absl::Hex(addr + size));
+    std::string ret = absl::StrCat("[", absl::Hex(addr), ", ", end,
+                                   "] (size=", absl::Hex(size), "): ", label);
+    // Use the named sentinel rather than a raw UINT64_MAX so this stays in
+    // sync with Entry::HasTranslation().
+    if (other_start != kNoTranslation) {
+      absl::StrAppend(&ret, ", other_start=", absl::Hex(other_start));
+    }
+    return ret;
+  }
+
+  // Formats the entry |it| points at, or "[end]" for the end iterator.
+  template <class T>
+  std::string EntryDebugString(T it) const {
+    if (it == mappings_.end()) {
+      return "[end]";
+    } else {
+      return EntryDebugString(it->first, it->second.size,
+                              it->second.other_start, it->second.label);
+    }
+  }
+
+  template <class Func>
+  static void ComputeRollup(const std::vector<const RangeMap*>& range_maps,
+                            Func func);
+
+  // Calls func(start, size) for every range, in address order.
+  template <class Func>
+  void ForEachRange(Func func) const {
+    for (auto iter = mappings_.begin(); iter != mappings_.end(); ++iter) {
+      func(iter->first, RangeEnd(iter) - iter->first);
+    }
+  }
+
+  // Calls func(label, start, size) for every range beginning with the one
+  // FindContaining(start) returns, stopping early when |func| returns false.
+  template <class Func>
+  void ForEachRangeWithStart(uint64_t start, Func func) const {
+    for (auto iter = FindContaining(start); iter != mappings_.end(); ++iter) {
+      if (!func(iter->second.label, iter->first,
+                RangeEnd(iter) - iter->first)) {
+        return;
+      }
+    }
+  }
+
+  static constexpr uint64_t kUnknownSize = UINT64_MAX;
+
+ private:
+  friend class RangeMapTest;
+  static const uint64_t kNoTranslation = UINT64_MAX;
+
+  struct Entry {
+    Entry(const std::string& label_, uint64_t size_, uint64_t other_)
+        : label(label_), size(size_), other_start(other_) {}
+    std::string label;
+    uint64_t size;
+    uint64_t other_start;  // kNoTranslation if there is no mapping.
+
+    bool HasTranslation() const { return other_start != kNoTranslation; }
+    bool HasFallbackLabel() const { return !label.empty() && label[0] == '['; }
+
+    // We assume that short regions that were unattributed (have fallback
+    // labels) are actually padding.  We could probably make this heuristic
+    // a bit more robust.
+    bool IsShortFallback() const { return size <= 16 && HasFallbackLabel(); }
+  };
+
+  typedef std::map<uint64_t, Entry> Map;
+  Map mappings_;
+
+  // Debug-only check that |iter| is non-empty, does not wrap, and does not
+  // overlap either neighbor.
+  // NOTE(review): the first assertion cannot hold for a kUnknownSize entry at
+  // a non-zero address (the sum wraps); presumably callers only check sized
+  // entries -- confirm.
+  template <class T>
+  void CheckConsistency(T iter) const {
+    assert(iter->first + iter->second.size > iter->first);
+    assert(iter == mappings_.begin() ||
+           RangeEnd(std::prev(iter)) <= iter->first);
+    assert(std::next(iter) == mappings_.end() ||
+           RangeEnd(iter) <= std::next(iter)->first);
+  }
+
+  // True if |addr| falls in [start, RangeEnd(iter)).
+  template <class T>
+  bool EntryContains(T iter, uint64_t addr) const {
+    return addr >= iter->first && addr < RangeEnd(iter);
+  }
+
+  // Like EntryContains(), except that an unknown-size entry only contains its
+  // own start address.
+  template <class T>
+  bool EntryContainsStrict(T iter, uint64_t addr) const {
+    if (iter->second.size == kUnknownSize) {
+      return iter->first == addr;
+    } else {
+      return addr >= iter->first && addr < RangeEnd(iter);
+    }
+  }
+
+  template <class T>
+  void MaybeSetLabel(T iter, const std::string& label, uint64_t addr,
+                     uint64_t end);
+
+  // Returns the end of the entry.  For an unknown-size entry this is the start
+  // of the next entry, capped at |unknown| (and |unknown| itself when there is
+  // no next entry).
+  uint64_t RangeEndUnknownLimit(Map::const_iterator iter,
+                                uint64_t unknown) const {
+    if (iter->second.size == kUnknownSize) {
+      Map::const_iterator next = std::next(iter);
+      if (IterIsEnd(next) || next->first > unknown) {
+        return unknown;
+      } else {
+        return next->first;
+      }
+    } else {
+      uint64_t ret = iter->first + iter->second.size;
+      assert(ret > iter->first);
+      return ret;
+    }
+  }
+
+  uint64_t RangeEnd(Map::const_iterator iter) const {
+    return RangeEndUnknownLimit(iter, UINT64_MAX);
+  }
+
+  bool IterIsEnd(Map::const_iterator iter) const {
+    return iter == mappings_.end();
+  }
+
+  template <class T>
+  uint64_t TranslateWithEntry(T iter, uint64_t addr) const;
+
+  template <class T>
+  bool TranslateAndTrimRangeWithEntry(T iter, uint64_t addr, uint64_t size,
+                                      uint64_t* trimmed_addr,
+                                      uint64_t* translated_addr,
+                                      uint64_t* trimmed_size) const;
+
+  // Finds the entry that contains |addr|.  If no such mapping exists, returns
+  // mappings_.end().
+  Map::const_iterator FindContaining(uint64_t addr) const;
+
+  // Finds the entry that contains |addr|, or the very next entry (which may be
+  // mappings_.end()).
+  Map::iterator FindContainingOrAfter(uint64_t addr);
+  Map::const_iterator FindContainingOrAfter(uint64_t addr) const;
+};
+
+// Walks all of |range_maps| in lockstep and calls
+// func(keys, start, end) once for every maximal sub-range over which the label
+// of every map is constant; keys[i] is the label from range_maps[i].
+//
+// range_maps[0] is the base map: every other map must start each continuous
+// region at the same address and must not have gaps or extend past it.  Any
+// violation is reported on stdout, asserts in debug builds and throws
+// std::runtime_error.
+template <class Func>
+void RangeMap::ComputeRollup(const std::vector<const RangeMap*>& range_maps,
+                             Func func) {
+  assert(range_maps.size() > 0);
+  std::vector<Map::const_iterator> iters;
+
+  if (range_maps[0]->mappings_.empty()) {
+    // An empty base map is only valid if every other map is empty too.
+    for (size_t i = 0; i < range_maps.size(); i++) {
+      const RangeMap* range_map = range_maps[i];
+      if (!range_map->mappings_.empty()) {
+        printf(
+            "Error, range (%s) exists at index %zu, but base map is empty\n",
+            range_map->EntryDebugString(range_map->mappings_.begin()).c_str(),
+            i);
+        assert(false);
+        throw std::runtime_error("Range extends beyond base map.");
+      }
+    }
+    return;
+  }
+
+  for (auto range_map : range_maps) {
+    iters.push_back(range_map->mappings_.begin());
+  }
+
+  // Iterate over all ranges in parallel to perform this transformation:
+  //
+  //   -----  -----  -----             ---------------
+  //     |      |      1                    A,X,1
+  //     |      X    -----             ---------------
+  //     |      |      |                    A,X,2
+  //     A    -----    |               ---------------
+  //     |      |      |                      |
+  //     |      |      2      ----->          |
+  //     |      Y      |                    A,Y,2
+  //     |      |      |                      |
+  //   -----    |      |               ---------------
+  //     B      |      |                    B,Y,2
+  //   -----  -----  -----             ---------------
+  //
+  //
+  //   -----  -----  -----             ---------------
+  //     C      Z      3                    C,Z,3
+  //   -----  -----  -----             ---------------
+  //
+  // All input maps must cover exactly the same domain.
+
+  // Outer loop: once per continuous (gapless) region.
+  while (true) {
+    std::vector<std::string> keys;
+    uint64_t current = 0;
+
+    if (range_maps[0]->IterIsEnd(iters[0])) {
+      // Termination condition: all iterators must be at end.
+      for (size_t i = 0; i < range_maps.size(); i++) {
+        if (!range_maps[i]->IterIsEnd(iters[i])) {
+          printf(
+              "Error, range (%s) extends beyond final base map range "
+              "(%s)\n",
+              range_maps[i]->EntryDebugString(iters[i]).c_str(),
+              range_maps[0]->EntryDebugString(std::prev(iters[0])).c_str());
+          assert(false);
+          throw std::runtime_error("Range extends beyond base map.");
+        }
+      }
+      return;
+    } else {
+      // Starting a new continuous range: all iterators must start at the same
+      // place.
+      current = iters[0]->first;
+      for (size_t i = 0; i < range_maps.size(); i++) {
+        if (range_maps[i]->IterIsEnd(iters[i])) {
+          printf(
+              "Error, no more ranges for index %zu but we need one "
+              "to match (%s)\n",
+              i, range_maps[0]->EntryDebugString(iters[0]).c_str());
+          assert(false);
+          throw std::runtime_error("No more ranges.");
+        } else if (iters[i]->first != current) {
+          printf(
+              "Error, range (%s) doesn't match the beginning of base range "
+              "(%s)\n",
+              range_maps[i]->EntryDebugString(iters[i]).c_str(),
+              range_maps[0]->EntryDebugString(iters[0]).c_str());
+          assert(false);
+          // This is a start mismatch, not an exhausted map, so say so.
+          throw std::runtime_error(
+              "Range doesn't match the beginning of base range.");
+        }
+        keys.push_back(iters[i]->second.label);
+      }
+    }
+
+    bool continuous = true;
+
+    // Inner loop: once per range within the continuous region.
+    while (continuous) {
+      uint64_t next_break = UINT64_MAX;
+
+      // The next boundary is the nearest end among the current entries.
+      for (size_t i = 0; i < iters.size(); i++) {
+        next_break = std::min(next_break, range_maps[i]->RangeEnd(iters[i]));
+      }
+
+      func(keys, current, next_break);
+
+      // Advance all iterators with ranges ending at next_break.
+      for (size_t i = 0; i < iters.size(); i++) {
+        const RangeMap& map = *range_maps[i];
+        Map::const_iterator& iter = iters[i];
+        // Once the base map has hit a gap, unknown-size entries in the other
+        // maps are treated as ending at that gap.
+        uint64_t end = continuous ? map.RangeEnd(iter)
+                                  : map.RangeEndUnknownLimit(iter, next_break);
+
+        if (end != next_break) {
+          continue;
+        }
+        ++iter;
+
+        // Test for discontinuity.
+        if (map.IterIsEnd(iter) || iter->first != next_break) {
+          // Only the base map (index 0) may introduce a gap; a gap in any
+          // other map while the base is still continuous is an error.
+          if (i > 0 && continuous) {
+            printf(
+                "Error, gap between ranges (%s) and (%s) fails to cover base "
+                "range (%s)\n",
+                map.EntryDebugString(std::prev(iter)).c_str(),
+                map.EntryDebugString(iter).c_str(),
+                range_maps[0]->EntryDebugString(iters[0]).c_str());
+            assert(false);
+            throw std::runtime_error("Entry range extends beyond base range");
+          }
+          assert(i == 0 || !continuous);
+          continuous = false;
+        } else {
+          assert(continuous);
+          keys[i] = iter->second.label;
+        }
+      }
+      current = next_break;
+    }
+  }
+}
+
+
+} // namespace bloaty
+
+#endif // BLOATY_RANGE_MAP_H_
diff --git a/src/re.h b/src/re.h
new file mode 100644
index 0000000..d990767
--- /dev/null
+++ b/src/re.h
@@ -0,0 +1,92 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BLOATY_RE_H_
+#define BLOATY_RE_H_
+
+#include <string>
+
+#ifdef USE_RE2
+#include "re2/re2.h"
+#endif
+
+#include "absl/base/attributes.h"
+#include "bloaty.h"
+
+namespace bloaty {
+
+#ifdef USE_RE2
+// Regular-expression facade backed by RE2.  Every static helper forwards
+// directly to the RE2 function of the same name.
+class ReImpl {
+ public:
+  ReImpl(const char* pattern) : re2_(pattern) {}
+  ReImpl(const std::string& pattern) : re2_(pattern) {}
+
+  // True if the pattern compiled successfully.  Const so that it can be
+  // queried through the const references the static helpers take.
+  bool ok() const { return re2_.ok(); }
+
+  static bool Extract(std::string text, const ReImpl& re, std::string rewrite,
+                      std::string* out) {
+    return RE2::Extract(text, re.re2_, rewrite, out);
+  }
+  template <typename... A>
+  static bool PartialMatch(const std::string& text, const ReImpl& re,
+                           A&&... a) {
+    return RE2::PartialMatch(text, re.re2_, a...);
+  }
+
+  static int GlobalReplace(std::string* str, const ReImpl& re,
+                           std::string rewrite) {
+    return RE2::GlobalReplace(str, re.re2_, rewrite);
+  }
+  static bool Replace(std::string* str, const ReImpl& re, std::string rewrite) {
+    return RE2::Replace(str, re.re2_, rewrite);
+  }
+
+ private:
+  RE2 re2_;
+};
+#else
+}
+
+// Used by the ReImpl stub below when the build has no RE2: unconditionally
+// throws a string literal (a const char*, not a std::exception subclass).
+// NOTE(review): a leading-underscore name at global scope is reserved for the
+// implementation; renaming requires updating every call in the stub class.
+ABSL_ATTRIBUTE_NORETURN
+static void _abort() { throw "No support for regular expressions"; }
+
+namespace bloaty {
+// Stub used when USE_RE2 is not defined.  It mirrors the member names of the
+// RE2-backed ReImpl, but every member (including the constructors) calls
+// _abort(), which throws.
+class ReImpl {
+ public:
+ ReImpl(const char*) { _abort(); }
+ ReImpl(const std::string&) { _abort(); }
+ // No return statement is needed: _abort() is declared noreturn.
+ bool ok() { _abort(); }
+
+ ABSL_ATTRIBUTE_NORETURN
+ static bool Extract(std::string, const ReImpl&, std::string, std::string*) {
+ _abort();
+ }
+ template <typename... A>
+ ABSL_ATTRIBUTE_NORETURN static bool PartialMatch(const std::string&,
+ const ReImpl&, A&&...) {
+ _abort();
+ }
+ ABSL_ATTRIBUTE_NORETURN
+ static int GlobalReplace(std::string*, const ReImpl&, std::string) {
+ _abort();
+ }
+ ABSL_ATTRIBUTE_NORETURN
+ static bool Replace(std::string*, const ReImpl&, std::string) { _abort(); }
+
+ private:
+};
+#endif
+
+} // namespace bloaty
+
+#endif // BLOATY_RE_H_
diff --git a/src/webassembly.cc b/src/webassembly.cc
new file mode 100644
index 0000000..044fffa
--- /dev/null
+++ b/src/webassembly.cc
@@ -0,0 +1,411 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+
+#include "absl/strings/substitute.h"
+
+// Throws bloaty::Error carrying |str|, this source file's name and |line|.
+// Call it through the THROW/THROWF macros so |line| is the call site's
+// __LINE__.
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+#define THROW(msg) Throw(msg, __LINE__)
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
+
+using absl::string_view;
+
+namespace bloaty {
+namespace wasm {
+
+// Copies sizeof(T) bytes from the front of |data| into a T (host byte order),
+// consumes them, and returns the value.  Throws if |data| is too short.
+template <class T>
+T ReadMemcpy(string_view* data) {
+  if (data->size() < sizeof(T)) {
+    THROW("premature EOF reading fixed-length wasm data");
+  }
+  T value;
+  memcpy(&value, data->data(), sizeof(T));
+  data->remove_prefix(sizeof(T));
+  return value;
+}
+
+// Decodes one LEB128 integer from the front of |data| and consumes it.
+//
+// is_signed: when true, the result is sign-extended to 64 bits from the last
+//            payload bit that was read, so narrowing casts in the callers
+//            yield the right negative value.
+// size:      nominal bit width of the caller's type.  It is no longer used
+//            for decoding (see below) and is kept only so that existing
+//            callers remain valid.
+//
+// Throws if the input ends, or ten groups have been read, before a byte
+// without the continuation bit is found.
+uint64_t ReadLEB128Internal(bool is_signed, size_t size, string_view* data) {
+  (void)size;
+  const int kAccumulatorBits = 64;
+  const int kMaxShift = 70;  // Ten 7-bit groups.
+  uint64_t ret = 0;
+  int shift = 0;
+  const char* ptr = data->data();
+  const char* limit = ptr + data->size();
+
+  while (ptr < limit && shift < kMaxShift) {
+    char byte = *(ptr++);
+    ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
+    shift += 7;
+    if ((byte & 0x80) == 0) {
+      data->remove_prefix(ptr - data->data());
+      // Sign-extend whenever the accumulator still has unfilled high bits.
+      // The previous test (shift < size) skipped this for a one-byte varint7
+      // (shift == size == 7), so 0x7f decoded as +127 instead of -1.  The
+      // bound also keeps the shift below the width of the 64-bit operand.
+      if (is_signed && shift < kAccumulatorBits && (byte & 0x40)) {
+        ret |= -(1ULL << shift);
+      }
+      return ret;
+    }
+  }
+
+  THROW("corrupt wasm data, unterminated LEB128");
+}
+
+// Reads an unsigned LEB128 value and reports whether it is non-zero.
+bool ReadVarUInt1(string_view* data) {
+  const uint64_t raw = ReadLEB128Internal(false, 1, data);
+  return raw != 0;
+}
+
+// Reads an unsigned LEB128 value and narrows it to uint8_t.  The cast goes
+// straight to the return type instead of through `char`, whose signedness is
+// implementation-defined.
+uint8_t ReadVarUInt7(string_view* data) {
+  return static_cast<uint8_t>(ReadLEB128Internal(false, 7, data));
+}
+
+// Reads an unsigned LEB128 value and narrows it to uint32_t.
+uint32_t ReadVarUInt32(string_view* data) {
+  const uint64_t raw = ReadLEB128Internal(false, 32, data);
+  return static_cast<uint32_t>(raw);
+}
+
+// Reads a signed LEB128 value and narrows it to int8_t.
+int8_t ReadVarint7(string_view* data) {
+  const uint64_t raw = ReadLEB128Internal(true, 7, data);
+  return static_cast<int8_t>(raw);
+}
+
+// Returns the first |bytes| bytes of |data| and consumes them.  Throws if
+// fewer than |bytes| bytes remain.
+string_view ReadPiece(size_t bytes, string_view* data) {
+  if (data->size() < bytes) {
+    // This is the wasm parser; the message previously said "DWARF".
+    THROW("premature EOF reading variable-length wasm data");
+  }
+  string_view ret = data->substr(0, bytes);
+  data->remove_prefix(bytes);
+  return ret;
+}
+
+// Consumes the wasm file header from the front of |data|.
+//
+// Returns true if the 4 magic bytes match; the 4-byte version field is then
+// consumed as well (a truncated version field throws).  Returns false if the
+// magic does not match (the 4 bytes are still consumed), or, without
+// consuming anything, if fewer than 4 bytes are available.
+bool ReadMagic(string_view* data) {
+  // "\0asm", compared bytewise so the check does not depend on the host's
+  // byte order.
+  const string_view wasm_magic("\0asm", 4);
+
+  if (data->size() < wasm_magic.size()) {
+    // Too short to be wasm at all: answer "not wasm" instead of throwing, so
+    // that file-type probing can move on.
+    return false;
+  }
+
+  const bool matches = data->substr(0, wasm_magic.size()) == wasm_magic;
+  data->remove_prefix(wasm_magic.size());
+  if (!matches) {
+    return false;
+  }
+
+  // TODO(haberman): do we need to fail if this is >1?
+  uint32_t version = ReadMemcpy<uint32_t>(data);
+  (void)version;
+
+  return true;
+}
+
+// One section of a wasm file.
+class Section {
+ public:
+  uint32_t id;
+  std::string name;
+  string_view data;      // The whole section, header included.
+  string_view contents;  // The payload; for custom sections, what follows
+                         // the name.
+
+  // Parses one section from the front of |data_param|.  |data_param| is only
+  // advanced if parsing succeeds; malformed input or an id above 13 throws.
+  static Section Read(string_view* data_param) {
+    string_view cursor = *data_param;
+    const string_view section_start = cursor;
+
+    Section section;
+    section.id = ReadVarUInt7(&cursor);
+    const uint32_t payload_size = ReadVarUInt32(&cursor);
+    section.contents = ReadPiece(payload_size, &cursor);
+    // The header is everything between the section start and its payload.
+    const size_t header_size = section.contents.data() - section_start.data();
+    section.data = section_start.substr(0, payload_size + header_size);
+
+    if (section.id == 0) {
+      // Custom section: the payload begins with a length-prefixed name.
+      const uint32_t name_len = ReadVarUInt32(&section.contents);
+      section.name = std::string(ReadPiece(name_len, &section.contents));
+    } else if (section.id <= 13) {
+      section.name = names[section.id];
+    } else {
+      THROWF("Unknown section id: $0", section.id);
+    }
+
+    *data_param = cursor;
+    return section;
+  }
+
+  // Ids of the known sections; each value indexes |names|.
+  enum Name {
+    kType = 1,
+    kImport = 2,
+    kFunction = 3,
+    kTable = 4,
+    kMemory = 5,
+    kGlobal = 6,
+    kExport = 7,
+    kStart = 8,
+    kElement = 9,
+    kCode = 10,
+    kData = 11,
+    kDataCount = 12,
+    kEvent = 13,
+  };
+
+  static const char* names[];
+};
+
+// Display names indexed by section id (Section::Read looks up names[id] for
+// ids 1..13). Keep the entries in step with the Section::Name enum.
+const char* Section::names[] = {
+ "<none>", // 0
+ "Type", // 1
+ "Import", // 2
+ "Function", // 3
+ "Table", // 4
+ "Memory", // 5
+ "Global", // 6
+ "Export", // 7
+ "Start", // 8
+ "Element", // 9
+ "Code", // 10
+ "Data", // 11
+ "DataCount", // 12
+ "Event", // 13
+};
+
+// Values of the one-byte "kind" field of an import entry, as switched on in
+// GetNumFunctionImports().
+struct ExternalKind {
+ enum Kind {
+ kFunction = 0,
+ kTable = 1,
+ kMemory = 2,
+ kGlobal = 3,
+ };
+};
+
+// Calls section_func(section) for every section of |file|, in file order.
+template <class Func>
+void ForEachSection(string_view file, Func&& section_func) {
+  string_view remaining = file;
+  // Consume the header; the match result is deliberately ignored here.
+  ReadMagic(&remaining);
+
+  for (; !remaining.empty();) {
+    Section current = Section::Read(&remaining);
+    section_func(current);
+  }
+}
+
+// Reports every section of the input file to |sink|, labelled by section
+// name.
+void ParseSections(RangeSink* sink) {
+  auto add_section = [sink](const Section& section) {
+    sink->AddFileRange("wasm_sections", section.name, section.data);
+  };
+  ForEachSection(sink->input_file().data(), add_section);
+}
+
+typedef std::unordered_map<int, std::string> FuncNames;
+
+// Parses the contents of a "name" custom section.  For every entry of the
+// function-names subsection, records index -> name in |names| and reports the
+// entry's bytes to |sink|.  Subsections of any other type are skipped.
+//
+// Throws bloaty::Error on truncated input.
+void ReadFunctionNames(const Section& section, FuncNames* names,
+                       RangeSink* sink) {
+  enum class NameType {
+    kModule = 0,
+    kFunction = 1,
+    kLocal = 2,
+  };
+
+  string_view data = section.contents;
+
+  while (!data.empty()) {
+    char type = ReadVarUInt7(&data);
+    uint32_t size = ReadVarUInt32(&data);
+    // ReadPiece() bounds-checks |size|.  Slicing with substr(size) directly
+    // would raise std::out_of_range on a corrupt length instead of the
+    // bloaty::Error every other parse failure in this file produces.
+    string_view subsection = ReadPiece(size, &data);
+
+    if (static_cast<NameType>(type) == NameType::kFunction) {
+      uint32_t count = ReadVarUInt32(&subsection);
+      for (uint32_t i = 0; i < count; i++) {
+        string_view entry = subsection;
+        uint32_t index = ReadVarUInt32(&subsection);
+        uint32_t name_len = ReadVarUInt32(&subsection);
+        string_view name = ReadPiece(name_len, &subsection);
+        // Trim |entry| to exactly this entry: index, length and name bytes.
+        entry = entry.substr(0, name.data() - entry.data() + name.size());
+        sink->AddFileRange("wasm_funcname", name, entry);
+        (*names)[index] = std::string(name);
+      }
+    }
+  }
+}
+
+// Consumes one value-type code (a signed 7-bit LEB128) and returns it.
+int ReadValueType(string_view* data) {
+ return ReadVarint7(data);
+}
+
+// Consumes one element-type code (a signed 7-bit LEB128) and returns it.
+int ReadElemType(string_view* data) {
+ return ReadVarint7(data);
+}
+
+// Skips a limits record: a one-bit flag, one varuint32, and a second
+// varuint32 that is present only when the flag is set (presumably the initial
+// and maximum sizes of the wasm format -- confirm against the spec).
+void ReadResizableLimits(string_view* data) {
+  const bool has_second_value = ReadVarUInt1(data);
+  ReadVarUInt32(data);
+  if (!has_second_value) return;
+  ReadVarUInt32(data);
+}
+
+// Skips a global type: a value type followed by a one-bit flag (presumably
+// the mutability flag -- confirm against the wasm spec).
+void ReadGlobalType(string_view* data) {
+ ReadValueType(data);
+ ReadVarUInt1(data);
+}
+
+// Skips a table type: an element type followed by its limits.
+void ReadTableType(string_view* data) {
+ ReadElemType(data);
+ ReadResizableLimits(data);
+}
+
+// Skips a memory type, which consists solely of a limits record.
+void ReadMemoryType(string_view* data) {
+ ReadResizableLimits(data);
+}
+
+// Walks the import section and returns how many of its entries import a
+// function.  Throws on an unrecognized import kind or truncated data.
+uint32_t GetNumFunctionImports(const Section& section) {
+  assert(section.id == Section::kImport);
+  string_view cursor = section.contents;
+
+  const uint32_t import_count = ReadVarUInt32(&cursor);
+  uint32_t function_imports = 0;
+
+  for (uint32_t remaining = import_count; remaining > 0; --remaining) {
+    // Each entry starts with two length-prefixed strings (module, field);
+    // neither is needed here.
+    ReadPiece(ReadVarUInt32(&cursor), &cursor);
+    ReadPiece(ReadVarUInt32(&cursor), &cursor);
+    const auto kind = ReadMemcpy<uint8_t>(&cursor);
+
+    // The rest of the entry depends on |kind|; skip it, counting functions.
+    switch (kind) {
+      case ExternalKind::kFunction:
+        ++function_imports;
+        ReadVarUInt32(&cursor);
+        break;
+      case ExternalKind::kTable:
+        ReadTableType(&cursor);
+        break;
+      case ExternalKind::kMemory:
+        ReadMemoryType(&cursor);
+        break;
+      case ExternalKind::kGlobal:
+        ReadGlobalType(&cursor);
+        break;
+      default:
+        THROWF("Unrecognized import kind: $0", kind);
+    }
+  }
+
+  return function_imports;
+}
+
+// Reports every function body in the code section to |sink|.
+//
+// Body i is looked up in |names| under index num_imports + i.  A named
+// function is reported under its demangled name, an unnamed one as "func[i]".
+// The reported range covers the size prefix as well as the body.
+//
+// Throws bloaty::Error if a body's declared size runs past the section.
+void ReadCodeSection(const Section& section, const FuncNames& names,
+                     uint32_t num_imports, RangeSink* sink) {
+  string_view data = section.contents;
+
+  uint32_t count = ReadVarUInt32(&data);
+
+  for (uint32_t i = 0; i < count; i++) {
+    string_view func = data;
+    uint32_t size = ReadVarUInt32(&data);
+    size_t prefix_size = data.data() - func.data();
+
+    // ReadPiece() bounds-checks |size| and skips the body.  Advancing with
+    // substr(size) directly would raise std::out_of_range on a corrupt size.
+    ReadPiece(size, &data);
+    func = func.substr(0, prefix_size + size);
+
+    auto iter = names.find(num_imports + i);
+
+    if (iter == names.end()) {
+      std::string name = "func[" + std::to_string(i) + "]";
+      sink->AddFileRange("wasm_function", name, func);
+    } else {
+      sink->AddFileRange("wasm_function",
+                         ItaniumDemangle(iter->second, sink->data_source()),
+                         func);
+    }
+  }
+}
+
+// Reports per-function ranges to |sink| in two passes over the file: the
+// first collects function names from the "name" custom section, the second
+// counts function imports and attributes the code section's bodies.
+void ParseSymbols(RangeSink* sink) {
+  FuncNames func_names;
+  uint32_t num_imports = 0;
+
+  // First pass: read the custom naming section to get function names.
+  auto collect_names = [&func_names, sink](const Section& section) {
+    if (section.name == "name") {
+      ReadFunctionNames(section, &func_names, sink);
+    }
+  };
+  ForEachSection(sink->input_file().data(), collect_names);
+
+  // Second pass: read the function/code sections.
+  auto attribute_code = [&func_names, &num_imports,
+                         sink](const Section& section) {
+    if (section.id == Section::kImport) {
+      num_imports = GetNumFunctionImports(section);
+    } else if (section.id == Section::kCode) {
+      ReadCodeSection(section, func_names, num_imports, sink);
+    }
+  };
+  ForEachSection(sink->input_file().data(), attribute_code);
+}
+
+// Adds catch-all ranges to |sink|: one "[section NAME]" range per section,
+// then "[WASM Header]" for the first 8 bytes of the file.
+void AddWebAssemblyFallback(RangeSink* sink) {
+  auto add_section_fallback = [sink](const Section& section) {
+    const std::string label = "[section " + section.name + "]";
+    sink->AddFileRange("wasm_overhead", label, section.data);
+  };
+  ForEachSection(sink->input_file().data(), add_section_fallback);
+
+  const auto header = sink->input_file().data().substr(0, 8);
+  sink->AddFileRange("wasm_overhead", "[WASM Header]", header);
+}
+
+// ObjectFile implementation for WebAssembly binaries.
+class WebAssemblyObjectFile : public ObjectFile {
+ public:
+  WebAssemblyObjectFile(std::unique_ptr<InputFile> file_data)
+      : ObjectFile(std::move(file_data)) {}
+
+  // Always returns an empty string.
+  std::string GetBuildId() const override {
+    // TODO(haberman): does WebAssembly support this?
+    return {};
+  }
+
+  // Fills each sink according to its data source, then adds the fallback
+  // ranges.  Throws for data sources this format does not handle.
+  void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
+    for (RangeSink* sink : sinks) {
+      switch (sink->data_source()) {
+        // Segments and sections are both reported as wasm sections.
+        case DataSource::kSegments:
+        case DataSource::kSections:
+          ParseSections(sink);
+          break;
+        case DataSource::kSymbols:
+        case DataSource::kRawSymbols:
+        case DataSource::kShortSymbols:
+        case DataSource::kFullSymbols:
+          ParseSymbols(sink);
+          break;
+        case DataSource::kArchiveMembers:
+        case DataSource::kCompileUnits:
+        case DataSource::kInlines:
+        default:
+          THROW("WebAssembly doesn't support this data source");
+      }
+      AddWebAssemblyFallback(sink);
+    }
+  }
+
+  // Not implemented: prints a warning and returns false.
+  bool GetDisassemblyInfo(absl::string_view /*symbol*/,
+                          DataSource /*symbol_source*/,
+                          DisassemblyInfo* /*info*/) const override {
+    WARN("WebAssembly files do not support disassembly yet");
+    return false;
+  }
+};
+
+} // namespace wasm
+
+// Returns a WebAssembly ObjectFile that takes ownership of |file| if the file
+// starts with the wasm magic; otherwise returns nullptr and leaves |file|
+// untouched.
+std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(
+    std::unique_ptr<InputFile>& file) {
+  // Probe a copy of the view so the file's own data is not consumed.
+  string_view probe = file->data();
+  if (!wasm::ReadMagic(&probe)) {
+    return nullptr;
+  }
+
+  return std::unique_ptr<ObjectFile>(
+      new wasm::WebAssemblyObjectFile(std::move(file)));
+}
+
+} // namespace bloaty
diff --git a/tests/bloaty_misc_test.cc b/tests/bloaty_misc_test.cc
new file mode 100644
index 0000000..ef23d5a
--- /dev/null
+++ b/tests/bloaty_misc_test.cc
@@ -0,0 +1,53 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test.h"
+
+// Runs Bloaty on the 01-no-sections.bin fixture. There are no explicit
+// expectations; presumably RunBloaty itself fails the test on error (confirm
+// in test.h).
+TEST_F(BloatyTest, NoSections) {
+ RunBloaty({"bloaty", "01-no-sections.bin"});
+}
+
+// Runs Bloaty on the 02-section-count-overflow.o fixture. There are no
+// explicit expectations; presumably RunBloaty itself fails the test on error
+// (confirm in test.h).
+TEST_F(BloatyTest, SectionCountOverflow) {
+ RunBloaty({"bloaty", "02-section-count-overflow.o"});
+}
+
+// Runs the compileunits and inlines data sources on a small binary (the
+// fixture name says it once crashed "inlines"), then checks the VM size
+// reported by the last run.
+TEST_F(BloatyTest, InlinesOnSmallFile) {
+ RunBloaty(
+ {"bloaty", "-d", "compileunits", "03-small-binary-that-crashed-inlines.bin"});
+ RunBloaty(
+ {"bloaty", "-d", "inlines", "03-small-binary-that-crashed-inlines.bin"});
+ EXPECT_EQ(top_row_->vmsize, 2340);
+}
+
+// Runs the compileunits and inlines data sources on the
+// 04-go-binary-with-ref-addr.bin fixture. There are no explicit expectations;
+// presumably RunBloaty itself fails the test on error (confirm in test.h).
+TEST_F(BloatyTest, GoBinary) {
+ RunBloaty(
+ {"bloaty", "-d", "compileunits", "04-go-binary-with-ref-addr.bin"});
+ RunBloaty(
+ {"bloaty", "-d", "inlines", "04-go-binary-with-ref-addr.bin"});
+}
+
+// Passes the same file to Bloaty many times in one invocation and checks
+// that the reported file size is the single-file size times the number of
+// copies.
+TEST_F(BloatyTest, MultiThreaded) {
+  const std::string file = "02-section-count-overflow.o";
+  RunBloaty({"bloaty", file});
+  size_t file_size = top_row_->filesize;
+
+  // Bloaty doesn't know or care that you are passing the same file multiple
+  // times.
+  std::vector<std::string> args{"bloaty"};
+  const int count = 100;
+  for (int i = 0; i < count; i++) {
+    args.push_back(file);
+  }
+  RunBloaty(args);  // Heavily multithreaded test.
+  // Derive the expectation from |count| so the two cannot drift apart.
+  EXPECT_EQ(top_row_->filesize, file_size * count);
+}
diff --git a/tests/bloaty_test.cc b/tests/bloaty_test.cc
new file mode 100644
index 0000000..2783ac8
--- /dev/null
+++ b/tests/bloaty_test.cc
@@ -0,0 +1,291 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test.h"
+
+// Checks "01-empty.o" under the default, "segments" and "symbols" data
+// sources: total VM size must be 0, total file size must equal the on-disk
+// size, and there must be more than one child row. Also checks that the
+// "compileunits" and "inlines" data sources are rejected for this input.
+TEST_F(BloatyTest, EmptyObjectFile) {
+ std::string file = "01-empty.o";
+ uint64_t size;
+ ASSERT_TRUE(GetFileSize(file, &size));
+
+ // Empty .c file should result in a .o file with no vmsize.
+ RunBloaty({"bloaty", file});
+ EXPECT_EQ(top_row_->vmsize, 0);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 1);
+
+ // Same with segments (we fake segments on .o files).
+ RunBloaty({"bloaty", "-d", "segments", file});
+ EXPECT_EQ(top_row_->vmsize, 0);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 1);
+
+ // Same with symbols.
+ RunBloaty({"bloaty", "-d", "symbols", file});
+ EXPECT_EQ(top_row_->vmsize, 0);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 1);
+
+ // We can't run any of these targets against object files.
+ // Both invocations are expected to fail with this text in the error.
+ std::string errmsg = "can't use data source";
+ AssertBloatyFails({"bloaty", "-d", "compileunits", file}, errmsg);
+ AssertBloatyFails({"bloaty", "-d", "inlines", file}, errmsg);
+}
+
+// Checks "02-simple.o": overall sizes under the default and "segments" data
+// sources, attribution to the input file under "inputfiles", the expected
+// symbol rows under "symbols", and the symbols nested under the .bss, .data
+// and .rodata rows under "sections,symbols".
+TEST_F(BloatyTest, SimpleObjectFile) {
+ std::string file = "02-simple.o";
+ uint64_t size;
+ ASSERT_TRUE(GetFileSize(file, &size));
+
+ // Test "-n 0" which should return an unlimited number of rows.
+ RunBloaty({"bloaty", "-n", "0", file});
+ EXPECT_GT(top_row_->vmsize, 64);
+ EXPECT_LT(top_row_->vmsize, 300);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 1);
+
+ // Same with segments (we fake segments on .o files).
+ RunBloaty({"bloaty", "-d", "segments", file});
+ EXPECT_GT(top_row_->vmsize, 64);
+ EXPECT_LT(top_row_->vmsize, 300);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 1);
+
+ // For inputfiles we should get everything attributed to the input file.
+ RunBloaty({"bloaty", "-d", "inputfiles", file});
+ AssertChildren(*top_row_, {
+ std::make_tuple("02-simple.o", kUnknown, kUnknown)
+ });
+
+ // For symbols we should get entries for all our expected symbols.
+ // Each tuple is (row name, second value, third value); NOTE(review): the
+ // values look like (vmsize, filesize) with kUnknown/kSameAsVM as wildcards,
+ // but AssertChildren is defined outside this view -- confirm there.
+ RunBloaty({"bloaty", "-d", "symbols", "-n", "40", "-s", "vm", file});
+ AssertChildren(*top_row_, {
+ std::make_tuple("func1", kUnknown, kSameAsVM),
+ std::make_tuple("func2", kUnknown, kSameAsVM),
+ std::make_tuple("bss_a", 8, 0),
+ std::make_tuple("data_a", 8, 8),
+ std::make_tuple("rodata_a", 8, 8),
+ std::make_tuple("bss_b", 4, 0),
+ std::make_tuple("data_b", 4, 4),
+ std::make_tuple("rodata_b", 4, 4),
+ });
+
+ // Two-level breakdown: look up each section row and check its symbols.
+ RunBloaty({"bloaty", "-d", "sections,symbols", "-n", "50", file});
+
+ auto row = FindRow(".bss");
+ ASSERT_TRUE(row != nullptr);
+ AssertChildren(*row, {
+ std::make_tuple("bss_a", 8, 0),
+ std::make_tuple("bss_b", 4, 0),
+ });
+
+ row = FindRow(".data");
+ ASSERT_TRUE(row != nullptr);
+ AssertChildren(*row, {
+ std::make_tuple("data_a", 8, 8),
+ std::make_tuple("data_b", 4, 4),
+ });
+
+ row = FindRow(".rodata");
+ ASSERT_TRUE(row != nullptr);
+ AssertChildren(*row, {
+ std::make_tuple("rodata_a", 8, 8),
+ std::make_tuple("rodata_b", 4, 4),
+ });
+}
+
+// Checks "03-simple.a": overall VM size under the default and "segments"
+// data sources, the expected symbol rows under "symbols", and the per-member
+// breakdown under "armembers,symbols".
+TEST_F(BloatyTest, SimpleArchiveFile) {
+ std::string file = "03-simple.a";
+ uint64_t size;
+ ASSERT_TRUE(GetFileSize(file, &size));
+
+ RunBloaty({"bloaty", file});
+ EXPECT_GT(top_row_->vmsize, 8000);
+ EXPECT_LT(top_row_->vmsize, 12000);
+ // NOTE(review): the filesize check is disabled here and below, so `size` is
+ // only used to prove the file can be opened. The reason is not recorded in
+ // this file -- confirm before re-enabling.
+ //EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 3);
+
+ RunBloaty({"bloaty", "-d", "segments", file});
+ EXPECT_GT(top_row_->vmsize, 8000);
+ EXPECT_LT(top_row_->vmsize, 12000);
+ //EXPECT_EQ(top_row_->filesize, size);
+
+ // Symbols from all archive members, merged into one list.
+ RunBloaty({"bloaty", "-d", "symbols", "-n", "40", "-s", "vm", file});
+ AssertChildren(*top_row_, {
+ std::make_tuple("bar_x", 4000, 4000),
+ std::make_tuple("foo_x", 4000, 0),
+ std::make_tuple("bar_func", kUnknown, kSameAsVM),
+ std::make_tuple("foo_func", kUnknown, kSameAsVM),
+ std::make_tuple("long_filename_x", 12, 12),
+ std::make_tuple("bar_y", 4, 4),
+ std::make_tuple("bar_z", 4, 0),
+ std::make_tuple("foo_y", 4, 0),
+ std::make_tuple("long_filename_y", 4, 4),
+ });
+
+ // Top level is one row per archive member, including a member whose name is
+ // longer than sixteen characters.
+ RunBloaty({"bloaty", "-d", "armembers,symbols", file});
+ AssertChildren(*top_row_,
+ {
+ std::make_tuple("bar.o", kUnknown, kUnknown),
+ std::make_tuple("foo.o", kUnknown, kUnknown),
+ std::make_tuple("a_filename_longer_than_sixteen_chars.o",
+ kUnknown, kUnknown),
+ });
+
+ // Second level: the symbols attributed to each member.
+ auto row = FindRow("bar.o");
+ ASSERT_TRUE(row != nullptr);
+ AssertChildren(*row, {
+ std::make_tuple("bar_x", 4000, 4000),
+ std::make_tuple("bar_func", kUnknown, kSameAsVM),
+ std::make_tuple("bar_y", 4, 4),
+ std::make_tuple("bar_z", 4, 0),
+ });
+
+ row = FindRow("foo.o");
+ ASSERT_TRUE(row != nullptr);
+ AssertChildren(*row, {
+ std::make_tuple("foo_x", 4000, 0),
+ std::make_tuple("foo_func", kUnknown, kSameAsVM),
+ std::make_tuple("foo_y", 4, 0),
+ });
+
+ row = FindRow("a_filename_longer_than_sixteen_chars.o");
+ ASSERT_TRUE(row != nullptr);
+ AssertChildren(*row, {
+ std::make_tuple("long_filename_x", 12, 12),
+ std::make_tuple("long_filename_y", 4, 4),
+ });
+}
+
+// Checks "04-simple.so": overall VM and file sizes under the default and
+// "segments" data sources, and the expected symbol rows under "symbols".
+TEST_F(BloatyTest, SimpleSharedObjectFile) {
+ std::string file = "04-simple.so";
+ uint64_t size;
+ ASSERT_TRUE(GetFileSize(file, &size));
+
+ RunBloaty({"bloaty", file});
+ EXPECT_GT(top_row_->vmsize, 8000);
+ EXPECT_LT(top_row_->vmsize, 12000);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 3);
+
+ RunBloaty({"bloaty", "-d", "segments", file});
+ EXPECT_GT(top_row_->vmsize, 8000);
+ EXPECT_LT(top_row_->vmsize, 12000);
+ EXPECT_EQ(top_row_->filesize, size);
+
+ // Unlike the other tests in this file, this invocation passes no "-s vm".
+ RunBloaty({"bloaty", "-d", "symbols", "-n", "50", file});
+ AssertChildren(*top_row_, {
+ std::make_tuple("bar_x", 4000, 4000),
+ std::make_tuple("foo_x", 4000, kUnknown),
+ std::make_tuple("bar_func", kUnknown, kSameAsVM),
+ std::make_tuple("foo_func", kUnknown, kSameAsVM),
+ });
+}
+
+// Checks "05-binary.bin": overall VM and file sizes under the default and
+// "segments" data sources, the expected symbol rows under "symbols", the
+// per-compile-unit symbols under "compileunits,symbols", and finally runs
+// "sections,inlines" without checking its output.
+TEST_F(BloatyTest, SimpleBinary) {
+ std::string file = "05-binary.bin";
+ uint64_t size;
+ ASSERT_TRUE(GetFileSize(file, &size));
+
+ RunBloaty({"bloaty", file});
+ EXPECT_GT(top_row_->vmsize, 8000);
+ EXPECT_LT(top_row_->vmsize, 12000);
+ EXPECT_EQ(top_row_->filesize, size);
+ EXPECT_GT(top_row_->sorted_children.size(), 3);
+
+ RunBloaty({"bloaty", "-d", "segments", file});
+ EXPECT_GT(top_row_->vmsize, 8000);
+ EXPECT_LT(top_row_->vmsize, 12000);
+ EXPECT_EQ(top_row_->filesize, size);
+
+ RunBloaty({"bloaty", "-d", "symbols", "-n", "50", "-s", "vm", file});
+ AssertChildren(*top_row_, {
+ std::make_tuple("bar_x", 4000, 4000),
+ std::make_tuple("foo_x", 4000, 0),
+ std::make_tuple("bar_func", kUnknown, kSameAsVM),
+ std::make_tuple("foo_func", kUnknown, kSameAsVM),
+ std::make_tuple("main", kUnknown, kSameAsVM),
+ std::make_tuple("bar_y", 4, 4),
+ std::make_tuple("bar_z", 4, 0),
+ std::make_tuple("foo_y", 4, 0)
+ });
+
+ // Compile units are looked up by the row names "bar.o.c" and "foo.o.c".
+ RunBloaty({"bloaty", "-d", "compileunits,symbols", file});
+ auto row = FindRow("bar.o.c");
+ ASSERT_TRUE(row != nullptr);
+
+ // This only includes functions (not data) for now.
+ // NOTE(review): the expected rows below do include names that appear as
+ // data symbols elsewhere in this test (bar_x, bar_y, bar_z), so the comment
+ // above looks out of date -- confirm.
+ AssertChildren(*row, {
+ std::make_tuple("bar_x", 4000, kSameAsVM),
+ std::make_tuple("bar_func", kUnknown, kSameAsVM),
+ std::make_tuple("bar_y", kUnknown, kSameAsVM),
+ std::make_tuple("bar_z", kUnknown, kSameAsVM),
+ });
+
+ row = FindRow("foo.o.c");
+ ASSERT_TRUE(row != nullptr);
+
+ // This only includes functions (not data) for now.
+ // NOTE(review): same remark as for "bar.o.c" -- foo_x and foo_y are listed.
+ AssertChildren(*row, {
+ std::make_tuple("foo_x", 4000, 0),
+ std::make_tuple("foo_func", kUnknown, kSameAsVM),
+ std::make_tuple("foo_y", kUnknown, kSameAsVM),
+ });
+
+ // Run only; nothing about the result is asserted in this test.
+ RunBloaty({"bloaty", "-d", "sections,inlines", file});
+}
+
+// Checks the "inputfiles" data source with two inputs: first directly (one
+// row per file, file size equal to the on-disk size), then through a custom
+// data source whose rewrite rule is expected to fold both rows into a single
+// "binary" row carrying the combined file size.
+TEST_F(BloatyTest, InputFiles) {
+  std::string file1 = "05-binary.bin";
+  std::string file2 = "07-binary-stripped.bin";
+  uint64_t size1, size2;
+  ASSERT_TRUE(GetFileSize(file1, &size1));
+  ASSERT_TRUE(GetFileSize(file2, &size2));
+  RunBloaty({"bloaty", file1, file2, "-d", "inputfiles"});
+  AssertChildren(*top_row_, {std::make_tuple(file1, kUnknown, size1),
+                             std::make_tuple(file2, kUnknown, size2)});
+
+  // Should work with custom data sources.
+  bloaty::Options options;
+  // The parse result is kept in a local and asserted afterwards, so a
+  // malformed text proto fails the test here instead of silently running with
+  // incomplete options. (A local is used rather than wrapping the call in
+  // ASSERT_TRUE to keep the multi-line raw string out of a macro argument.)
+  const bool parsed = google::protobuf::TextFormat::ParseFromString(R"(
+ filename: "05-binary.bin"
+ filename: "07-binary-stripped.bin"
+ custom_data_source {
+ name: "rewritten_inputfiles"
+ base_data_source: "inputfiles"
+ rewrite: {
+ pattern: "binary"
+ replacement: "binary"
+ }
+ }
+ data_source: "rewritten_inputfiles"
+ )", &options);
+  ASSERT_TRUE(parsed);
+
+  RunBloatyWithOptions(options, bloaty::OutputOptions());
+  AssertChildren(*top_row_,
+                 {std::make_tuple("binary", kUnknown, size1 + size2)});
+}
+
+// Runs bloaty with "06-diff.a" before the "--" separator and "03-simple.a"
+// after it, broken down by symbols, and checks the two resulting rows.
+TEST_F(BloatyTest, DiffMode) {
+  const std::vector<std::string> args = {
+      "bloaty", "06-diff.a", "--", "03-simple.a", "-d", "symbols"};
+  RunBloaty(args);
+
+  AssertChildren(*top_row_, {
+    std::make_tuple("foo_func", kUnknown, kSameAsVM),
+    std::make_tuple("foo_y", 4, 0)
+  });
+}
+
+// Runs the "symbols" data source on the stripped binary while naming the
+// unstripped one via --debug-file. No output values are checked by this test
+// itself.
+TEST_F(BloatyTest, SeparateDebug) {
+  const std::vector<std::string> args = {
+      "bloaty", "--debug-file=05-binary.bin", "07-binary-stripped.bin", "-d",
+      "symbols"};
+  RunBloaty(args);
+}
diff --git a/tests/fuzz_driver.cc b/tests/fuzz_driver.cc
new file mode 100644
index 0000000..2e286d4
--- /dev/null
+++ b/tests/fuzz_driver.cc
@@ -0,0 +1,37 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+// Standalone driver: feeds each file named on the command line to
+// LLVMFuzzerTestOneInput(), one at a time.
+//
+// Returns 0 when every file was read and processed, 1 as soon as a file
+// cannot be opened, sized, or read completely.
+int main(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    // Binary mode: the inputs are arbitrary bytes, so the stream must not
+    // apply any text-mode translation.
+    std::ifstream in(argv[i], std::ios::in | std::ios::binary);
+    if (!in) {
+      std::cerr << "Couldn't open " << argv[i] << std::endl;
+      return 1;
+    }
+
+    in.seekg(0, in.end);
+    // tellg() reports failure as -1; converting that straight to size_t would
+    // request an enormous buffer below.
+    const std::streamoff end_pos = in.tellg();
+    if (end_pos < 0) {
+      std::cerr << "Couldn't determine size of " << argv[i] << std::endl;
+      return 1;
+    }
+    const size_t length = static_cast<size_t>(end_pos);
+    in.seekg(0, in.beg);
+
+    std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl;
+    // Allocate exactly length bytes so that we reliably catch buffer overflows.
+    std::vector<char> bytes(length);
+    in.read(bytes.data(), bytes.size());
+    // Checked explicitly rather than with assert(), which is compiled out when
+    // NDEBUG is defined.
+    if (!in) {
+      std::cerr << "Couldn't read " << length << " bytes from " << argv[i]
+                << std::endl;
+      return 1;
+    }
+
+    LLVMFuzzerTestOneInput(reinterpret_cast<const uint8_t *>(bytes.data()),
+                           bytes.size());
+    std::cout << "Execution successful" << std::endl;
+  }
+  return 0;
+}
diff --git a/tests/fuzz_target.cc b/tests/fuzz_target.cc
new file mode 100644
index 0000000..037b414
--- /dev/null
+++ b/tests/fuzz_target.cc
@@ -0,0 +1,69 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+#include "bloaty.pb.h"
+#include "strarr.h"
+
+#include "absl/strings/string_view.h"
+
+using absl::string_view;
+
+namespace bloaty {
+
+// InputFile backed by a caller-supplied string_view instead of a file on
+// disk. The base class is constructed with a fixed placeholder file name.
+// NOTE(review): only the view is assigned to data_ (declared outside this
+// block, presumably in InputFile); the bytes are presumably not copied, so
+// the caller's buffer must outlive this object -- confirm.
+class StringPieceInputFile : public InputFile {
+ public:
+ StringPieceInputFile(string_view data)
+ : InputFile("fake_StringPieceInputFile_file") {
+ data_ = data;
+ }
+};
+
+// InputFileFactory that ignores the requested file name and always hands out
+// a StringPieceInputFile over the same string_view.
+class StringPieceInputFileFactory : public InputFileFactory {
+ public:
+  StringPieceInputFileFactory(string_view data) : data_(data) {}
+
+ private:
+  // The file name argument is deliberately unused: every "file" opened
+  // through this factory is a view of data_.
+  std::unique_ptr<InputFile> OpenFile(
+      const std::string& /* filename */) const override {
+    std::unique_ptr<InputFile> file(new StringPieceInputFile(data_));
+    return file;
+  }
+
+  // View handed to every InputFile created by OpenFile().
+  string_view data_;
+};
+
+// Runs one Bloaty analysis with the single data source `data_source`, using
+// `factory` to open the (placeholder-named) input file. The return value of
+// BloatyMain, the rollup output and the error text are all discarded.
+void RunBloaty(const InputFileFactory& factory,
+               const std::string& data_source) {
+  bloaty::Options options;
+  options.add_filename("dummy_filename");
+  options.add_data_source(data_source);
+
+  bloaty::RollupOutput output;
+  std::string error;
+  bloaty::BloatyMain(options, factory, &output, &error);
+}
+
+} // namespace bloaty
+
+// Fuzzer entry point: treats the `size` bytes at `data` as the contents of an
+// input file and runs Bloaty over them once per data source. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  const string_view input(reinterpret_cast<const char*>(data), size);
+  bloaty::StringPieceInputFileFactory factory(input);
+
+  // Try all of the data sources.
+  static const char* const kDataSources[] = {
+      "segments", "sections", "symbols", "compileunits", "inlines",
+      "armembers",
+  };
+  for (const char* data_source : kDataSources) {
+    RunBloaty(factory, data_source);
+  }
+
+  return 0;
+}
diff --git a/tests/range_map_test.cc b/tests/range_map_test.cc
new file mode 100644
index 0000000..fe023ee
--- /dev/null
+++ b/tests/range_map_test.cc
@@ -0,0 +1,404 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <tuple>
+
+namespace bloaty {
+
+// Fixture for RangeMap tests. Owns three maps (map_, map2_, map3_) and
+// provides helpers that compare a map's entries, or the rollup of several
+// maps, against an expected list. The helpers read RangeMap's mappings_ and
+// RangeEnd() directly.
+class RangeMapTest : public ::testing::Test {
+ protected:
+  // Asserts that every entry of `map` starts at or after the end of the entry
+  // before it.
+  void CheckConsistencyFor(const bloaty::RangeMap& map) {
+    uint64_t last_end = 0;
+    for (auto it = map.mappings_.begin(); it != map.mappings_.end(); ++it) {
+      ASSERT_GE(it->first, last_end);
+      last_end = map.RangeEnd(it);
+    }
+  }
+
+  // Runs the consistency check on all three fixture maps.
+  void CheckConsistency() {
+    CheckConsistencyFor(map_);
+    CheckConsistencyFor(map2_);
+    CheckConsistencyFor(map3_);
+  }
+
+  // One expected ComputeRollup() callback: the keys passed to it and the
+  // [start, end) bounds.
+  struct Row {
+    std::vector<std::string> keys;
+    uint64_t start;
+    uint64_t end;
+  };
+
+  // Asserts that RangeMap::ComputeRollup(maps, ...) invokes its callback
+  // exactly once per element of `rows`, in order, with matching keys, start
+  // and end.
+  void AssertRollupEquals(const std::vector<const RangeMap*>& maps,
+                          const std::vector<Row>& rows) {
+    // size_t so the comparisons against rows.size() are not mixed-sign.
+    size_t i = 0;
+    RangeMap::ComputeRollup(
+        maps, [&i, &rows](const std::vector<std::string>& keys, uint64_t start,
+                          uint64_t end) {
+          ASSERT_LT(i, rows.size());
+          const auto& row = rows[i];
+          ASSERT_EQ(row.keys, keys);
+          ASSERT_EQ(row.start, start);
+          ASSERT_EQ(row.end, end);
+          i++;
+        });
+    ASSERT_EQ(rows.size(), i);
+  }
+
+  // One expected map entry: start address, end address, the entry's
+  // other_start value, and its label.
+  struct Entry {
+    uint64_t addr;
+    uint64_t end;
+    uint64_t other_start;
+    std::string label;
+  };
+
+  // Asserts that `map` holds exactly `entries`, in order, and that a rollup
+  // over just this map reports the same addresses and labels.
+  void AssertMapEquals(const bloaty::RangeMap& map,
+                       const std::vector<Entry>& entries) {
+    auto iter = map.mappings_.begin();
+    size_t i = 0;
+    for (; i < entries.size() && iter != map.mappings_.end(); ++i, ++iter) {
+      const auto& entry = entries[i];
+      ASSERT_EQ(entry.addr, iter->first) << i;
+      ASSERT_EQ(entry.end, map.RangeEnd(iter)) << i;
+      ASSERT_EQ(entry.other_start, iter->second.other_start) << i;
+      ASSERT_EQ(entry.label, iter->second.label) << i;
+    }
+    // Both sequences must be exhausted together: no missing or extra entries.
+    ASSERT_EQ(i, entries.size());
+    ASSERT_EQ(iter, map.mappings_.end());
+
+    // Also test that ComputeRollup yields the same thing.
+    i = 0;
+    RangeMap::ComputeRollup({&map},
+                            [&i, &entries](const std::vector<std::string>& keys,
+                                           uint64_t start, uint64_t end) {
+                              ASSERT_LT(i, entries.size());
+                              const auto& entry = entries[i];
+                              ASSERT_EQ(entry.addr, start);
+                              ASSERT_EQ(entry.end, end);
+                              ASSERT_EQ(entry.label, keys[0]);
+                              i++;
+                            });
+    ASSERT_EQ(entries.size(), i);
+  }
+
+  // Shorthand for AssertMapEquals() on map_.
+  void AssertMainMapEquals(const std::vector<Entry>& entries) {
+    AssertMapEquals(map_, entries);
+  }
+
+  bloaty::RangeMap map_;
+  bloaty::RangeMap map2_;
+  bloaty::RangeMap map3_;
+
+  // Local aliases for the RangeMap constants used throughout the tests.
+  const uint64_t kNoTranslation = RangeMap::kNoTranslation;
+  const uint64_t kUnknownSize = RangeMap::kUnknownSize;
+};
+
+// Exercises AddRange(addr, size, label) on map_: disjoint ranges, a
+// zero-size range, adjacent ranges, and ranges overlapping existing entries.
+// Expected entries are {start, end, other_start, label}.
+TEST_F(RangeMapTest, AddRange) {
+ CheckConsistency();
+ AssertMainMapEquals({});
+
+ map_.AddRange(4, 3, "foo");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {4, 7, kNoTranslation, "foo"}
+ });
+
+ map_.AddRange(30, 5, "bar");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {4, 7, kNoTranslation, "foo"},
+ {30, 35, kNoTranslation, "bar"}
+ });
+
+ map_.AddRange(50, 0, "baz"); // No-op due to 0 size.
+ CheckConsistency();
+ AssertMainMapEquals({
+ {4, 7, kNoTranslation, "foo"},
+ {30, 35, kNoTranslation, "bar"}
+ });
+
+ map_.AddRange(20, 5, "baz");
+ map_.AddRange(25, 5, "baz2");
+ map_.AddRange(40, 5, "quux");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {4, 7, kNoTranslation, "foo"},
+ {20, 25, kNoTranslation, "baz"},
+ {25, 30, kNoTranslation, "baz2"},
+ {30, 35, kNoTranslation, "bar"},
+ {40, 45, kNoTranslation, "quux"}
+ });
+
+ // [21, 46) overlaps several entries: existing entries keep their labels and
+ // the new label only fills the gaps [35, 40) and [45, 46).
+ map_.AddRange(21, 25, "overlapping");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {4, 7, kNoTranslation, "foo"},
+ {20, 25, kNoTranslation, "baz"},
+ {25, 30, kNoTranslation, "baz2"},
+ {30, 35, kNoTranslation, "bar"},
+ {35, 40, kNoTranslation, "overlapping"},
+ {40, 45, kNoTranslation, "quux"},
+ {45, 46, kNoTranslation, "overlapping"}
+ });
+
+ // Adding the same range again leaves the map unchanged.
+ map_.AddRange(21, 25, "overlapping no-op");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {4, 7, kNoTranslation, "foo"},
+ {20, 25, kNoTranslation, "baz"},
+ {25, 30, kNoTranslation, "baz2"},
+ {30, 35, kNoTranslation, "bar"},
+ {35, 40, kNoTranslation, "overlapping"},
+ {40, 45, kNoTranslation, "quux"},
+ {45, 46, kNoTranslation, "overlapping"}
+ });
+
+ // A range covering [0, 100) only fills the parts not already covered.
+ map_.AddRange(0, 100, "overlap everything");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {0, 4, kNoTranslation, "overlap everything"},
+ {4, 7, kNoTranslation, "foo"},
+ {7, 20, kNoTranslation, "overlap everything"},
+ {20, 25, kNoTranslation, "baz"},
+ {25, 30, kNoTranslation, "baz2"},
+ {30, 35, kNoTranslation, "bar"},
+ {35, 40, kNoTranslation, "overlapping"},
+ {40, 45, kNoTranslation, "quux"},
+ {45, 46, kNoTranslation, "overlapping"},
+ {46, 100, kNoTranslation, "overlap everything"},
+ });
+}
+
+// Ranges added with kUnknownSize: such a range is reported as ending at
+// UINT64_MAX until a later range starts after it, at which point it is
+// reported as ending where that range begins. Also checks the rollup of such
+// a map against a base map with known sizes.
+TEST_F(RangeMapTest, UnknownSize) {
+ map_.AddRange(5, kUnknownSize, "foo");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {5, UINT64_MAX, kNoTranslation, "foo"}
+ });
+
+ map_.AddRange(100, 15, "bar");
+ map_.AddRange(200, kUnknownSize, "baz");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {5, 100, kNoTranslation, "foo"},
+ {100, 115, kNoTranslation, "bar"},
+ {200, UINT64_MAX, kNoTranslation, "baz"}
+ });
+
+ map2_.AddRange(5, 110, "base0");
+ map2_.AddRange(200, 50, "base1");
+
+ // In the rollup, the unknown-size "baz" is reported as ending at 250, the
+ // end of "base1" in the first map.
+ AssertRollupEquals({&map2_, &map_}, {
+ {{"base0", "foo"}, 5, 100},
+ {{"base0", "bar"}, 100, 115},
+ {{"base1", "baz"}, 200, 250},
+ });
+}
+
+// A known-size range added across the start of an existing unknown-size
+// range: the existing start address is kept, so "bar" is cut off at 100 and
+// "foo" covers [100, 105).
+TEST_F(RangeMapTest, UnknownSize2) {
+ // This case is slightly weird, but we do consider the "100" below to be a
+ // hard fact even if the size is unknown, so the "[95, 105]: bar" range
+ // doesn't override it.
+ map_.AddRange(100, kUnknownSize, "foo");
+ map_.AddRange(95, 10, "bar");
+ AssertMainMapEquals({
+ {95, 100, kNoTranslation, "bar"},
+ {100, 105, kNoTranslation, "foo"},
+ });
+}
+
+// Two unknown-size ranges followed by a known-size range spanning both: the
+// original labels are kept and the second range ends at 200, the end of the
+// known-size range.
+TEST_F(RangeMapTest, UnknownSize3) {
+ map_.AddRange(100, kUnknownSize, "foo");
+ map_.AddRange(150, kUnknownSize, "bar");
+ // This tells us the ultimate size of "foo", and we keep the "foo" label even
+ // though the new label is "baz".
+ map_.AddRange(100, 100, "baz");
+ AssertMainMapEquals({
+ {100, 150, kNoTranslation, "foo"},
+ {150, 200, kNoTranslation, "bar"},
+ });
+}
+
+// Like UnknownSize3, but the second range has a known size (100): it keeps
+// its own extent [150, 250) rather than being cut at 200 by the later range.
+TEST_F(RangeMapTest, UnknownSize4) {
+ map_.AddRange(100, kUnknownSize, "foo");
+ map_.AddRange(150, 100, "bar");
+ // This tells us the ultimate size of "foo", and we keep the "foo" label even
+ // though the new label is "baz".
+ map_.AddRange(100, 100, "baz");
+ AssertMainMapEquals({
+ {100, 150, kNoTranslation, "foo"},
+ {150, 250, kNoTranslation, "bar"},
+ });
+}
+
+// Regression case: adding a range that lies entirely inside an existing
+// entry ([100, 115) inside "foo") must leave the map unchanged.
+TEST_F(RangeMapTest, Bug1) {
+ map_.AddRange(100, 20, "foo");
+ map_.AddRange(120, 20, "bar");
+ map_.AddRange(100, 15, "baz");
+ AssertMainMapEquals({
+ {100, 120, kNoTranslation, "foo"},
+ {120, 140, kNoTranslation, "bar"},
+ });
+}
+
+// Regression case: a known-size range added between an unknown-size range
+// and a later known-size range. "foo" ends where "baz" starts, and the gap
+// [160, 200) stays unmapped.
+TEST_F(RangeMapTest, Bug2) {
+ map_.AddRange(100, kUnknownSize, "foo");
+ map_.AddRange(200, 50, "bar");
+ map_.AddRange(150, 10, "baz");
+ AssertMainMapEquals({
+ {100, 150, kNoTranslation, "foo"},
+ {150, 160, kNoTranslation, "baz"},
+ {200, 250, kNoTranslation, "bar"},
+ });
+}
+
+// Regression case: same as Bug2 but the last range also has unknown size, so
+// it is reported as ending at UINT64_MAX.
+TEST_F(RangeMapTest, Bug3) {
+ map_.AddRange(100, kUnknownSize, "foo");
+ map_.AddRange(200, kUnknownSize, "bar");
+ map_.AddRange(150, 10, "baz");
+ AssertMainMapEquals({
+ {100, 150, kNoTranslation, "foo"},
+ {150, 160, kNoTranslation, "baz"},
+ {200, UINT64_MAX, kNoTranslation, "bar"},
+ });
+}
+
+// Checks label lookup by single address (TryGetLabel) and by address range
+// (TryGetLabelForRange), including the failure cases just past the end of
+// the last entry.
+TEST_F(RangeMapTest, GetLabel) {
+ map_.AddRange(100, kUnknownSize, "foo");
+ map_.AddRange(200, 50, "bar");
+ map_.AddRange(150, 10, "baz");
+ AssertMainMapEquals({
+ {100, 150, kNoTranslation, "foo"},
+ {150, 160, kNoTranslation, "baz"},
+ {200, 250, kNoTranslation, "bar"},
+ });
+
+ std::string label;
+
+ // Single-address lookups; 250 is the first address past "bar".
+ ASSERT_TRUE(map_.TryGetLabel(100, &label));
+ ASSERT_EQ(label, "foo");
+ ASSERT_TRUE(map_.TryGetLabel(155, &label));
+ ASSERT_EQ(label, "baz");
+ ASSERT_TRUE(map_.TryGetLabel(249, &label));
+ ASSERT_EQ(label, "bar");
+ ASSERT_FALSE(map_.TryGetLabel(250, &label));
+
+ // Range lookups; a range running one byte past the end of "bar" fails.
+ ASSERT_TRUE(map_.TryGetLabelForRange(100, 10, &label));
+ ASSERT_EQ(label, "foo");
+ ASSERT_TRUE(map_.TryGetLabelForRange(155, 3, &label));
+ ASSERT_EQ(label, "baz");
+ ASSERT_TRUE(map_.TryGetLabelForRange(200, 50, &label));
+ ASSERT_EQ(label, "bar");
+ ASSERT_FALSE(map_.TryGetLabelForRange(200, 51, &label));
+}
+
+// Checks AddRangeWithTranslation(): map_ holds dual ranges whose third
+// AddDualRange argument shows up as the entry's other_start. Adding a range
+// to map2_ "with translation" through map_ also adds the corresponding range
+// to map3_ at the translated address. The call is expected to return false
+// when the range is not fully covered by map_.
+// NOTE(review): the meaning of the `false` argument is not visible here.
+TEST_F(RangeMapTest, Translation) {
+ map_.AddDualRange(20, 5, 120, "foo");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {20, 25, 120, "foo"}
+ });
+
+ ASSERT_TRUE(map2_.AddRangeWithTranslation(20, 5, "translate me", map_, false,
+ &map3_));
+
+ CheckConsistency();
+ AssertMapEquals(map2_, {
+ {20, 25, kNoTranslation, "translate me"}
+ });
+ AssertMapEquals(map3_, {
+ {120, 125, kNoTranslation, "translate me"}
+ });
+
+ map_.AddDualRange(1000, 30, 1100, "bar");
+ ASSERT_TRUE(map2_.AddRangeWithTranslation(1000, 5, "translate me2", map_,
+ false, &map3_));
+ AssertMapEquals(map2_, {
+ {20, 25, kNoTranslation, "translate me"},
+ {1000, 1005, kNoTranslation, "translate me2"}
+ });
+ AssertMapEquals(map3_, {
+ {120, 125, kNoTranslation, "translate me"},
+ {1100, 1105, kNoTranslation, "translate me2"}
+ });
+
+ // Starts before base map.
+ ASSERT_FALSE(map2_.AddRangeWithTranslation(15, 8, "translate me", map_, false,
+ &map3_));
+
+ // Extends past base map.
+ ASSERT_FALSE(map2_.AddRangeWithTranslation(22, 15, "translate me", map_,
+ false, &map3_));
+
+ // Starts and ends in base map, but skips range in the middle.
+ ASSERT_FALSE(map2_.AddRangeWithTranslation(20, 1000, "translate me", map_,
+ false, &map3_));
+}
+
+// Translation across a base map that mixes plain ranges (no translation) and
+// dual ranges: the translated range is split along the base map's entries in
+// map2_, and only the pieces over dual ranges appear in map3_.
+TEST_F(RangeMapTest, Translation2) {
+ map_.AddRange(5, 5, "foo");
+ map_.AddDualRange(20, 5, 120, "bar");
+ map_.AddRange(25, 5, "baz");
+ map_.AddDualRange(30, 5, 130, "quux");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {5, 10, kNoTranslation, "foo"},
+ {20, 25, 120, "bar"},
+ {25, 30, kNoTranslation, "baz"},
+ {30, 35, 130, "quux"}
+ });
+
+ ASSERT_TRUE(map2_.AddRangeWithTranslation(20, 15, "translate me", map_, false,
+ &map3_));
+ CheckConsistency();
+ AssertMapEquals(map2_, {
+ {20, 25, kNoTranslation, "translate me"},
+ {25, 30, kNoTranslation, "translate me"},
+ {30, 35, kNoTranslation, "translate me"}
+ });
+ // No entry for [25, 30): "baz" in the base map has no translation.
+ AssertMapEquals(map3_, {
+ {120, 125, kNoTranslation, "translate me"},
+ {130, 135, kNoTranslation, "translate me"}
+ });
+}
+
+// Translation of a range with unknown size: both the source entry in map2_
+// and the translated entry in map3_ are reported as ending at UINT64_MAX.
+// The rollup against the base map then clips the range to the base entry.
+TEST_F(RangeMapTest, UnknownTranslation) {
+ map_.AddDualRange(20, 10, 120, "foo");
+ CheckConsistency();
+ AssertMainMapEquals({
+ {20, 30, 120, "foo"}
+ });
+
+ // NOTE(review): unlike the other translation tests, the return value of
+ // this call is not asserted.
+ map2_.AddRangeWithTranslation(25, kUnknownSize, "translate me", map_, false,
+ &map3_);
+ CheckConsistency();
+ AssertMapEquals(map2_, {
+ {25, UINT64_MAX, kNoTranslation, "translate me"}
+ });
+ AssertMapEquals(map3_, {
+ {125, UINT64_MAX, kNoTranslation, "translate me"}
+ });
+
+ map2_.AddRange(20, 10, "fallback");
+
+ // "fallback" only shows up for [20, 25); from 25 the earlier entry wins.
+ AssertRollupEquals({&map_, &map2_}, {
+ {{"foo", "fallback"}, 20, 25},
+ {{"foo", "translate me"}, 25, 30},
+ });
+}
+
+} // namespace bloaty
diff --git a/tests/strarr.h b/tests/strarr.h
new file mode 100644
index 0000000..b62ad22
--- /dev/null
+++ b/tests/strarr.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BLOATY_TESTS_STRARR_H_
+#define BLOATY_TESTS_STRARR_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+// For constructing arrays of strings in the slightly peculiar format
+// required by execve().
+class StrArr {
+ public:
+  // Builds an array of strings.size() + 1 pointers: one strdup()'d copy of
+  // each input string, followed by a terminating null pointer.
+  explicit StrArr(const std::vector<std::string>& strings)
+      : size_(strings.size()), array_(new char*[size_ + 1]) {
+    array_[size_] = nullptr;
+    for (size_t i = 0; i < size_; i++) {
+      // Can't use c_str() directly because array_ is not const char*.
+      array_[i] = strdup(strings[i].c_str());
+    }
+  }
+
+  ~StrArr() {
+    // unique_ptr frees the array of pointers but not the pointed-to strings.
+    // The index is size_t to match size_ (no mixed-sign comparison).
+    for (size_t i = 0; i < size_; i++) {
+      free(array_[i]);
+    }
+  }
+
+  // Pointer to the first element of the null-terminated pointer array. The
+  // array stays owned by this object.
+  char **get() const { return array_.get(); }
+
+  // Number of strings, not counting the terminating null pointer.
+  size_t size() const { return size_; }
+
+ private:
+  // Declared before array_ on purpose: array_'s initializer reads size_.
+  size_t size_;
+  // Can't use vector<char*> because execve() takes ptr to non-const array.
+  std::unique_ptr<char*[]> array_;
+};
+
+#endif // BLOATY_TESTS_STRARR_H_
diff --git a/tests/test.h b/tests/test.h
new file mode 100644
index 0000000..9fa9ecb
--- /dev/null
+++ b/tests/test.h
@@ -0,0 +1,291 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BLOATY_TESTS_TEST_H_
+#define BLOATY_TESTS_TEST_H_
+
+#include <fstream>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <tuple>
+#include <vector>
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
+#include "gmock/gmock.h"
+#include "google/protobuf/text_format.h"
+#include "gtest/gtest.h"
+
+#include "strarr.h"
+#include "bloaty.h"
+#include "bloaty.pb.h"
+
+ // Stores the size in bytes of `filename` in `*size`.
+ //
+ // Returns true on success. Returns false, logs to stderr and leaves `*size`
+ // untouched if the file cannot be opened or its end offset cannot be
+ // determined.
+ inline bool GetFileSize(const std::string& filename, uint64_t* size) {
+   FILE* file = fopen(filename.c_str(), "rb");
+   if (!file) {
+     std::cerr << "Couldn't get file size for: " << filename << "\n";
+     return false;
+   }
+   // ftell() reports failure as -1, which must not be stored into the
+   // unsigned output as if it were a size.
+   long end = -1;
+   if (fseek(file, 0L, SEEK_END) == 0) {
+     end = ftell(file);
+   }
+   fclose(file);
+   if (end < 0) {
+     std::cerr << "Couldn't get file size for: " << filename << "\n";
+     return false;
+   }
+   *size = static_cast<uint64_t>(end);
+   return true;
+ }
+
+ // Returns the last component of the current working directory, or an empty
+ // string if the working directory cannot be read.
+ inline std::string GetTestDirectory() {
+   char cwd[PATH_MAX];
+   if (getcwd(cwd, sizeof(cwd)) == nullptr) {
+     return "";
+   }
+   const std::string full_path(cwd);
+   // When there is no '/', rfind() yields npos and npos + 1 wraps to 0, so the
+   // whole path is returned.
+   const size_t last_slash = full_path.rfind('/');
+   return full_path.substr(last_slash + 1);
+ }
+
+ // Renders `message` with google::protobuf::TextFormat and returns the text.
+ inline std::string DebugString(const google::protobuf::Message& message) {
+   std::string text;
+   google::protobuf::TextFormat::PrintToString(message, &text);
+   return text;
+ }
+
+ #define NONE_STRING "[None]"  // Row name that must not be the only child of a non-toplevel row.
+
+// Testing Bloaty requires a delicate balance. Bloaty's output is by its
+// nature very compiler and platform dependent. So we want to verify correct
+// operation without overspecifying how the platform should behave.
+
+ // Test fixture that runs Bloaty in-process and sanity-checks what it reports.
+ //
+ // A successful TryRunBloaty*() call leaves the rollup in output_ and points
+ // top_row_ at its top-level row; the Assert*/Find* helpers inspect that state.
+ class BloatyTest : public ::testing::Test {
+  protected:
+   // Recursively checks one rollup row:
+   //   - child names are unique and no child is empty in both dimensions,
+   //   - outside diff mode, the children's sizes sum to the row's sizes,
+   //   - a non-toplevel row does not have NONE_STRING as its only child.
+   // `*count` is incremented once per leaf row.
+   void CheckConsistencyForRow(const bloaty::RollupRow& row, bool is_toplevel,
+                               bool diff_mode, int* count) {
+     // If any children exist, they should sum up to this row's values.
+     // Also none of the children should have the same name.
+     std::unordered_set<std::string> names;
+
+     if (row.sorted_children.size() > 0) {
+       uint64_t vmtotal = 0;
+       uint64_t filetotal = 0;
+       for (const auto& child : row.sorted_children) {
+         vmtotal += child.vmsize;
+         filetotal += child.filesize;
+         CheckConsistencyForRow(child, false, diff_mode, count);
+         ASSERT_TRUE(names.insert(child.name).second);
+         ASSERT_FALSE(child.vmsize == 0 && child.filesize == 0);
+       }
+
+       if (!diff_mode) {
+         ASSERT_EQ(vmtotal, row.vmsize);
+         ASSERT_EQ(filetotal, row.filesize);
+       }
+     } else {
+       // Count leaf rows.
+       *count += 1;
+     }
+
+     if (!is_toplevel && row.sorted_children.size() == 1) {
+       ASSERT_NE(NONE_STRING, row.sorted_children[0].name);
+     }
+   }
+
+   // Prints output_ as CSV and checks its shape: a header row naming every
+   // data source plus "vmsize" and "filesize", followed by exactly `row_count`
+   // data rows whose last two columns are integers.
+   void CheckCSVConsistency(int row_count) {
+     std::ostringstream stream;
+     bloaty::OutputOptions options;
+     options.output_format = bloaty::OutputFormat::kCSV;
+     output_->Print(options, &stream);
+     std::string csv_output = stream.str();
+
+     std::vector<std::string> rows = absl::StrSplit(csv_output, '\n');
+     // Output ends with a final '\n', trim this.
+     ASSERT_FALSE(rows.empty());
+     ASSERT_EQ("", rows.back());
+     rows.pop_back();
+
+     ASSERT_GT(rows.size(), 0);  // There should be a header row.
+
+     ASSERT_EQ(rows.size() - 1, static_cast<size_t>(row_count));
+     bool first = true;
+     for (const auto& row : rows) {
+       std::vector<std::string> cols = absl::StrSplit(row, ',');
+       if (first) {
+         // header row should be: header1,header2,...,vmsize,filesize
+         std::vector<std::string> expected_headers(output_->source_names());
+         expected_headers.push_back("vmsize");
+         expected_headers.push_back("filesize");
+         ASSERT_EQ(cols, expected_headers);
+         first = false;
+       } else {
+         // Final two columns should parse as integer. Parse into 64 bits so
+         // that sizes beyond the range of int are still accepted.
+         int64_t out;
+         ASSERT_EQ(output_->source_names().size() + 2, cols.size());
+         ASSERT_TRUE(absl::SimpleAtoi(cols[cols.size() - 1], &out));
+         ASSERT_TRUE(absl::SimpleAtoi(cols[cols.size() - 2], &out));
+       }
+     }
+   }
+
+   // Cross-checks the whole run: diff mode must match whether base files were
+   // given, the top-level file size must equal the combined size of the input
+   // files (outside diff mode), and both the row tree and the CSV rendering
+   // must be self-consistent.
+   void CheckConsistency(const bloaty::Options& options) {
+     ASSERT_EQ(options.base_filename_size() > 0, output_->diff_mode());
+
+     if (!output_->diff_mode()) {
+       size_t total_input_size = 0;
+       for (const auto& filename : options.filename()) {
+         uint64_t size;
+         ASSERT_TRUE(GetFileSize(filename, &size));
+         total_input_size += size;
+       }
+       ASSERT_EQ(top_row_->filesize, total_input_size);
+     }
+
+     int rows = 0;
+     CheckConsistencyForRow(*top_row_, true, output_->diff_mode(), &rows);
+     CheckCSVConsistency(rows);
+     ASSERT_EQ("TOTAL", top_row_->name);
+   }
+
+   // Joins `strings` with single spaces. An empty vector yields "".
+   std::string JoinStrings(const std::vector<std::string>& strings) {
+     std::string ret;
+     for (size_t i = 0; i < strings.size(); i++) {
+       if (i > 0) {
+         ret += " ";
+       }
+       ret += strings[i];
+     }
+     return ret;
+   }
+
+   // Runs Bloaty with already-parsed options. On success the result is
+   // consistency-checked, printed to stderr and true is returned; on failure
+   // the error is logged to stderr and false is returned. output_ and top_row_
+   // are replaced in either case.
+   bool TryRunBloatyWithOptions(const bloaty::Options& options,
+                                const bloaty::OutputOptions& output_options) {
+     output_.reset(new bloaty::RollupOutput);
+     top_row_ = &output_->toplevel_row();
+     std::string error;
+     bloaty::MmapInputFileFactory factory;
+     if (bloaty::BloatyMain(options, factory, output_.get(), &error)) {
+       CheckConsistency(options);
+       output_->Print(output_options, &std::cerr);
+       return true;
+     } else {
+       std::cerr << "Bloaty returned error:" << error << "\n";
+       return false;
+     }
+   }
+
+   // Parses `strings` as a Bloaty command line and runs it. Returns false if
+   // either option parsing or the run itself fails.
+   bool TryRunBloaty(const std::vector<std::string>& strings) {
+     bloaty::Options options;
+     bloaty::OutputOptions output_options;
+     std::string error;
+     StrArr str_arr(strings);
+     int argc = strings.size();
+     char** argv = str_arr.get();
+     bool ok = bloaty::ParseOptions(false, &argc, &argv, &options,
+                                    &output_options, &error);
+     if (!ok) {
+       std::cerr << "Error parsing options: " << error;
+       return false;
+     }
+
+     return TryRunBloatyWithOptions(options, output_options);
+   }
+
+   // Like TryRunBloaty(), but logs the command line and asserts success.
+   void RunBloaty(const std::vector<std::string>& strings) {
+     std::cerr << "Running bloaty: " << JoinStrings(strings) << "\n";
+     ASSERT_TRUE(TryRunBloaty(strings));
+   }
+
+   // Like TryRunBloatyWithOptions(), but logs the options and asserts success.
+   void RunBloatyWithOptions(const bloaty::Options& options,
+                             const bloaty::OutputOptions& output_options) {
+     std::cerr << "Running bloaty, options: " << DebugString(options) << "\n";
+     ASSERT_TRUE(TryRunBloatyWithOptions(options, output_options));
+   }
+
+   // Asserts that running Bloaty with `strings` fails. The message pattern is
+   // currently not checked.
+   void AssertBloatyFails(const std::vector<std::string>& strings,
+                          const std::string& /*msg_regex*/) {
+     // TODO(haberman): verify msg_regex by making all errors logged to a
+     // standard place.
+     ASSERT_FALSE(TryRunBloaty(strings));
+   }
+
+   // Special constants for asserting of children.
+   static constexpr int kUnknown = -1;
+   static constexpr int kSameAsVM = -2;  // Only for file size.
+
+   // Checks that the children of `row` whose names do not start with '[' or
+   // '_' begin with the entries in `children`, in order. Each entry is
+   // (name, expected VM size, expected file size); a size may be kUnknown, and
+   // the file size may be kSameAsVM. Actual sizes may exceed the expected ones
+   // by a bounded overhead. Extra trailing children are permitted, but every
+   // expected entry must be matched.
+   void AssertChildren(
+       const bloaty::RollupRow& row,
+       const std::vector<std::tuple<std::string, int, int>>& children) {
+     size_t i = 0;
+     for (const auto& child : row.sorted_children) {
+       if (i == children.size()) {
+         // We allow the actual data to have excess elements. Checking before
+         // indexing also keeps an empty `children` list in bounds.
+         break;
+       }
+
+       std::string expected_name;
+       int expected_vm, expected_file;
+       std::tie(expected_name, expected_vm, expected_file) = children[i];
+
+       // Excluding leading '_' is kind of a hack to exclude symbols
+       // automatically inserted by the compiler, like __x86.get_pc_thunk.bx
+       // for 32-bit x86 builds or _IO_stdin_used in binaries.
+       //
+       // Excluding leading '[' is for things like this:
+       //
+       //   [None]
+       //   [ELF Headers]
+       //   [AR Headers]
+       //   etc.
+       if (child.name[0] == '[' || child.name[0] == '_') {
+         continue;
+       }
+       EXPECT_EQ(expected_name, child.name);
+
+       // kUnknown indicates that we don't know what the exact size should be
+       // (for example for functions). Any other non-positive value is a
+       // mistake in the expectation itself.
+       if (expected_vm == kUnknown) {
+         // Always pass.
+       } else if (expected_vm > 0) {
+         EXPECT_GE(child.vmsize, expected_vm);
+         // Allow some overhead.
+         EXPECT_LE(child.vmsize, (expected_vm * 1.1) + 100);
+       } else {
+         ASSERT_TRUE(false);
+       }
+
+       if (expected_file == kSameAsVM) {
+         expected_file = child.vmsize;
+       }
+
+       if (expected_file != kUnknown) {
+         EXPECT_GE(child.filesize, expected_file);
+         // Allow some overhead.
+         EXPECT_LE(child.filesize, (expected_file * 1.2) + 180);
+       }
+
+       ++i;
+     }
+
+     // All expected elements must be present.
+     ASSERT_EQ(i, children.size());
+   }
+
+   // Returns the direct child of the top-level row called `name`. If there is
+   // none, or Bloaty has not been run yet, records a test failure and returns
+   // nullptr.
+   const bloaty::RollupRow* FindRow(const std::string& name) {
+     if (top_row_ != nullptr) {
+       for (const auto& child : top_row_->sorted_children) {
+         if (child.name == name) {
+           return &child;
+         }
+       }
+     }
+     EXPECT_TRUE(false) << name;
+     return nullptr;
+   }
+
+   // Result of the most recent run; owns the rows top_row_ points into.
+   std::unique_ptr<bloaty::RollupOutput> output_;
+   // Top-level row of output_, or nullptr before the first run.
+   const bloaty::RollupRow* top_row_ = nullptr;
+ };
+
+ constexpr int BloatyTest::kUnknown;  // Namespace-scope definition of the in-class constant; required before C++17 if it is ever ODR-used.
+ constexpr int BloatyTest::kSameAsVM;  // Same, for kSameAsVM.
+
+#endif // BLOATY_TESTS_TEST_H_
diff --git a/tests/testdata/fuzz_corpus/0034ecacd5427aafc6b97413da2053b36de5059f b/tests/testdata/fuzz_corpus/0034ecacd5427aafc6b97413da2053b36de5059f
new file mode 100644
index 0000000..133d6fa
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/0034ecacd5427aafc6b97413da2053b36de5059f
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/0153168d08d78a4eb486cdd421b3efd6a4e12844 b/tests/testdata/fuzz_corpus/0153168d08d78a4eb486cdd421b3efd6a4e12844
new file mode 100644
index 0000000..deef34e
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/0153168d08d78a4eb486cdd421b3efd6a4e12844
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/04deff284542b1271c7ed6da11b4389342793f4d b/tests/testdata/fuzz_corpus/04deff284542b1271c7ed6da11b4389342793f4d
new file mode 100644
index 0000000..72d3413
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/04deff284542b1271c7ed6da11b4389342793f4d
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/0512fc56ee361da71476c098b91d1081e5dbc4ad b/tests/testdata/fuzz_corpus/0512fc56ee361da71476c098b91d1081e5dbc4ad
new file mode 100644
index 0000000..b2fe4f2
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/0512fc56ee361da71476c098b91d1081e5dbc4ad
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/0c7d074fcd0d6863b497f6137c6cacffc59c2ae8 b/tests/testdata/fuzz_corpus/0c7d074fcd0d6863b497f6137c6cacffc59c2ae8
new file mode 100644
index 0000000..69d27e8
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/0c7d074fcd0d6863b497f6137c6cacffc59c2ae8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/0efb04f81a05b500031405eccae9d7e8ea0721c5 b/tests/testdata/fuzz_corpus/0efb04f81a05b500031405eccae9d7e8ea0721c5
new file mode 100644
index 0000000..4a2a97c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/0efb04f81a05b500031405eccae9d7e8ea0721c5
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/0f6736109fcd5db53450385486c4586f884feb23 b/tests/testdata/fuzz_corpus/0f6736109fcd5db53450385486c4586f884feb23
new file mode 100644
index 0000000..089d14b
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/0f6736109fcd5db53450385486c4586f884feb23
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/110a37d69bfc8f6da2f8180e907d7d2f12da1eb2 b/tests/testdata/fuzz_corpus/110a37d69bfc8f6da2f8180e907d7d2f12da1eb2
new file mode 100644
index 0000000..8b3a6b4
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/110a37d69bfc8f6da2f8180e907d7d2f12da1eb2
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/14f1751b6ceb6aa262bced1c928c11d565c3d913 b/tests/testdata/fuzz_corpus/14f1751b6ceb6aa262bced1c928c11d565c3d913
new file mode 100644
index 0000000..e585f12
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/14f1751b6ceb6aa262bced1c928c11d565c3d913
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/15c502b13029920e528a2982fc1559689764aaf8 b/tests/testdata/fuzz_corpus/15c502b13029920e528a2982fc1559689764aaf8
new file mode 100644
index 0000000..f4d0e23
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/15c502b13029920e528a2982fc1559689764aaf8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/1846aea81a4e97327d5e82c8ab9e6d4c43bffee3 b/tests/testdata/fuzz_corpus/1846aea81a4e97327d5e82c8ab9e6d4c43bffee3
new file mode 100644
index 0000000..cf85b99
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/1846aea81a4e97327d5e82c8ab9e6d4c43bffee3
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/1930321f0302e111b64e38b8456ef8473f3e71d8 b/tests/testdata/fuzz_corpus/1930321f0302e111b64e38b8456ef8473f3e71d8
new file mode 100644
index 0000000..1bcd874
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/1930321f0302e111b64e38b8456ef8473f3e71d8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/1bfe776624349462cb1d818443af106215021470 b/tests/testdata/fuzz_corpus/1bfe776624349462cb1d818443af106215021470
new file mode 100644
index 0000000..381a6fe
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/1bfe776624349462cb1d818443af106215021470
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/2215ecf59e25dfb7b946e25bf9cbb9b6c862563f b/tests/testdata/fuzz_corpus/2215ecf59e25dfb7b946e25bf9cbb9b6c862563f
new file mode 100644
index 0000000..e834f63
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/2215ecf59e25dfb7b946e25bf9cbb9b6c862563f
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/22a284684c8b3f13e6070870dfb21d6453c7819e b/tests/testdata/fuzz_corpus/22a284684c8b3f13e6070870dfb21d6453c7819e
new file mode 100644
index 0000000..e733863
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/22a284684c8b3f13e6070870dfb21d6453c7819e
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/23350c9dafa9b9a1e2a2524a2b173fdec00037ab b/tests/testdata/fuzz_corpus/23350c9dafa9b9a1e2a2524a2b173fdec00037ab
new file mode 100644
index 0000000..0fd545c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/23350c9dafa9b9a1e2a2524a2b173fdec00037ab
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/270dcbc8975aaff7d869faa520be996460e6f7be b/tests/testdata/fuzz_corpus/270dcbc8975aaff7d869faa520be996460e6f7be
new file mode 100644
index 0000000..d8918ce
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/270dcbc8975aaff7d869faa520be996460e6f7be
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/2877069c49bf5773d158de6911842a58768b74c3 b/tests/testdata/fuzz_corpus/2877069c49bf5773d158de6911842a58768b74c3
new file mode 100644
index 0000000..691bb03
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/2877069c49bf5773d158de6911842a58768b74c3
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/28d7fbe0ff87b53a011656f9e3c9c3aeb2ce2018 b/tests/testdata/fuzz_corpus/28d7fbe0ff87b53a011656f9e3c9c3aeb2ce2018
new file mode 100644
index 0000000..285ace1
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/28d7fbe0ff87b53a011656f9e3c9c3aeb2ce2018
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/2eb47f37b3b0b4bb84cc55d0125d9bb7d176b1c5 b/tests/testdata/fuzz_corpus/2eb47f37b3b0b4bb84cc55d0125d9bb7d176b1c5
new file mode 100644
index 0000000..d0fb464
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/2eb47f37b3b0b4bb84cc55d0125d9bb7d176b1c5
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/2f6f7647f2e81f50a3f787dda064cffe03354aa8 b/tests/testdata/fuzz_corpus/2f6f7647f2e81f50a3f787dda064cffe03354aa8
new file mode 100644
index 0000000..ab4d79c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/2f6f7647f2e81f50a3f787dda064cffe03354aa8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/2fd5be6e7a99d71434a756a4d59a8d44db4942bb b/tests/testdata/fuzz_corpus/2fd5be6e7a99d71434a756a4d59a8d44db4942bb
new file mode 100644
index 0000000..51969e6
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/2fd5be6e7a99d71434a756a4d59a8d44db4942bb
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/3115b1163086c5904008b9a5d17a761863910214 b/tests/testdata/fuzz_corpus/3115b1163086c5904008b9a5d17a761863910214
new file mode 100644
index 0000000..39d6185
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/3115b1163086c5904008b9a5d17a761863910214
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/327c150b2d13636bb3ea5129cb58af30675e5599 b/tests/testdata/fuzz_corpus/327c150b2d13636bb3ea5129cb58af30675e5599
new file mode 100644
index 0000000..2bf5486
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/327c150b2d13636bb3ea5129cb58af30675e5599
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/37209ceacf21ce2796c98824dc9be60b876274a2 b/tests/testdata/fuzz_corpus/37209ceacf21ce2796c98824dc9be60b876274a2
new file mode 100644
index 0000000..6f49440
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/37209ceacf21ce2796c98824dc9be60b876274a2
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/3e96523b6d0025b39ddd0a771fc9f99dd1590877 b/tests/testdata/fuzz_corpus/3e96523b6d0025b39ddd0a771fc9f99dd1590877
new file mode 100644
index 0000000..c7fb188
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/3e96523b6d0025b39ddd0a771fc9f99dd1590877
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/3f3c4745b7053aca15608204a7592bac44d690cb b/tests/testdata/fuzz_corpus/3f3c4745b7053aca15608204a7592bac44d690cb
new file mode 100644
index 0000000..b98a94b
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/3f3c4745b7053aca15608204a7592bac44d690cb
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/412f1573ff1a9675377481456d8809a850d03f1b b/tests/testdata/fuzz_corpus/412f1573ff1a9675377481456d8809a850d03f1b
new file mode 100644
index 0000000..43c14c5
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/412f1573ff1a9675377481456d8809a850d03f1b
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/42f2cd88cae45b6add339ed2c2a9074ff55d9db0 b/tests/testdata/fuzz_corpus/42f2cd88cae45b6add339ed2c2a9074ff55d9db0
new file mode 100644
index 0000000..ce0b3b8
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/42f2cd88cae45b6add339ed2c2a9074ff55d9db0
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/459ef92fc33d1d9fc6048f293bab5ddb584f94a4 b/tests/testdata/fuzz_corpus/459ef92fc33d1d9fc6048f293bab5ddb584f94a4
new file mode 100644
index 0000000..d0cb244
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/459ef92fc33d1d9fc6048f293bab5ddb584f94a4
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/48c3f1ae3089b0644c6af799af2ae94ee1c5ad30 b/tests/testdata/fuzz_corpus/48c3f1ae3089b0644c6af799af2ae94ee1c5ad30
new file mode 100644
index 0000000..b699120
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/48c3f1ae3089b0644c6af799af2ae94ee1c5ad30
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/4e6b7729619f66a429dd9ef88d186dd37c42d8ca b/tests/testdata/fuzz_corpus/4e6b7729619f66a429dd9ef88d186dd37c42d8ca
new file mode 100644
index 0000000..f06c0cb
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/4e6b7729619f66a429dd9ef88d186dd37c42d8ca
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/53a2d35a2dfe33981111fce5c8fb6514dd9570cb b/tests/testdata/fuzz_corpus/53a2d35a2dfe33981111fce5c8fb6514dd9570cb
new file mode 100644
index 0000000..336f4cb
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/53a2d35a2dfe33981111fce5c8fb6514dd9570cb
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/57354041fcdfcc3613a0762adfd5189ca60abc80 b/tests/testdata/fuzz_corpus/57354041fcdfcc3613a0762adfd5189ca60abc80
new file mode 100644
index 0000000..a1214a7
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/57354041fcdfcc3613a0762adfd5189ca60abc80
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/5a90c59187664f79cdf1ded1a6eef6854ddd9a07 b/tests/testdata/fuzz_corpus/5a90c59187664f79cdf1ded1a6eef6854ddd9a07
new file mode 100644
index 0000000..4ed21ca
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/5a90c59187664f79cdf1ded1a6eef6854ddd9a07
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5 b/tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5
new file mode 100644
index 0000000..791827c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/5e8ec9cbd600dcc8f6dc5eafaf34226706378b60 b/tests/testdata/fuzz_corpus/5e8ec9cbd600dcc8f6dc5eafaf34226706378b60
new file mode 100644
index 0000000..1916a86
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/5e8ec9cbd600dcc8f6dc5eafaf34226706378b60
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/64779227c42248607f46879f9e4007e66ee68269 b/tests/testdata/fuzz_corpus/64779227c42248607f46879f9e4007e66ee68269
new file mode 100644
index 0000000..bb35594
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/64779227c42248607f46879f9e4007e66ee68269
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6511ded4f638705a5cdd071d7e21cb4febb7234c b/tests/testdata/fuzz_corpus/6511ded4f638705a5cdd071d7e21cb4febb7234c
new file mode 100644
index 0000000..1605989
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6511ded4f638705a5cdd071d7e21cb4febb7234c
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/66845a4bce637e02379f5dbf1b860ceb7725a96d b/tests/testdata/fuzz_corpus/66845a4bce637e02379f5dbf1b860ceb7725a96d
new file mode 100644
index 0000000..d36e4a7
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/66845a4bce637e02379f5dbf1b860ceb7725a96d
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/67630fad767a6918501667f56f09793d399a104a b/tests/testdata/fuzz_corpus/67630fad767a6918501667f56f09793d399a104a
new file mode 100644
index 0000000..1da14a9
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/67630fad767a6918501667f56f09793d399a104a
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6b45a7d45ef354eb2d549918a804abc1e6d42d05 b/tests/testdata/fuzz_corpus/6b45a7d45ef354eb2d549918a804abc1e6d42d05
new file mode 100644
index 0000000..9ded4ca
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6b45a7d45ef354eb2d549918a804abc1e6d42d05
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6d2975ba5e8ad75fb4e743d96a279001b9ecd0e3 b/tests/testdata/fuzz_corpus/6d2975ba5e8ad75fb4e743d96a279001b9ecd0e3
new file mode 100644
index 0000000..e65676b
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6d2975ba5e8ad75fb4e743d96a279001b9ecd0e3
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6d385d65872fa08e194a8b806ccfd87e49f5a554 b/tests/testdata/fuzz_corpus/6d385d65872fa08e194a8b806ccfd87e49f5a554
new file mode 100644
index 0000000..2c88bfd
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6d385d65872fa08e194a8b806ccfd87e49f5a554
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6d7db4d97103830cd33688f18b7c6944218b58f8 b/tests/testdata/fuzz_corpus/6d7db4d97103830cd33688f18b7c6944218b58f8
new file mode 100644
index 0000000..3a1479a
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6d7db4d97103830cd33688f18b7c6944218b58f8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6ecf640685258c2bc0960ab1a797ba2db10ffd63 b/tests/testdata/fuzz_corpus/6ecf640685258c2bc0960ab1a797ba2db10ffd63
new file mode 100644
index 0000000..3841a59
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6ecf640685258c2bc0960ab1a797ba2db10ffd63
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6f0a1ec2ebc980c9296486ad1a5b8a564549aa9a b/tests/testdata/fuzz_corpus/6f0a1ec2ebc980c9296486ad1a5b8a564549aa9a
new file mode 100644
index 0000000..dc1ce42
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6f0a1ec2ebc980c9296486ad1a5b8a564549aa9a
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/6fa62db4fbfc54538513558d0886ff8ae74e58ed b/tests/testdata/fuzz_corpus/6fa62db4fbfc54538513558d0886ff8ae74e58ed
new file mode 100644
index 0000000..bcbdc28
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/6fa62db4fbfc54538513558d0886ff8ae74e58ed
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/70619c7a97d684b6dd6dddfb62782ae40bc91370 b/tests/testdata/fuzz_corpus/70619c7a97d684b6dd6dddfb62782ae40bc91370
new file mode 100644
index 0000000..a2e5525
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/70619c7a97d684b6dd6dddfb62782ae40bc91370
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/7069a01603ddcf775682ee40d6814f4e5559bb5a b/tests/testdata/fuzz_corpus/7069a01603ddcf775682ee40d6814f4e5559bb5a
new file mode 100644
index 0000000..ac8bc4b
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/7069a01603ddcf775682ee40d6814f4e5559bb5a
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/739b5ce9def832d301c95cfa6d1a9c6e9d46c73d b/tests/testdata/fuzz_corpus/739b5ce9def832d301c95cfa6d1a9c6e9d46c73d
new file mode 100644
index 0000000..e8c58ea
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/739b5ce9def832d301c95cfa6d1a9c6e9d46c73d
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/7b06150aa15f8aed1abd7a93f1772b893efc150e b/tests/testdata/fuzz_corpus/7b06150aa15f8aed1abd7a93f1772b893efc150e
new file mode 100644
index 0000000..fb01003
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/7b06150aa15f8aed1abd7a93f1772b893efc150e
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/7c06c448cd75d4af6621f578d6d8cb35d2e5279e b/tests/testdata/fuzz_corpus/7c06c448cd75d4af6621f578d6d8cb35d2e5279e
new file mode 100644
index 0000000..4d8e849
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/7c06c448cd75d4af6621f578d6d8cb35d2e5279e
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/7d09e7259aa0fb3da736b98b94211f71a5e513e6 b/tests/testdata/fuzz_corpus/7d09e7259aa0fb3da736b98b94211f71a5e513e6
new file mode 100644
index 0000000..6728660
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/7d09e7259aa0fb3da736b98b94211f71a5e513e6
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/7e290e80959e9f3b045387f7ec257182cb23721d b/tests/testdata/fuzz_corpus/7e290e80959e9f3b045387f7ec257182cb23721d
new file mode 100644
index 0000000..c4a8fac
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/7e290e80959e9f3b045387f7ec257182cb23721d
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/827e96b748c33f032574b9f2b7f084920feb76ab b/tests/testdata/fuzz_corpus/827e96b748c33f032574b9f2b7f084920feb76ab
new file mode 100644
index 0000000..43ae0c8
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/827e96b748c33f032574b9f2b7f084920feb76ab
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/84675e905d3771b59fd51f606bc2a14f549aba43 b/tests/testdata/fuzz_corpus/84675e905d3771b59fd51f606bc2a14f549aba43
new file mode 100644
index 0000000..b5b75bc
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/84675e905d3771b59fd51f606bc2a14f549aba43
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/8631458a27f52b7e3cdfb06a6bde899901bfd3ac b/tests/testdata/fuzz_corpus/8631458a27f52b7e3cdfb06a6bde899901bfd3ac
new file mode 100644
index 0000000..ba36762
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/8631458a27f52b7e3cdfb06a6bde899901bfd3ac
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/86a3d4b71ee172cd476d035fb9445bcbb835d92a b/tests/testdata/fuzz_corpus/86a3d4b71ee172cd476d035fb9445bcbb835d92a
new file mode 100644
index 0000000..31c7a5a
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/86a3d4b71ee172cd476d035fb9445bcbb835d92a
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/8be73e77c819315082ac4f40acc964ddfa7be6fa b/tests/testdata/fuzz_corpus/8be73e77c819315082ac4f40acc964ddfa7be6fa
new file mode 100644
index 0000000..7bf02ff
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/8be73e77c819315082ac4f40acc964ddfa7be6fa
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/8fc314d43f2d412e20a822b5f595bf61005342a9 b/tests/testdata/fuzz_corpus/8fc314d43f2d412e20a822b5f595bf61005342a9
new file mode 100644
index 0000000..e35caef
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/8fc314d43f2d412e20a822b5f595bf61005342a9
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/91acbe9b1ef167d88e8a57f16db2aa740865accd b/tests/testdata/fuzz_corpus/91acbe9b1ef167d88e8a57f16db2aa740865accd
new file mode 100644
index 0000000..9cc9dfa
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/91acbe9b1ef167d88e8a57f16db2aa740865accd
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/922103ecc5f70b4235b20e30ce0a2e895ead8251 b/tests/testdata/fuzz_corpus/922103ecc5f70b4235b20e30ce0a2e895ead8251
new file mode 100644
index 0000000..8d998eb
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/922103ecc5f70b4235b20e30ce0a2e895ead8251
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/9b5a5fa4a46bcca17df149785daf9cd14f1c0443 b/tests/testdata/fuzz_corpus/9b5a5fa4a46bcca17df149785daf9cd14f1c0443
new file mode 100644
index 0000000..7415c4c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/9b5a5fa4a46bcca17df149785daf9cd14f1c0443
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/9e079b888e5d223ef0bebf13ce1e26ebdd82752a b/tests/testdata/fuzz_corpus/9e079b888e5d223ef0bebf13ce1e26ebdd82752a
new file mode 100644
index 0000000..03c73f7
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/9e079b888e5d223ef0bebf13ce1e26ebdd82752a
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a22fdce1317617bf89f3283cbd44ef490a57b5e2 b/tests/testdata/fuzz_corpus/a22fdce1317617bf89f3283cbd44ef490a57b5e2
new file mode 100644
index 0000000..54973d6
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a22fdce1317617bf89f3283cbd44ef490a57b5e2
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a493f77d0d04aaed2e2dca8256c9a5cc591aa8fa b/tests/testdata/fuzz_corpus/a493f77d0d04aaed2e2dca8256c9a5cc591aa8fa
new file mode 100755
index 0000000..c768a24
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a493f77d0d04aaed2e2dca8256c9a5cc591aa8fa
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a4d1a2b246e0a1f133774daa28328c0d7ce5c3e5 b/tests/testdata/fuzz_corpus/a4d1a2b246e0a1f133774daa28328c0d7ce5c3e5
new file mode 100644
index 0000000..b70f241
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a4d1a2b246e0a1f133774daa28328c0d7ce5c3e5
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a4da3e6bc07539aece56d12b2e15b89edd842fd8 b/tests/testdata/fuzz_corpus/a4da3e6bc07539aece56d12b2e15b89edd842fd8
new file mode 100644
index 0000000..03ddc04
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a4da3e6bc07539aece56d12b2e15b89edd842fd8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a64d39a8957a4f4c7012f78b41caa8e5f3f4e484 b/tests/testdata/fuzz_corpus/a64d39a8957a4f4c7012f78b41caa8e5f3f4e484
new file mode 100644
index 0000000..29684c8
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a64d39a8957a4f4c7012f78b41caa8e5f3f4e484
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a69662c2423b5a1d1859f7981c9e88c4f821b0b7 b/tests/testdata/fuzz_corpus/a69662c2423b5a1d1859f7981c9e88c4f821b0b7
new file mode 100644
index 0000000..4ab5e91
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a69662c2423b5a1d1859f7981c9e88c4f821b0b7
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/a743522a548f6321b69ee5081ce898ec5e4b9c23 b/tests/testdata/fuzz_corpus/a743522a548f6321b69ee5081ce898ec5e4b9c23
new file mode 100644
index 0000000..8c8751a
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/a743522a548f6321b69ee5081ce898ec5e4b9c23
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/af0df3abd6ff306ca9161b6f6ebc96f21c6dfa98 b/tests/testdata/fuzz_corpus/af0df3abd6ff306ca9161b6f6ebc96f21c6dfa98
new file mode 100644
index 0000000..c1198fe
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/af0df3abd6ff306ca9161b6f6ebc96f21c6dfa98
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/b3a904cebb1d3070ca96cf70ec0b9ef5d1612a45 b/tests/testdata/fuzz_corpus/b3a904cebb1d3070ca96cf70ec0b9ef5d1612a45
new file mode 100644
index 0000000..b511d55
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/b3a904cebb1d3070ca96cf70ec0b9ef5d1612a45
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/bf2cb5de1de6ca492f159dc3cce67cf88a6244aa b/tests/testdata/fuzz_corpus/bf2cb5de1de6ca492f159dc3cce67cf88a6244aa
new file mode 100644
index 0000000..8c8338b
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/bf2cb5de1de6ca492f159dc3cce67cf88a6244aa
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/c121e995dd4575473e468801b301da0f219f5de7 b/tests/testdata/fuzz_corpus/c121e995dd4575473e468801b301da0f219f5de7
new file mode 100644
index 0000000..1241e8c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/c121e995dd4575473e468801b301da0f219f5de7
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/c561ab1d99f16a04898518914dd1cea4afa7e358 b/tests/testdata/fuzz_corpus/c561ab1d99f16a04898518914dd1cea4afa7e358
new file mode 100644
index 0000000..e15376e
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/c561ab1d99f16a04898518914dd1cea4afa7e358
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/c98c037db24035a40d40f91084a56f470bb6fbc5 b/tests/testdata/fuzz_corpus/c98c037db24035a40d40f91084a56f470bb6fbc5
new file mode 100644
index 0000000..82dfe42
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/c98c037db24035a40d40f91084a56f470bb6fbc5
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/cd838035892825e361fe0f936f93fa62aaf2fab1 b/tests/testdata/fuzz_corpus/cd838035892825e361fe0f936f93fa62aaf2fab1
new file mode 100644
index 0000000..67a809c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/cd838035892825e361fe0f936f93fa62aaf2fab1
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/cdeac0baddcbd150bfec97b7b88bff74f73e99f5 b/tests/testdata/fuzz_corpus/cdeac0baddcbd150bfec97b7b88bff74f73e99f5
new file mode 100644
index 0000000..0b3a2f1
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/cdeac0baddcbd150bfec97b7b88bff74f73e99f5
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-4796943898771456 b/tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-4796943898771456
new file mode 100644
index 0000000..d52d0d3
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-4796943898771456
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-5711765729443840 b/tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-5711765729443840
new file mode 100644
index 0000000..b35085d
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/clusterfuzz-testcase-minimized-fuzz_target-5711765729443840
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/d1128451375207da064d0f332e840af933280610 b/tests/testdata/fuzz_corpus/d1128451375207da064d0f332e840af933280610
new file mode 100755
index 0000000..b815fd1
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/d1128451375207da064d0f332e840af933280610
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/d3cc4e4dddf87cb0d41135b7a22d03fc4ec11bbc b/tests/testdata/fuzz_corpus/d3cc4e4dddf87cb0d41135b7a22d03fc4ec11bbc
new file mode 100644
index 0000000..2b9411a
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/d3cc4e4dddf87cb0d41135b7a22d03fc4ec11bbc
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/daebef8f49a59b71cf57d4771b09f9f8545b83d8 b/tests/testdata/fuzz_corpus/daebef8f49a59b71cf57d4771b09f9f8545b83d8
new file mode 100644
index 0000000..5669340
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/daebef8f49a59b71cf57d4771b09f9f8545b83d8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/dda6875f2313476f402e9a283ecaf2b50cfae316 b/tests/testdata/fuzz_corpus/dda6875f2313476f402e9a283ecaf2b50cfae316
new file mode 100644
index 0000000..bd7cfad
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/dda6875f2313476f402e9a283ecaf2b50cfae316
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/df7a639969efbe5943b6a7fa5eff4f732a50a4f6 b/tests/testdata/fuzz_corpus/df7a639969efbe5943b6a7fa5eff4f732a50a4f6
new file mode 100644
index 0000000..85ae79d
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/df7a639969efbe5943b6a7fa5eff4f732a50a4f6
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/e08b7c26f946f4761f2cecdc81f4e5e7651db9a7 b/tests/testdata/fuzz_corpus/e08b7c26f946f4761f2cecdc81f4e5e7651db9a7
new file mode 100644
index 0000000..a7799b8
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/e08b7c26f946f4761f2cecdc81f4e5e7651db9a7
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/e33d3e649729bea900f870b0cd5335f312d9ed42 b/tests/testdata/fuzz_corpus/e33d3e649729bea900f870b0cd5335f312d9ed42
new file mode 100644
index 0000000..47deab1
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/e33d3e649729bea900f870b0cd5335f312d9ed42
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/e4a3653bac41c8f39cc625286daa813e0ce603b0 b/tests/testdata/fuzz_corpus/e4a3653bac41c8f39cc625286daa813e0ce603b0
new file mode 100644
index 0000000..1844628
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/e4a3653bac41c8f39cc625286daa813e0ce603b0
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/e4c4e1789c10bbfe8ed1c05522483332bf2538f8 b/tests/testdata/fuzz_corpus/e4c4e1789c10bbfe8ed1c05522483332bf2538f8
new file mode 100644
index 0000000..1a626ca
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/e4c4e1789c10bbfe8ed1c05522483332bf2538f8
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/f0ac70f708130bb9cc4aba5ebe1a3c500c4ea11a b/tests/testdata/fuzz_corpus/f0ac70f708130bb9cc4aba5ebe1a3c500c4ea11a
new file mode 100644
index 0000000..ffb7c42
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/f0ac70f708130bb9cc4aba5ebe1a3c500c4ea11a
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/f5d4fb20f43ef7131ed49ff29f5c1bee69070ff2 b/tests/testdata/fuzz_corpus/f5d4fb20f43ef7131ed49ff29f5c1bee69070ff2
new file mode 100644
index 0000000..ee17dbf
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/f5d4fb20f43ef7131ed49ff29f5c1bee69070ff2
Binary files differ
diff --git a/tests/testdata/fuzz_corpus/fc88742708813d5dd57936fad4c6e9bd6ed125ac b/tests/testdata/fuzz_corpus/fc88742708813d5dd57936fad4c6e9bd6ed125ac
new file mode 100644
index 0000000..2ba4a8e
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/fc88742708813d5dd57936fad4c6e9bd6ed125ac
Binary files differ
diff --git a/tests/testdata/linux-x86/01-empty.o b/tests/testdata/linux-x86/01-empty.o
new file mode 100644
index 0000000..691bb03
--- /dev/null
+++ b/tests/testdata/linux-x86/01-empty.o
Binary files differ
diff --git a/tests/testdata/linux-x86/02-simple.o b/tests/testdata/linux-x86/02-simple.o
new file mode 100644
index 0000000..71bf2ad
--- /dev/null
+++ b/tests/testdata/linux-x86/02-simple.o
Binary files differ
diff --git a/tests/testdata/linux-x86/03-simple.a b/tests/testdata/linux-x86/03-simple.a
new file mode 100644
index 0000000..4e3e948
--- /dev/null
+++ b/tests/testdata/linux-x86/03-simple.a
Binary files differ
diff --git a/tests/testdata/linux-x86/04-simple.so b/tests/testdata/linux-x86/04-simple.so
new file mode 100755
index 0000000..8b3a6b4
--- /dev/null
+++ b/tests/testdata/linux-x86/04-simple.so
Binary files differ
diff --git a/tests/testdata/linux-x86/05-binary.bin b/tests/testdata/linux-x86/05-binary.bin
new file mode 100644
index 0000000..2bf5486
--- /dev/null
+++ b/tests/testdata/linux-x86/05-binary.bin
Binary files differ
diff --git a/tests/testdata/linux-x86/06-diff.a b/tests/testdata/linux-x86/06-diff.a
new file mode 100644
index 0000000..ba36762
--- /dev/null
+++ b/tests/testdata/linux-x86/06-diff.a
Binary files differ
diff --git a/tests/testdata/linux-x86/07-binary-stripped.bin b/tests/testdata/linux-x86/07-binary-stripped.bin
new file mode 100644
index 0000000..9bcbe46
--- /dev/null
+++ b/tests/testdata/linux-x86/07-binary-stripped.bin
Binary files differ
diff --git a/tests/testdata/linux-x86_64/01-empty.o b/tests/testdata/linux-x86_64/01-empty.o
new file mode 100644
index 0000000..0d103d4
--- /dev/null
+++ b/tests/testdata/linux-x86_64/01-empty.o
Binary files differ
diff --git a/tests/testdata/linux-x86_64/02-simple.o b/tests/testdata/linux-x86_64/02-simple.o
new file mode 100644
index 0000000..b511d55
--- /dev/null
+++ b/tests/testdata/linux-x86_64/02-simple.o
Binary files differ
diff --git a/tests/testdata/linux-x86_64/03-simple.a b/tests/testdata/linux-x86_64/03-simple.a
new file mode 100644
index 0000000..42af8cf
--- /dev/null
+++ b/tests/testdata/linux-x86_64/03-simple.a
Binary files differ
diff --git a/tests/testdata/linux-x86_64/04-simple.so b/tests/testdata/linux-x86_64/04-simple.so
new file mode 100755
index 0000000..464d9c5
--- /dev/null
+++ b/tests/testdata/linux-x86_64/04-simple.so
Binary files differ
diff --git a/tests/testdata/linux-x86_64/05-binary.bin b/tests/testdata/linux-x86_64/05-binary.bin
new file mode 100644
index 0000000..67bfc80
--- /dev/null
+++ b/tests/testdata/linux-x86_64/05-binary.bin
Binary files differ
diff --git a/tests/testdata/linux-x86_64/06-diff.a b/tests/testdata/linux-x86_64/06-diff.a
new file mode 100644
index 0000000..74ee1d9
--- /dev/null
+++ b/tests/testdata/linux-x86_64/06-diff.a
Binary files differ
diff --git a/tests/testdata/linux-x86_64/07-binary-stripped.bin b/tests/testdata/linux-x86_64/07-binary-stripped.bin
new file mode 100644
index 0000000..0035670
--- /dev/null
+++ b/tests/testdata/linux-x86_64/07-binary-stripped.bin
Binary files differ
diff --git a/tests/testdata/linux-x86_64/oldbloaty.bin b/tests/testdata/linux-x86_64/oldbloaty.bin
new file mode 100755
index 0000000..8b3ac73
--- /dev/null
+++ b/tests/testdata/linux-x86_64/oldbloaty.bin
Binary files differ
diff --git a/tests/testdata/make_test_files.sh b/tests/testdata/make_test_files.sh
new file mode 100755
index 0000000..bb75402
--- /dev/null
+++ b/tests/testdata/make_test_files.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ "$#" -eq 0 ] ; then  # The output directory argument is mandatory.
+ echo "Usage: make_test_files.sh <output dir>"
+ exit 1
+fi
+
+cd "$1" || exit 1  # Quoted and checked: never fall through and publish into the caller's cwd.
+OUTPUT_DIR=`pwd`  # Absolute path of the output dir, captured before moving to the temp dir.
+TMP=`mktemp -d` || exit 1  # Scratch directory in which all intermediate files are built.
+CC="${CC:-cc}"  # Compiler command; defaults to cc when CC is unset or empty.
+echo "Writing output to $OUTPUT_DIR"
+echo "Working in $TMP"
+cd "$TMP" || exit 1
+
+function publish() {  # Print the name of file $1 and copy it from the cwd into $OUTPUT_DIR.
+ echo "$1"
+ cp "$1" "$OUTPUT_DIR"  # Quoted so paths containing spaces or glob characters stay intact.
+}
+
+function make_tmp_obj() {  # Write source text $2 to "<basename of $1>.c" and compile it to object $1.
+ FILE=$1
+ CONTENTS="$2"
+ CFILE=`basename "$1"`.c
+ echo "$CONTENTS" > "$CFILE"
+ $CC -g -fPIC -o "$FILE" -c "$CFILE"  # $CC stays unquoted so a value with extra flags still splits.
+}
+
+function make_obj() {  # Compile source text $2 into object file $1, then publish that object.
+ FILE=$1
+ CONTENTS="$2"
+ make_tmp_obj "$FILE" "$CONTENTS"
+ publish "$FILE"
+}
+
+function make_ar() {  # Archive the files named after $1 into static library $1 and publish it.
+ FILE=$1
+ shift  # Drop the archive name so "$@" holds only the member files.
+ ar rcs "$FILE" "$@"
+ publish "$FILE"
+}
+
+function make_so() {  # Link the files named after $1 into shared library $1 and publish it.
+ FILE=$1
+ shift  # Drop the output name so "$@" holds only the linker inputs.
+ $CC -g -shared -o "$FILE" "$@"
+ publish "$FILE"
+}
+
+function make_binary() {  # Link the files named after $1 into executable $1 and publish it.
+ FILE=$1
+ shift  # Drop the output name so "$@" holds only the linker inputs.
+ $CC -o "$FILE" "$@"
+ publish "$FILE"
+}
+
+make_obj "01-empty.o" ""  # Object compiled from an empty source string.
+
+make_obj "02-simple.o" "
+#include <stdint.h>
+uint64_t bss_a = 0;
+uint32_t bss_b = 0;
+uint64_t data_a = 1;
+uint32_t data_b = 2;
+const uint64_t rodata_a = 1;
+const uint32_t rodata_b = 2;
+uint32_t func1() { return bss_b / 17; }
+uint32_t func2() { return data_b / 17; }"
+
+make_tmp_obj "foo.o" "
+int foo_x[1000] = {0};
+int foo_y = 0;
+int foo_func() { return foo_y / 17; }
+"  # foo.o is not published itself; it is only an input to the targets below.
+
+make_tmp_obj "bar.o" "
+int bar_x[1000] = {1};
+int bar_y = 1;
+int bar_z = 0;
+int bar_func() { return bar_y / 17; }
+"  # bar.o is likewise only an input to the targets below.
+
+make_tmp_obj "a_filename_longer_than_sixteen_chars.o" "
+int long_filename_x[3] = {1};
+int long_filename_y = 2;
+"
+
+make_ar "03-simple.a" "foo.o" "bar.o" "a_filename_longer_than_sixteen_chars.o"  # Static archive of the three temporary objects.
+make_so "04-simple.so" "foo.o" "bar.o"  # Shared library linked from foo.o and bar.o.
+
+make_tmp_obj "main.o" "int main() {}"  # Entry point for the executable below.
+
+make_binary "05-binary.bin" "foo.o" "bar.o" "main.o"  # Executable linked from foo.o, bar.o and main.o.
+
+# Make an object like foo.o but with different sizes.
+
+make_tmp_obj "foo2.o" "
+int foo_x[500] = {0};
+long long foo_y = 0;
+int foo_func() { return foo_y / 17 * 37 / 21; }
+"
+
+make_ar "06-diff.a" "foo2.o" "bar.o" "a_filename_longer_than_sixteen_chars.o"  # Same members as 03-simple.a, with foo2.o replacing foo.o.
+
+cp "05-binary.bin" "07-binary-stripped.bin"  # Work on a copy so 05-binary.bin itself is left unstripped.
+strip "07-binary-stripped.bin"
+publish "07-binary-stripped.bin"
diff --git a/tests/testdata/misc/01-no-sections.bin b/tests/testdata/misc/01-no-sections.bin
new file mode 100644
index 0000000..3a1479a
--- /dev/null
+++ b/tests/testdata/misc/01-no-sections.bin
Binary files differ
diff --git a/tests/testdata/misc/02-section-count-overflow.o b/tests/testdata/misc/02-section-count-overflow.o
new file mode 100644
index 0000000..860c0ce
--- /dev/null
+++ b/tests/testdata/misc/02-section-count-overflow.o
Binary files differ
diff --git a/tests/testdata/misc/03-small-binary-that-crashed-inlines.bin b/tests/testdata/misc/03-small-binary-that-crashed-inlines.bin
new file mode 100644
index 0000000..c4a8fac
--- /dev/null
+++ b/tests/testdata/misc/03-small-binary-that-crashed-inlines.bin
Binary files differ
diff --git a/tests/testdata/misc/04-go-binary-with-ref-addr.bin b/tests/testdata/misc/04-go-binary-with-ref-addr.bin
new file mode 100755
index 0000000..b2085c0
--- /dev/null
+++ b/tests/testdata/misc/04-go-binary-with-ref-addr.bin
Binary files differ
diff --git a/third_party/abseil-cpp b/third_party/abseil-cpp
new file mode 160000
+Subproject 1948f6f967e34db9793cfa8b4bcbaf370d039fd
diff --git a/third_party/capstone b/third_party/capstone
new file mode 160000
+Subproject 852f46a467cb37559a1f3a18bd45d5ca8c6fc5e
diff --git a/third_party/darwin_xnu_macho/LICENSE b/third_party/darwin_xnu_macho/LICENSE
new file mode 100644
index 0000000..fe81a60
--- /dev/null
+++ b/third_party/darwin_xnu_macho/LICENSE
@@ -0,0 +1,367 @@
+APPLE PUBLIC SOURCE LICENSE
+Version 2.0 - August 6, 2003
+
+Please read this License carefully before downloading this software.
+By downloading or using this software, you are agreeing to be bound by
+the terms of this License. If you do not or cannot agree to the terms
+of this License, please do not download or use the software.
+
+1. General; Definitions. This License applies to any program or other
+work which Apple Computer, Inc. ("Apple") makes publicly available and
+which contains a notice placed by Apple identifying such program or
+work as "Original Code" and stating that it is subject to the terms of
+this Apple Public Source License version 2.0 ("License"). As used in
+this License:
+
+1.1 "Applicable Patent Rights" mean: (a) in the case where Apple is
+the grantor of rights, (i) claims of patents that are now or hereafter
+acquired, owned by or assigned to Apple and (ii) that cover subject
+matter contained in the Original Code, but only to the extent
+necessary to use, reproduce and/or distribute the Original Code
+without infringement; and (b) in the case where You are the grantor of
+rights, (i) claims of patents that are now or hereafter acquired,
+owned by or assigned to You and (ii) that cover subject matter in Your
+Modifications, taken alone or in combination with Original Code.
+
+1.2 "Contributor" means any person or entity that creates or
+contributes to the creation of Modifications.
+
+1.3 "Covered Code" means the Original Code, Modifications, the
+combination of Original Code and any Modifications, and/or any
+respective portions thereof.
+
+1.4 "Externally Deploy" means: (a) to sublicense, distribute or
+otherwise make Covered Code available, directly or indirectly, to
+anyone other than You; and/or (b) to use Covered Code, alone or as
+part of a Larger Work, in any way to provide a service, including but
+not limited to delivery of content, through electronic communication
+with a client other than You.
+
+1.5 "Larger Work" means a work which combines Covered Code or portions
+thereof with code not governed by the terms of this License.
+
+1.6 "Modifications" mean any addition to, deletion from, and/or change
+to, the substance and/or structure of the Original Code, any previous
+Modifications, the combination of Original Code and any previous
+Modifications, and/or any respective portions thereof. When code is
+released as a series of files, a Modification is: (a) any addition to
+or deletion from the contents of a file containing Covered Code;
+and/or (b) any new file or other representation of computer program
+statements that contains any part of Covered Code.
+
+1.7 "Original Code" means (a) the Source Code of a program or other
+work as originally made available by Apple under this License,
+including the Source Code of any updates or upgrades to such programs
+or works made available by Apple under this License, and that has been
+expressly identified by Apple as such in the header file(s) of such
+work; and (b) the object code compiled from such Source Code and
+originally made available by Apple under this License.
+
+1.8 "Source Code" means the human readable form of a program or other
+work that is suitable for making modifications to it, including all
+modules it contains, plus any associated interface definition files,
+scripts used to control compilation and installation of an executable
+(object code).
+
+1.9 "You" or "Your" means an individual or a legal entity exercising
+rights under this License. For legal entities, "You" or "Your"
+includes any entity which controls, is controlled by, or is under
+common control with, You, where "control" means (a) the power, direct
+or indirect, to cause the direction or management of such entity,
+whether by contract or otherwise, or (b) ownership of fifty percent
+(50%) or more of the outstanding shares or beneficial ownership of
+such entity.
+
+2. Permitted Uses; Conditions & Restrictions. Subject to the terms
+and conditions of this License, Apple hereby grants You, effective on
+the date You accept this License and download the Original Code, a
+world-wide, royalty-free, non-exclusive license, to the extent of
+Apple's Applicable Patent Rights and copyrights covering the Original
+Code, to do the following:
+
+2.1 Unmodified Code. You may use, reproduce, display, perform,
+internally distribute within Your organization, and Externally Deploy
+verbatim, unmodified copies of the Original Code, for commercial or
+non-commercial purposes, provided that in each instance:
+
+(a) You must retain and reproduce in all copies of Original Code the
+copyright and other proprietary notices and disclaimers of Apple as
+they appear in the Original Code, and keep intact all notices in the
+Original Code that refer to this License; and
+
+(b) You must include a copy of this License with every copy of Source
+Code of Covered Code and documentation You distribute or Externally
+Deploy, and You may not offer or impose any terms on such Source Code
+that alter or restrict this License or the recipients' rights
+hereunder, except as permitted under Section 6.
+
+2.2 Modified Code. You may modify Covered Code and use, reproduce,
+display, perform, internally distribute within Your organization, and
+Externally Deploy Your Modifications and Covered Code, for commercial
+or non-commercial purposes, provided that in each instance You also
+meet all of these conditions:
+
+(a) You must satisfy all the conditions of Section 2.1 with respect to
+the Source Code of the Covered Code;
+
+(b) You must duplicate, to the extent it does not already exist, the
+notice in Exhibit A in each file of the Source Code of all Your
+Modifications, and cause the modified files to carry prominent notices
+stating that You changed the files and the date of any change; and
+
+(c) If You Externally Deploy Your Modifications, You must make
+Source Code of all Your Externally Deployed Modifications either
+available to those to whom You have Externally Deployed Your
+Modifications, or publicly available. Source Code of Your Externally
+Deployed Modifications must be released under the terms set forth in
+this License, including the license grants set forth in Section 3
+below, for as long as you Externally Deploy the Covered Code or twelve
+(12) months from the date of initial External Deployment, whichever is
+longer. You should preferably distribute the Source Code of Your
+Externally Deployed Modifications electronically (e.g. download from a
+web site).
+
+2.3 Distribution of Executable Versions. In addition, if You
+Externally Deploy Covered Code (Original Code and/or Modifications) in
+object code, executable form only, You must include a prominent
+notice, in the code itself as well as in related documentation,
+stating that Source Code of the Covered Code is available under the
+terms of this License with information on how and where to obtain such
+Source Code.
+
+2.4 Third Party Rights. You expressly acknowledge and agree that
+although Apple and each Contributor grants the licenses to their
+respective portions of the Covered Code set forth herein, no
+assurances are provided by Apple or any Contributor that the Covered
+Code does not infringe the patent or other intellectual property
+rights of any other entity. Apple and each Contributor disclaim any
+liability to You for claims brought by any other entity based on
+infringement of intellectual property rights or otherwise. As a
+condition to exercising the rights and licenses granted hereunder, You
+hereby assume sole responsibility to secure any other intellectual
+property rights needed, if any. For example, if a third party patent
+license is required to allow You to distribute the Covered Code, it is
+Your responsibility to acquire that license before distributing the
+Covered Code.
+
+3. Your Grants. In consideration of, and as a condition to, the
+licenses granted to You under this License, You hereby grant to any
+person or entity receiving or distributing Covered Code under this
+License a non-exclusive, royalty-free, perpetual, irrevocable license,
+under Your Applicable Patent Rights and other intellectual property
+rights (other than patent) owned or controlled by You, to use,
+reproduce, display, perform, modify, sublicense, distribute and
+Externally Deploy Your Modifications of the same scope and extent as
+Apple's licenses under Sections 2.1 and 2.2 above.
+
+4. Larger Works. You may create a Larger Work by combining Covered
+Code with other code not governed by the terms of this License and
+distribute the Larger Work as a single product. In each such instance,
+You must make sure the requirements of this License are fulfilled for
+the Covered Code or any portion thereof.
+
+5. Limitations on Patent License. Except as expressly stated in
+Section 2, no other patent rights, express or implied, are granted by
+Apple herein. Modifications and/or Larger Works may require additional
+patent licenses from Apple which Apple may grant in its sole
+discretion.
+
+6. Additional Terms. You may choose to offer, and to charge a fee for,
+warranty, support, indemnity or liability obligations and/or other
+rights consistent with the scope of the license granted herein
+("Additional Terms") to one or more recipients of Covered Code.
+However, You may do so only on Your own behalf and as Your sole
+responsibility, and not on behalf of Apple or any Contributor. You
+must obtain the recipient's agreement that any such Additional Terms
+are offered by You alone, and You hereby agree to indemnify, defend
+and hold Apple and every Contributor harmless for any liability
+incurred by or claims asserted against Apple or such Contributor by
+reason of any such Additional Terms.
+
+7. Versions of the License. Apple may publish revised and/or new
+versions of this License from time to time. Each version will be given
+a distinguishing version number. Once Original Code has been published
+under a particular version of this License, You may continue to use it
+under the terms of that version. You may also choose to use such
+Original Code under the terms of any subsequent version of this
+License published by Apple. No one other than Apple has the right to
+modify the terms applicable to Covered Code created under this
+License.
+
+8. NO WARRANTY OR SUPPORT. The Covered Code may contain in whole or in
+part pre-release, untested, or not fully tested works. The Covered
+Code may contain errors that could cause failures or loss of data, and
+may be incomplete or contain inaccuracies. You expressly acknowledge
+and agree that use of the Covered Code, or any portion thereof, is at
+Your sole and entire risk. THE COVERED CODE IS PROVIDED "AS IS" AND
+WITHOUT WARRANTY, UPGRADES OR SUPPORT OF ANY KIND AND APPLE AND
+APPLE'S LICENSOR(S) (COLLECTIVELY REFERRED TO AS "APPLE" FOR THE
+PURPOSES OF SECTIONS 8 AND 9) AND ALL CONTRIBUTORS EXPRESSLY DISCLAIM
+ALL WARRANTIES AND/OR CONDITIONS, EXPRESS OR IMPLIED, INCLUDING, BUT
+NOT LIMITED TO, THE IMPLIED WARRANTIES AND/OR CONDITIONS OF
+MERCHANTABILITY, OF SATISFACTORY QUALITY, OF FITNESS FOR A PARTICULAR
+PURPOSE, OF ACCURACY, OF QUIET ENJOYMENT, AND NONINFRINGEMENT OF THIRD
+PARTY RIGHTS. APPLE AND EACH CONTRIBUTOR DOES NOT WARRANT AGAINST
+INTERFERENCE WITH YOUR ENJOYMENT OF THE COVERED CODE, THAT THE
+FUNCTIONS CONTAINED IN THE COVERED CODE WILL MEET YOUR REQUIREMENTS,
+THAT THE OPERATION OF THE COVERED CODE WILL BE UNINTERRUPTED OR
+ERROR-FREE, OR THAT DEFECTS IN THE COVERED CODE WILL BE CORRECTED. NO
+ORAL OR WRITTEN INFORMATION OR ADVICE GIVEN BY APPLE, AN APPLE
+AUTHORIZED REPRESENTATIVE OR ANY CONTRIBUTOR SHALL CREATE A WARRANTY.
+You acknowledge that the Covered Code is not intended for use in the
+operation of nuclear facilities, aircraft navigation, communication
+systems, or air traffic control machines in which case the failure of
+the Covered Code could lead to death, personal injury, or severe
+physical or environmental damage.
+
+9. LIMITATION OF LIABILITY. TO THE EXTENT NOT PROHIBITED BY LAW, IN NO
+EVENT SHALL APPLE OR ANY CONTRIBUTOR BE LIABLE FOR ANY INCIDENTAL,
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATING
+TO THIS LICENSE OR YOUR USE OR INABILITY TO USE THE COVERED CODE, OR
+ANY PORTION THEREOF, WHETHER UNDER A THEORY OF CONTRACT, WARRANTY,
+TORT (INCLUDING NEGLIGENCE), PRODUCTS LIABILITY OR OTHERWISE, EVEN IF
+APPLE OR SUCH CONTRIBUTOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES AND NOTWITHSTANDING THE FAILURE OF ESSENTIAL PURPOSE OF ANY
+REMEDY. SOME JURISDICTIONS DO NOT ALLOW THE LIMITATION OF LIABILITY OF
+INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS LIMITATION MAY NOT APPLY
+TO YOU. In no event shall Apple's total liability to You for all
+damages (other than as may be required by applicable law) under this
+License exceed the amount of fifty dollars ($50.00).
+
+10. Trademarks. This License does not grant any rights to use the
+trademarks or trade names "Apple", "Apple Computer", "Mac", "Mac OS",
+"QuickTime", "QuickTime Streaming Server" or any other trademarks,
+service marks, logos or trade names belonging to Apple (collectively
+"Apple Marks") or to any trademark, service mark, logo or trade name
+belonging to any Contributor. You agree not to use any Apple Marks in
+or as part of the name of products derived from the Original Code or
+to endorse or promote products derived from the Original Code other
+than as expressly permitted by and in strict compliance at all times
+with Apple's third party trademark usage guidelines which are posted
+at http://www.apple.com/legal/guidelinesfor3rdparties.html.
+
+11. Ownership. Subject to the licenses granted under this License,
+each Contributor retains all rights, title and interest in and to any
+Modifications made by such Contributor. Apple retains all rights,
+title and interest in and to the Original Code and any Modifications
+made by or on behalf of Apple ("Apple Modifications"), and such Apple
+Modifications will not be automatically subject to this License. Apple
+may, at its sole discretion, choose to license such Apple
+Modifications under this License, or on different terms from those
+contained in this License or may choose not to license them at all.
+
+12. Termination.
+
+12.1 Termination. This License and the rights granted hereunder will
+terminate:
+
+(a) automatically without notice from Apple if You fail to comply with
+any term(s) of this License and fail to cure such breach within 30
+days of becoming aware of such breach;
+
+(b) immediately in the event of the circumstances described in Section
+13.5(b); or
+
+(c) automatically without notice from Apple if You, at any time during
+the term of this License, commence an action for patent infringement
+against Apple; provided that Apple did not first commence
+an action for patent infringement against You in that instance.
+
+12.2 Effect of Termination. Upon termination, You agree to immediately
+stop any further use, reproduction, modification, sublicensing and
+distribution of the Covered Code. All sublicenses to the Covered Code
+which have been properly granted prior to termination shall survive
+any termination of this License. Provisions which, by their nature,
+should remain in effect beyond the termination of this License shall
+survive, including but not limited to Sections 3, 5, 8, 9, 10, 11,
+12.2 and 13. No party will be liable to any other for compensation,
+indemnity or damages of any sort solely as a result of terminating
+this License in accordance with its terms, and termination of this
+License will be without prejudice to any other right or remedy of
+any party.
+
+13. Miscellaneous.
+
+13.1 Government End Users. The Covered Code is a "commercial item" as
+defined in FAR 2.101. Government software and technical data rights in
+the Covered Code include only those rights customarily provided to the
+public as defined in this License. This customary commercial license
+in technical data and software is provided in accordance with FAR
+12.211 (Technical Data) and 12.212 (Computer Software) and, for
+Department of Defense purchases, DFAR 252.227-7015 (Technical Data --
+Commercial Items) and 227.7202-3 (Rights in Commercial Computer
+Software or Computer Software Documentation). Accordingly, all U.S.
+Government End Users acquire Covered Code with only those rights set
+forth herein.
+
+13.2 Relationship of Parties. This License will not be construed as
+creating an agency, partnership, joint venture or any other form of
+legal association between or among You, Apple or any Contributor, and
+You will not represent to the contrary, whether expressly, by
+implication, appearance or otherwise.
+
+13.3 Independent Development. Nothing in this License will impair
+Apple's right to acquire, license, develop, have others develop for
+it, market and/or distribute technology or products that perform the
+same or similar functions as, or otherwise compete with,
+Modifications, Larger Works, technology or products that You may
+develop, produce, market or distribute.
+
+13.4 Waiver; Construction. Failure by Apple or any Contributor to
+enforce any provision of this License will not be deemed a waiver of
+future enforcement of that or any other provision. Any law or
+regulation which provides that the language of a contract shall be
+construed against the drafter will not apply to this License.
+
+13.5 Severability. (a) If for any reason a court of competent
+jurisdiction finds any provision of this License, or portion thereof,
+to be unenforceable, that provision of the License will be enforced to
+the maximum extent permissible so as to effect the economic benefits
+and intent of the parties, and the remainder of this License will
+continue in full force and effect. (b) Notwithstanding the foregoing,
+if applicable law prohibits or restricts You from fully and/or
+specifically complying with Sections 2 and/or 3 or prevents the
+enforceability of either of those Sections, this License will
+immediately terminate and You must immediately discontinue any use of
+the Covered Code and destroy all copies of it that are in your
+possession or control.
+
+13.6 Dispute Resolution. Any litigation or other dispute resolution
+between You and Apple relating to this License shall take place in the
+Northern District of California, and You and Apple hereby consent to
+the personal jurisdiction of, and venue in, the state and federal
+courts within that District with respect to this License. The
+application of the United Nations Convention on Contracts for the
+International Sale of Goods is expressly excluded.
+
+13.7 Entire Agreement; Governing Law. This License constitutes the
+entire agreement between the parties with respect to the subject
+matter hereof. This License shall be governed by the laws of the
+United States and the State of California, except that body of
+California law concerning conflicts of law.
+
+Where You are located in the province of Quebec, Canada, the following
+clause applies: The parties hereby confirm that they have requested
+that this License and all related documents be drafted in English. Les
+parties ont exige que le present contrat et tous les documents
+connexes soient rediges en anglais.
+
+EXHIBIT A.
+
+"Portions Copyright (c) 1999-2003 Apple Computer, Inc. All Rights
+Reserved.
+
+This file contains Original Code and/or Modifications of Original Code
+as defined in and that are subject to the Apple Public Source License
+Version 2.0 (the 'License'). You may not use this file except in
+compliance with the License. Please obtain a copy of the License at
+http://www.opensource.apple.com/apsl/ and read it before using this
+file.
+
+The Original Code and all software distributed under the License are
+distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+Please see the License for the specific language governing rights and
+limitations under the License."
diff --git a/third_party/darwin_xnu_macho/METADATA b/third_party/darwin_xnu_macho/METADATA
new file mode 100644
index 0000000..22f9ce4
--- /dev/null
+++ b/third_party/darwin_xnu_macho/METADATA
@@ -0,0 +1,23 @@
+name: "Darwin XNU Mach-O headers"
+description:
+ "A few header files from the Darwin XNU project. These define the Mach-O format. "
+ "They only contain struct and constant definitions (no code)."
+
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://opensource.apple.com/source/xnu/"
+ }
+ url {
+ type: GIT
+ value: "https://github.com/apple/darwin-xnu"
+ }
+ version: "0a798f6738bc1db01281fc08ae024145e84df927"
+ last_upgrade_date {
+ year: 2017
+ month: 11
+ day: 11
+ }
+ local_modifications:
+ "Removed some includes and definitions not related to the Mach-O file format."
+}
diff --git a/third_party/darwin_xnu_macho/mach-o/fat.h b/third_party/darwin_xnu_macho/mach-o/fat.h
new file mode 100644
index 0000000..79ad1b7
--- /dev/null
+++ b/third_party/darwin_xnu_macho/mach-o/fat.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#ifndef _MACH_O_FAT_H_
+#define _MACH_O_FAT_H_
+/*
+ * This header file describes the structures of the file format for "fat"
+ * architecture specific file (wrapper design). At the beginning of the file
+ * there is one fat_header structure followed by a number of fat_arch
+ * structures. For each architecture in the file, specified by a pair of
+ * cputype and cpusubtype, a fat_arch describes the file offset, file
+ * size and alignment in the file of the architecture specific member.
+ * The padded bytes in the file to place each member on its specific alignment
+ * are defined to be read as zeros and can be left as "holes" if the file system
+ * can support them as long as they read as zeros.
+ *
+ * All structures defined here are always written and read to/from disk
+ * in big-endian order.
+ */
+
+/*
+ * <mach/machine.h> is needed here for the cpu_type_t and cpu_subtype_t types
+ * and contains the constants for the possible values of these types.
+ */
+#include <stdint.h>
+#include "third_party/darwin_xnu_macho/mach/machine.h"
+
+#define FAT_MAGIC 0xcafebabe
+#define FAT_CIGAM 0xbebafeca /* NXSwapLong(FAT_MAGIC) */
+
+struct fat_header {
+ uint32_t magic; /* FAT_MAGIC */
+ uint32_t nfat_arch; /* number of structs that follow */
+};
+
+struct fat_arch {
+ cpu_type_t cputype; /* cpu specifier (int) */
+ cpu_subtype_t cpusubtype; /* machine specifier (int) */
+ uint32_t offset; /* file offset to this object file */
+ uint32_t size; /* size of this object file */
+ uint32_t align; /* alignment as a power of 2 */
+};
+
+#endif /* _MACH_O_FAT_H_ */
diff --git a/third_party/darwin_xnu_macho/mach-o/loader.h b/third_party/darwin_xnu_macho/mach-o/loader.h
new file mode 100644
index 0000000..53a6875
--- /dev/null
+++ b/third_party/darwin_xnu_macho/mach-o/loader.h
@@ -0,0 +1,1531 @@
+/*
+ * Copyright (c) 1999-2010 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#ifndef _MACHO_LOADER_H_
+#define _MACHO_LOADER_H_
+
+/*
+ * This file describes the format of mach object files.
+ */
+#include <stdint.h>
+
+/*
+ * <mach/machine.h> is needed here for the cpu_type_t and cpu_subtype_t types
+ * and contains the constants for the possible values of these types.
+ */
+#include "third_party/darwin_xnu_macho/mach/machine.h"
+
+/*
+ * <mach/vm_prot.h> is needed here for the vm_prot_t type and contains the
+ * constants that are or'ed together for the possible values of this type.
+ */
+#include "third_party/darwin_xnu_macho/mach/vm_prot.h"
+
+/*
+ * The 32-bit mach header appears at the very beginning of the object file for
+ * 32-bit architectures.
+ */
+struct mach_header {
+ uint32_t magic; /* mach magic number identifier */
+ cpu_type_t cputype; /* cpu specifier */
+ cpu_subtype_t cpusubtype; /* machine specifier */
+ uint32_t filetype; /* type of file */
+ uint32_t ncmds; /* number of load commands */
+ uint32_t sizeofcmds; /* the size of all the load commands */
+ uint32_t flags; /* flags */
+};
+
+/* Constant for the magic field of the mach_header (32-bit architectures) */
+#define MH_MAGIC 0xfeedface /* the mach magic number */
+#define MH_CIGAM 0xcefaedfe /* NXSwapInt(MH_MAGIC) */
+
+/*
+ * The 64-bit mach header appears at the very beginning of object files for
+ * 64-bit architectures.
+ */
+struct mach_header_64 {
+ uint32_t magic; /* mach magic number identifier */
+ cpu_type_t cputype; /* cpu specifier */
+ cpu_subtype_t cpusubtype; /* machine specifier */
+ uint32_t filetype; /* type of file */
+ uint32_t ncmds; /* number of load commands */
+ uint32_t sizeofcmds; /* the size of all the load commands */
+ uint32_t flags; /* flags */
+ uint32_t reserved; /* reserved */
+};
+
+/* Constant for the magic field of the mach_header_64 (64-bit architectures) */
+#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */
+#define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */
+
+/*
+ * The layout of the file depends on the filetype. For all but the MH_OBJECT
+ * file type the segments are padded out and aligned on a segment alignment
+ * boundary for efficient demand paging. The MH_EXECUTE, MH_FVMLIB, MH_DYLIB,
+ * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part
+ * of their first segment.
+ *
+ * The file type MH_OBJECT is a compact format intended as output of the
+ * assembler and input (and possibly output) of the link editor (the .o
+ * format). All sections are in one unnamed segment with no segment padding.
+ * This format is used as an executable format when the file is so small the
+ * segment padding greatly increases its size.
+ *
+ * The file type MH_PRELOAD is an executable format intended for things that
+ * are not executed under the kernel (proms, stand alones, kernels, etc). The
+ * format can be executed under the kernel but it may be demand paged and not
+ * preloaded before execution.
+ *
+ * A core file is in MH_CORE format and can be any arbitrary legal
+ * Mach-O file.
+ *
+ * Constants for the filetype field of the mach_header
+ */
+#define MH_OBJECT 0x1 /* relocatable object file */
+#define MH_EXECUTE 0x2 /* demand paged executable file */
+#define MH_FVMLIB 0x3 /* fixed VM shared library file */
+#define MH_CORE 0x4 /* core file */
+#define MH_PRELOAD 0x5 /* preloaded executable file */
+#define MH_DYLIB 0x6 /* dynamically bound shared library */
+#define MH_DYLINKER 0x7 /* dynamic link editor */
+#define MH_BUNDLE 0x8 /* dynamically bound bundle file */
+#define MH_DYLIB_STUB 0x9 /* shared library stub for static */
+ /* linking only, no section contents */
+#define MH_DSYM 0xa /* companion file with only debug */
+ /* sections */
+#define MH_KEXT_BUNDLE 0xb /* x86_64 kexts */
+
+/* Constants for the flags field of the mach_header */
+#define MH_NOUNDEFS 0x1 /* the object file has no undefined
+ references */
+#define MH_INCRLINK 0x2 /* the object file is the output of an
+ incremental link against a base file
+ and can't be link edited again */
+#define MH_DYLDLINK 0x4 /* the object file is input for the
+ dynamic linker and can't be statically
+ link edited again */
+#define MH_BINDATLOAD 0x8 /* the object file's undefined
+ references are bound by the dynamic
+ linker when loaded. */
+#define MH_PREBOUND 0x10 /* the file has its dynamic undefined
+ references prebound. */
+#define MH_SPLIT_SEGS 0x20 /* the file has its read-only and
+ read-write segments split */
+#define MH_LAZY_INIT 0x40 /* the shared library init routine is
+ to be run lazily via catching memory
+ faults to its writeable segments
+ (obsolete) */
+#define MH_TWOLEVEL 0x80 /* the image is using two-level name
+ space bindings */
+#define MH_FORCE_FLAT 0x100 /* the executable is forcing all images
+ to use flat name space bindings */
+#define MH_NOMULTIDEFS 0x200 /* this umbrella guarantees no multiple
+ definitions of symbols in its
+ sub-images so the two-level namespace
+ hints can always be used. */
+#define MH_NOFIXPREBINDING 0x400 /* do not have dyld notify the
+ prebinding agent about this
+ executable */
+#define MH_PREBINDABLE 0x800 /* the binary is not prebound but can
+ have its prebinding redone. only used
+ when MH_PREBOUND is not set. */
+#define MH_ALLMODSBOUND 0x1000 /* indicates that this binary binds to
+ all two-level namespace modules of
+ its dependent libraries. only used
+ when MH_PREBINDABLE and MH_TWOLEVEL
+ are both set. */
+#define MH_SUBSECTIONS_VIA_SYMBOLS 0x2000/* safe to divide up the sections into
+ sub-sections via symbols for dead
+ code stripping */
+#define MH_CANONICAL 0x4000 /* the binary has been canonicalized
+ via the unprebind operation */
+#define MH_WEAK_DEFINES 0x8000 /* the final linked image contains
+ external weak symbols */
+#define MH_BINDS_TO_WEAK 0x10000 /* the final linked image uses
+ weak symbols */
+
+#define MH_ALLOW_STACK_EXECUTION 0x20000/* When this bit is set, all stacks
+ in the task will be given stack
+ execution privilege. Only used in
+ MH_EXECUTE filetypes. */
+#define MH_ROOT_SAFE 0x40000 /* When this bit is set, the binary
+ declares it is safe for use in
+ processes with uid zero */
+
+#define MH_SETUID_SAFE 0x80000 /* When this bit is set, the binary
+ declares it is safe for use in
+ processes when issetugid() is true */
+
+#define MH_NO_REEXPORTED_DYLIBS 0x100000 /* When this bit is set on a dylib,
+ the static linker does not need to
+ examine dependent dylibs to see
+ if any are re-exported */
+#define MH_PIE 0x200000 /* When this bit is set, the OS will
+ load the main executable at a
+ random address. Only used in
+ MH_EXECUTE filetypes. */
+#define MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs. When
+ linking against a dylib that
+ has this bit set, the static linker
+ will automatically not create a
+ LC_LOAD_DYLIB load command to the
+ dylib if no symbols are being
+ referenced from the dylib. */
+#define MH_HAS_TLV_DESCRIPTORS 0x800000 /* Contains a section of type
+ S_THREAD_LOCAL_VARIABLES */
+
+#define MH_NO_HEAP_EXECUTION 0x1000000 /* When this bit is set, the OS will
+ run the main executable with
+ a non-executable heap even on
+ platforms (e.g. i386) that don't
+ require it. Only used in MH_EXECUTE
+ filetypes. */
+
+#define MH_APP_EXTENSION_SAFE 0x02000000 /* The code was linked for use in an
+ application extension. */
+
+/*
+ * The load commands directly follow the mach_header. The total size of all
+ * of the commands is given by the sizeofcmds field in the mach_header. All
+ * load commands must have as their first two fields cmd and cmdsize. The cmd
+ * field is filled in with a constant for that command type. Each command type
+ * has a structure specifically for it. The cmdsize field is the size in bytes
+ * of the particular load command structure plus anything that follows it that
+ * is a part of the load command (i.e. section structures, strings, etc.). To
+ * advance to the next load command the cmdsize can be added to the offset or
+ * pointer of the current load command. The cmdsize for 32-bit architectures
+ * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple
+ * of 8 bytes (these are forever the maximum alignment of any load commands).
+ * The padded bytes must be zero. All tables in the object file must also
+ * follow these rules so the file can be memory mapped. Otherwise the pointers
+ * to these tables will not work well or at all on some machines. With all
+ * padding zeroed like objects will compare byte for byte.
+ */
+struct load_command {
+ uint32_t cmd; /* type of load command */
+ uint32_t cmdsize; /* total size of command in bytes */
+};
+
+/*
+ * After MacOS X 10.1 when a new load command is added that is required to be
+ * understood by the dynamic linker for the image to execute properly the
+ * LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic
+ * linker sees such a load command that it does not understand, it will issue an
+ * "unknown load command required for execution" error and refuse to use the
+ * image. Other load commands without this bit that are not understood will
+ * simply be ignored.
+ */
+#define LC_REQ_DYLD 0x80000000
+
+/* Constants for the cmd field of all load commands, the type */
+#define LC_SEGMENT 0x1 /* segment of this file to be mapped */
+#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */
+#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */
+#define LC_THREAD 0x4 /* thread */
+#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */
+#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */
+#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */
+#define LC_IDENT 0x8 /* object identification info (obsolete) */
+#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */
+#define LC_PREPAGE 0xa /* prepage command (internal use) */
+#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */
+#define LC_LOAD_DYLIB 0xc /* load a dynamically linked shared library */
+#define LC_ID_DYLIB 0xd /* dynamically linked shared lib ident */
+#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */
+#define LC_ID_DYLINKER 0xf /* dynamic linker identification */
+#define LC_PREBOUND_DYLIB 0x10 /* modules prebound for a dynamically */
+ /* linked shared library */
+#define LC_ROUTINES 0x11 /* image routines */
+#define LC_SUB_FRAMEWORK 0x12 /* sub framework */
+#define LC_SUB_UMBRELLA 0x13 /* sub umbrella */
+#define LC_SUB_CLIENT 0x14 /* sub client */
+#define LC_SUB_LIBRARY 0x15 /* sub library */
+#define LC_TWOLEVEL_HINTS 0x16 /* two-level namespace lookup hints */
+#define LC_PREBIND_CKSUM 0x17 /* prebind checksum */
+
+/*
+ * load a dynamically linked shared library that is allowed to be missing
+ * (all symbols are weak imported).
+ */
+#define LC_LOAD_WEAK_DYLIB (0x18 | LC_REQ_DYLD)
+
+#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be
+ mapped */
+#define LC_ROUTINES_64 0x1a /* 64-bit image routines */
+#define LC_UUID 0x1b /* the uuid */
+#define LC_RPATH (0x1c | LC_REQ_DYLD) /* runpath additions */
+#define LC_CODE_SIGNATURE 0x1d /* location of code signature */
+#define LC_SEGMENT_SPLIT_INFO 0x1e /* location of info to split segments */
+#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */
+#define LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */
+#define LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */
+#define LC_DYLD_INFO 0x22 /* compressed dyld information */
+#define LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD) /* compressed dyld information only */
+#define LC_LOAD_UPWARD_DYLIB (0x23 | LC_REQ_DYLD) /* load upward dylib */
+#define LC_VERSION_MIN_MACOSX 0x24 /* build for MacOSX min OS version */
+#define LC_VERSION_MIN_IPHONEOS 0x25 /* build for iPhoneOS min OS version */
+#define LC_FUNCTION_STARTS 0x26 /* compressed table of function start addresses */
+#define LC_DYLD_ENVIRONMENT 0x27 /* string for dyld to treat
+ like environment variable */
+#define LC_MAIN (0x28|LC_REQ_DYLD) /* replacement for LC_UNIXTHREAD */
+#define LC_DATA_IN_CODE 0x29 /* table of non-instructions in __text */
+#define LC_SOURCE_VERSION 0x2A /* source version used to build binary */
+#define LC_DYLIB_CODE_SIGN_DRS 0x2B /* Code signing DRs copied from linked dylibs */
+#define LC_ENCRYPTION_INFO_64 0x2C /* 64-bit encrypted segment information */
+#define LC_LINKER_OPTION 0x2D /* linker options in MH_OBJECT files */
+#define LC_LINKER_OPTIMIZATION_HINT 0x2E /* optimization hints in MH_OBJECT files */
+#define LC_VERSION_MIN_TVOS 0x2F /* build for AppleTV min OS version */
+#define LC_VERSION_MIN_WATCHOS 0x30 /* build for Watch min OS version */
+#define LC_NOTE 0x31 /* arbitrary data included within a Mach-O file */
+#define LC_BUILD_VERSION 0x32 /* build for platform min OS version */
+
+/*
+ * A variable length string in a load command is represented by an lc_str
+ * union. The strings are stored just after the load command structure and
+ * the offset is from the start of the load command structure. The size
+ * of the string is reflected in the cmdsize field of the load command.
+ * Once again any padded bytes to bring the cmdsize field to a multiple
+ * of 4 bytes must be zero.
+ */
+union lc_str {
+ uint32_t offset; /* offset to the string */
+#ifndef __LP64__
+ char *ptr; /* pointer to the string */
+#endif
+};
+
+/*
+ * The segment load command indicates that a part of this file is to be
+ * mapped into the task's address space. The size of this segment in memory,
+ * vmsize, may be equal to or larger than the amount to map from this file,
+ * filesize. The file is mapped starting at fileoff to the beginning of
+ * the segment in memory, vmaddr. The rest of the memory of the segment,
+ * if any, is allocated zero fill on demand. The segment's maximum virtual
+ * memory protection and initial virtual memory protection are specified
+ * by the maxprot and initprot fields. If the segment has sections then the
+ * section structures directly follow the segment command and their size is
+ * reflected in cmdsize.
+ */
+struct segment_command { /* for 32-bit architectures */
+ uint32_t cmd; /* LC_SEGMENT */
+ uint32_t cmdsize; /* includes sizeof section structs */
+ char segname[16]; /* segment name */
+ uint32_t vmaddr; /* memory address of this segment */
+ uint32_t vmsize; /* memory size of this segment */
+ uint32_t fileoff; /* file offset of this segment */
+ uint32_t filesize; /* amount to map from the file */
+ vm_prot_t maxprot; /* maximum VM protection */
+ vm_prot_t initprot; /* initial VM protection */
+ uint32_t nsects; /* number of sections in segment */
+ uint32_t flags; /* flags */
+};
+
+/*
+ * The 64-bit segment load command indicates that a part of this file is to be
+ * mapped into a 64-bit task's address space. If the 64-bit segment has
+ * sections then section_64 structures directly follow the 64-bit segment
+ * command and their size is reflected in cmdsize.
+ */
+struct segment_command_64 { /* for 64-bit architectures */
+ uint32_t cmd; /* LC_SEGMENT_64 */
+ uint32_t cmdsize; /* includes sizeof section_64 structs */
+ char segname[16]; /* segment name */
+ uint64_t vmaddr; /* memory address of this segment */
+ uint64_t vmsize; /* memory size of this segment */
+ uint64_t fileoff; /* file offset of this segment */
+ uint64_t filesize; /* amount to map from the file */
+ vm_prot_t maxprot; /* maximum VM protection */
+ vm_prot_t initprot; /* initial VM protection */
+ uint32_t nsects; /* number of sections in segment */
+ uint32_t flags; /* flags */
+};
+
+/* Constants for the flags field of the segment_command */
+#define SG_HIGHVM 0x1 /* the file contents for this segment is for
+ the high part of the VM space, the low part
+ is zero filled (for stacks in core files) */
+#define SG_FVMLIB 0x2 /* this segment is the VM that is allocated by
+ a fixed VM library, for overlap checking in
+ the link editor */
+#define SG_NORELOC 0x4 /* this segment has nothing that was relocated
+ in it and nothing relocated to it, that is
+ it may be safely replaced without relocation*/
+#define SG_PROTECTED_VERSION_1 0x8 /* This segment is protected. If the
+ segment starts at file offset 0, the
+ first page of the segment is not
+ protected. All other pages of the
+ segment are protected. */
+
+/*
+ * A segment is made up of zero or more sections. Non-MH_OBJECT files have
+ * all of their segments with the proper sections in each, and padded to the
+ * specified segment alignment when produced by the link editor. The first
+ * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
+ * and load commands of the object file before its first section. The zero
+ * fill sections are always last in their segment (in all formats). This
+ * allows the zeroed segment padding to be mapped into memory where zero fill
+ * sections might be. The gigabyte zero fill sections, those with the section
+ * type S_GB_ZEROFILL, can only be in a segment with sections of this type.
+ * These segments are then placed after all other segments.
+ *
+ * The MH_OBJECT format has all of its sections in one segment for
+ * compactness. There is no padding to a specified segment boundary and the
+ * mach_header and load commands are not part of the segment.
+ *
+ * Sections with the same section name, sectname, going into the same segment,
+ * segname, are combined by the link editor. The resulting section is aligned
+ * to the maximum alignment of the combined sections and is the new section's
+ * alignment. The combined sections are aligned to their original alignment in
+ * the combined section. Any padded bytes to get the specified alignment are
+ * zeroed.
+ *
+ * The format of the relocation entries referenced by the reloff and nreloc
+ * fields of the section structure for mach object files is described in the
+ * header file <reloc.h>.
+ */
+struct section { /* for 32-bit architectures */
+ char sectname[16]; /* name of this section */
+ char segname[16]; /* segment this section goes in */
+ uint32_t addr; /* memory address of this section */
+ uint32_t size; /* size in bytes of this section */
+ uint32_t offset; /* file offset of this section */
+ uint32_t align; /* section alignment (power of 2) */
+ uint32_t reloff; /* file offset of relocation entries */
+ uint32_t nreloc; /* number of relocation entries */
+ uint32_t flags; /* flags (section type and attributes)*/
+ uint32_t reserved1; /* reserved (for offset or index) */
+ uint32_t reserved2; /* reserved (for count or sizeof) */
+};
+
+struct section_64 { /* for 64-bit architectures */
+ char sectname[16]; /* name of this section */
+ char segname[16]; /* segment this section goes in */
+ uint64_t addr; /* memory address of this section */
+ uint64_t size; /* size in bytes of this section */
+ uint32_t offset; /* file offset of this section */
+ uint32_t align; /* section alignment (power of 2) */
+ uint32_t reloff; /* file offset of relocation entries */
+ uint32_t nreloc; /* number of relocation entries */
+ uint32_t flags; /* flags (section type and attributes)*/
+ uint32_t reserved1; /* reserved (for offset or index) */
+ uint32_t reserved2; /* reserved (for count or sizeof) */
+ uint32_t reserved3; /* reserved */
+};
+
+/*
+ * The flags field of a section structure is separated into two parts: a section
+ * type and section attributes. The section types are mutually exclusive (it
+ * can only have one type) but the section attributes are not (it may have more
+ * than one attribute).
+ */
+#define SECTION_TYPE 0x000000ff /* 256 section types */
+#define SECTION_ATTRIBUTES 0xffffff00 /* 24 section attributes */
+
+/* Constants for the type of a section */
+#define S_REGULAR 0x0 /* regular section */
+#define S_ZEROFILL 0x1 /* zero fill on demand section */
+#define S_CSTRING_LITERALS 0x2 /* section with only literal C strings*/
+#define S_4BYTE_LITERALS 0x3 /* section with only 4 byte literals */
+#define S_8BYTE_LITERALS 0x4 /* section with only 8 byte literals */
+#define S_LITERAL_POINTERS 0x5 /* section with only pointers to */
+ /* literals */
+/*
+ * For the two types of symbol pointers sections and the symbol stubs section
+ * they have indirect symbol table entries. For each of the entries in the
+ * section the indirect symbol table entries, in corresponding order in the
+ * indirect symbol table, start at the index stored in the reserved1 field
+ * of the section structure. Since the indirect symbol table entries
+ * correspond to the entries in the section the number of indirect symbol table
+ * entries is inferred from the size of the section divided by the size of the
+ * entries in the section. For symbol pointers sections the size of the entries
+ * in the section is 4 bytes and for symbol stubs sections the byte size of the
+ * stubs is stored in the reserved2 field of the section structure.
+ */
+#define S_NON_LAZY_SYMBOL_POINTERS 0x6 /* section with only non-lazy
+ symbol pointers */
+#define S_LAZY_SYMBOL_POINTERS 0x7 /* section with only lazy symbol
+ pointers */
+#define S_SYMBOL_STUBS 0x8 /* section with only symbol
+ stubs, byte size of stub in
+ the reserved2 field */
+#define S_MOD_INIT_FUNC_POINTERS 0x9 /* section with only function
+ pointers for initialization*/
+#define S_MOD_TERM_FUNC_POINTERS 0xa /* section with only function
+ pointers for termination */
+#define S_COALESCED 0xb /* section contains symbols that
+ are to be coalesced */
+#define S_GB_ZEROFILL 0xc /* zero fill on demand section
+ (that can be larger than 4
+ gigabytes) */
+#define S_INTERPOSING 0xd /* section with only pairs of
+ function pointers for
+ interposing */
+#define S_16BYTE_LITERALS 0xe /* section with only 16 byte
+ literals */
+#define S_DTRACE_DOF 0xf /* section contains
+ DTrace Object Format */
+#define S_LAZY_DYLIB_SYMBOL_POINTERS 0x10 /* section with only lazy
+ symbol pointers to lazy
+ loaded dylibs */
+/*
+ * Section types to support thread local variables
+ */
+#define S_THREAD_LOCAL_REGULAR 0x11 /* template of initial
+ values for TLVs */
+#define S_THREAD_LOCAL_ZEROFILL 0x12 /* template of initial
+ values for TLVs */
+#define S_THREAD_LOCAL_VARIABLES 0x13 /* TLV descriptors */
+#define S_THREAD_LOCAL_VARIABLE_POINTERS 0x14 /* pointers to TLV
+ descriptors */
+#define S_THREAD_LOCAL_INIT_FUNCTION_POINTERS 0x15 /* functions to call
+ to initialize TLV
+ values */
+
+/*
+ * Constants for the section attributes part of the flags field of a section
+ * structure.
+ */
+#define SECTION_ATTRIBUTES_USR 0xff000000 /* User settable attributes */
+#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section contains only true
+ machine instructions */
+#define S_ATTR_NO_TOC 0x40000000 /* section contains coalesced
+ symbols that are not to be
+ in a ranlib table of
+ contents */
+#define S_ATTR_STRIP_STATIC_SYMS 0x20000000 /* ok to strip static symbols
+ in this section in files
+ with the MH_DYLDLINK flag */
+#define S_ATTR_NO_DEAD_STRIP 0x10000000 /* no dead stripping */
+#define S_ATTR_LIVE_SUPPORT 0x08000000 /* blocks are live if they
+ reference live blocks */
+#define S_ATTR_SELF_MODIFYING_CODE 0x04000000 /* Used with i386 code stubs
+ written on by dyld */
+/*
+ * If a segment contains any sections marked with S_ATTR_DEBUG then all
+ * sections in that segment must have this attribute. No section other than
+ * a section marked with this attribute may reference the contents of this
+ * section. A section with this attribute may contain no symbols and must have
+ * a section type S_REGULAR. The static linker will not copy section contents
+ * from sections with this attribute into its output file. These sections
+ * generally contain DWARF debugging info.
+ */
+#define S_ATTR_DEBUG 0x02000000 /* a debug section */
+#define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system settable attributes */
+#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
+ machine instructions */
+#define S_ATTR_EXT_RELOC 0x00000200 /* section has external
+ relocation entries */
+#define S_ATTR_LOC_RELOC 0x00000100 /* section has local
+ relocation entries */
+
+
+/*
+ * The names of segments and sections in them are mostly meaningless to the
+ * link-editor. But there are a few things to support traditional UNIX
+ * executables that require the link-editor and assembler to use some names
+ * agreed upon by convention.
+ *
+ * The initial protection of the "__TEXT" segment has write protection turned
+ * off (not writeable).
+ *
+ * The link-editor will allocate common symbols at the end of the "__common"
+ * section in the "__DATA" segment. It will create the section and segment
+ * if needed.
+ */
+
+/* The currently known segment names and the section names in those segments */
+
+#define SEG_PAGEZERO "__PAGEZERO" /* the pagezero segment which has no */
+ /* protections and catches NULL */
+ /* references for MH_EXECUTE files */
+
+
+#define SEG_TEXT "__TEXT" /* the traditional UNIX text segment */
+#define SECT_TEXT "__text" /* the real text part of the text */
+ /* section no headers, and no padding */
+#define SECT_FVMLIB_INIT0 "__fvmlib_init0" /* the fvmlib initialization */
+ /* section */
+#define SECT_FVMLIB_INIT1 "__fvmlib_init1" /* the section following the */
+ /* fvmlib initialization */
+ /* section */
+
+#define SEG_DATA "__DATA" /* the traditional UNIX data segment */
+#define SECT_DATA "__data" /* the real initialized data section */
+ /* no padding, no bss overlap */
+#define SECT_BSS "__bss" /* the real uninitialized data section*/
+ /* no padding */
+#define SECT_COMMON "__common" /* the section common symbols are */
+ /* allocated in by the link editor */
+
+#define SEG_OBJC "__OBJC" /* objective-C runtime segment */
+#define SECT_OBJC_SYMBOLS "__symbol_table" /* symbol table */
+#define SECT_OBJC_MODULES "__module_info" /* module information */
+#define SECT_OBJC_STRINGS "__selector_strs" /* string table */
+#define SECT_OBJC_REFS "__selector_refs" /* string table */
+
+#define SEG_ICON "__ICON" /* the icon segment */
+#define SECT_ICON_HEADER "__header" /* the icon headers */
+#define SECT_ICON_TIFF "__tiff" /* the icons in tiff format */
+
+#define SEG_LINKEDIT "__LINKEDIT" /* the segment containing all structs */
+ /* created and maintained by the link */
+ /* editor. Created with -seglinkedit */
+ /* option to ld(1) for MH_EXECUTE and */
+ /* FVMLIB file types only */
+
+#define SEG_UNIXSTACK "__UNIXSTACK" /* the unix stack segment */
+
+#define SEG_IMPORT "__IMPORT" /* the segment for the self (dyld) */
+ /* modifying code stubs that has read, */
+ /* write and execute permissions */
+
+/*
+ * Fixed virtual memory shared libraries are identified by two things. The
+ * target pathname (the name of the library as found for execution), and the
+ * minor version number. The address of where the headers are loaded is in
+ * header_addr. (THIS IS OBSOLETE and no longer supported).
+ */
+struct fvmlib {
+ union lc_str name; /* library's target pathname */
+ uint32_t minor_version; /* library's minor version number */
+ uint32_t header_addr; /* library's header address */
+};
+
+/*
+ * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header)
+ * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library.
+ * An object that uses a fixed virtual shared library also contains a
+ * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses.
+ * (THIS IS OBSOLETE and no longer supported).
+ */
+struct fvmlib_command {
+ uint32_t cmd; /* LC_IDFVMLIB or LC_LOADFVMLIB */
+ uint32_t cmdsize; /* size of this command; includes pathname string */
+ struct fvmlib fvmlib; /* the library identification */
+};
+
+/*
+ * Dynamically linked shared libraries are identified by two things: the
+ * pathname (the name of the library as found for execution), and the
+ * compatibility version number. The pathname must match and the compatibility
+ * number in the user of the library must be greater than or equal to the
+ * library being used. The time stamp is used to record the time a library was
+ * built and copied into user so it can be used to determine if the library used
+ * at runtime is exactly the same as used to build the program.
+ */
+struct dylib {
+ union lc_str name; /* library's path name */
+ uint32_t timestamp; /* library's build time stamp */
+ uint32_t current_version; /* library's current version number */
+ uint32_t compatibility_version; /* library's compatibility vers number */
+};
+
+/*
+ * A dynamically linked shared library (filetype == MH_DYLIB in the mach header)
+ * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library.
+ * An object that uses a dynamically linked shared library also contains a
+ * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or
+ * LC_REEXPORT_DYLIB) for each library it uses.
+ */
+struct dylib_command {
+ uint32_t cmd; /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB,
+ LC_REEXPORT_DYLIB */
+ uint32_t cmdsize; /* size of this command; includes pathname string */
+ struct dylib dylib; /* the library identification */
+};
+
+/*
+ * A dynamically linked shared library may be a subframework of an umbrella
+ * framework. If so it will be linked with "-umbrella umbrella_name", where
+ * "umbrella_name" is the name of the umbrella framework. A subframework
+ * can only be linked against by its umbrella framework or other subframeworks
+ * that are part of the same umbrella framework. Otherwise the static link
+ * editor produces an error and states to link against the umbrella framework.
+ * The name of the umbrella framework for subframeworks is recorded in the
+ * following structure.
+ */
+struct sub_framework_command {
+ uint32_t cmd; /* LC_SUB_FRAMEWORK */
+ uint32_t cmdsize; /* includes umbrella string */
+ union lc_str umbrella; /* the umbrella framework name */
+};
+
+/*
+ * Dynamically linked shared libraries that are subframeworks of an umbrella
+ * framework can allow clients other than the umbrella framework or other
+ * subframeworks in the same umbrella framework. To do this the subframework
+ * is built with "-allowable_client client_name" and an LC_SUB_CLIENT load
+ * command is created for each -allowable_client flag. The client_name is
+ * usually a framework name. It can also be a name used for bundle clients
+ * where the bundle is built with "-client_name client_name".
+ */
+struct sub_client_command {
+ uint32_t cmd; /* LC_SUB_CLIENT */
+ uint32_t cmdsize; /* includes client string */
+ union lc_str client; /* the client name */
+};
+
+/*
+ * A dynamically linked shared library may be a sub_umbrella of an umbrella
+ * framework. If so it will be linked with "-sub_umbrella umbrella_name", where
+ * "umbrella_name" is the name of the sub_umbrella framework. When
+ * statically linking with -twolevel_namespace in effect, a twolevel namespace
+ * umbrella framework will only cause its subframeworks and those frameworks
+ * listed as sub_umbrella frameworks to be implicitly linked in. Any other
+ * dependent dynamic libraries will not be linked in when -twolevel_namespace
+ * is in effect. The primary library recorded by the static linker when
+ * resolving a symbol in these libraries will be the umbrella framework.
+ * Zero or more sub_umbrella frameworks may be used by an umbrella framework.
+ * The name of a sub_umbrella framework is recorded in the following structure.
+ */
+struct sub_umbrella_command {
+ uint32_t cmd; /* LC_SUB_UMBRELLA */
+ uint32_t cmdsize; /* includes sub_umbrella string */
+ union lc_str sub_umbrella; /* the sub_umbrella framework name */
+};
+
+/*
+ * A dynamically linked shared library may be a sub_library of another shared
+ * library. If so it will be linked with "-sub_library library_name", where
+ * "library_name" is the name of the sub_library shared library. When
+ * statically linking with -twolevel_namespace in effect, a twolevel namespace
+ * shared library will only cause its subframeworks and those frameworks
+ * listed as sub_umbrella frameworks and libraries listed as sub_libraries to
+ * be implicitly linked in. Any other dependent dynamic libraries will not be
+ * linked in when -twolevel_namespace is in effect. The primary library
+ * recorded by the static linker when resolving a symbol in these libraries
+ * will be the umbrella framework (or dynamic library). Zero or more sub_library
+ * shared libraries may be used by an umbrella framework (or dynamic library).
+ * The name of a sub_library framework is recorded in the following structure.
+ * For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc".
+ */
+struct sub_library_command {
+ uint32_t cmd; /* LC_SUB_LIBRARY */
+ uint32_t cmdsize; /* includes sub_library string */
+ union lc_str sub_library; /* the sub_library name */
+};
+
+/*
+ * A program (filetype == MH_EXECUTE) that is
+ * prebound to its dynamic libraries has one of these for each library that
+ * the static linker used in prebinding. It contains a bit vector for the
+ * modules in the library. The bits indicate which modules are bound (1) and
+ * which are not (0) from the library. The bit for module 0 is the low bit
+ * of the first byte. So the bit for the Nth module is:
+ * (linked_modules[N/8] >> (N%8)) & 1
+ */
+struct prebound_dylib_command {
+ uint32_t cmd; /* LC_PREBOUND_DYLIB */
+ uint32_t cmdsize; /* includes strings */
+ union lc_str name; /* library's path name */
+ uint32_t nmodules; /* number of modules in library */
+ union lc_str linked_modules; /* bit vector of linked modules */
+};
+
+/*
+ * A program that uses a dynamic linker contains a dylinker_command to identify
+ * the name of the dynamic linker (LC_LOAD_DYLINKER). A dynamic linker
+ * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
+ * A file can have at most one of these.
+ * This struct is also used for the LC_DYLD_ENVIRONMENT load command, where it
+ * contains a string for dyld to treat like an environment variable.
+ */
+struct dylinker_command {
+ uint32_t cmd; /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or
+ LC_DYLD_ENVIRONMENT */
+ uint32_t cmdsize; /* includes pathname string */
+ union lc_str name; /* dynamic linker's path name */
+};
+
+/*
+ * Thread commands contain machine-specific data structures suitable for
+ * use in the thread state primitives. The machine specific data structures
+ * follow the struct thread_command as follows.
+ * Each flavor of machine specific data structure is preceded by a uint32_t
+ * constant for the flavor of that data structure, a uint32_t
+ * that is the count of longs of the size of the state data structure and then
+ * the state data structure follows. This triple may be repeated for many
+ * flavors. The constants for the flavors, counts and state data structure
+ * definitions are expected to be in the header file <machine/thread_status.h>.
+ * These machine specific data structures sizes must be multiples of
+ * 4 bytes. The cmdsize reflects the total size of the thread_command
+ * and all of the sizes of the constants for the flavors, counts and state
+ * data structures.
+ *
+ * For executable objects that are unix processes there will be one
+ * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor.
+ * This is the same as a LC_THREAD, except that a stack is automatically
+ * created (based on the shell's limit for the stack size). Command arguments
+ * and environment variables are copied onto that stack.
+ */
+struct thread_command {
+ uint32_t cmd; /* LC_THREAD or LC_UNIXTHREAD */
+ uint32_t cmdsize; /* total size of this command */
+ /* uint32_t flavor flavor of thread state */
+ /* uint32_t count count of longs in thread state */
+ /* struct XXX_thread_state state thread state for this flavor */
+ /* ... */
+};
+
+/*
+ * The routines command contains the address of the dynamic shared library
+ * initialization routine and an index into the module table for the module
+ * that defines the routine. Before any modules are used from the library the
+ * dynamic linker fully binds the module that defines the initialization routine
+ * and then calls it. This gets called before any module initialization
+ * routines (used for C++ static constructors) in the library.
+ */
+struct routines_command { /* for 32-bit architectures */
+ uint32_t cmd; /* LC_ROUTINES */
+ uint32_t cmdsize; /* total size of this command */
+ uint32_t init_address; /* address of initialization routine */
+ uint32_t init_module; /* index into the module table that */
+ /* the init routine is defined in */
+ uint32_t reserved1; /* reserved1..reserved6: no meaning is documented here */
+ uint32_t reserved2;
+ uint32_t reserved3;
+ uint32_t reserved4;
+ uint32_t reserved5;
+ uint32_t reserved6;
+};
+
+/*
+ * The 64-bit routines command. Same use as routines_command, but init_address, init_module and the reserved fields are 64 bits wide.
+ */
+struct routines_command_64 { /* for 64-bit architectures */
+ uint32_t cmd; /* LC_ROUTINES_64 */
+ uint32_t cmdsize; /* total size of this command */
+ uint64_t init_address; /* address of initialization routine */
+ uint64_t init_module; /* index into the module table that */
+ /* the init routine is defined in */
+ uint64_t reserved1; /* reserved1..reserved6: no meaning is documented here */
+ uint64_t reserved2;
+ uint64_t reserved3;
+ uint64_t reserved4;
+ uint64_t reserved5;
+ uint64_t reserved6;
+};
+
+/*
+ * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
+ * "stab" style symbol table information as described in the header files
+ * <nlist.h> and <stab.h>. symoff/nsyms describe the symbol table and
+ * stroff/strsize describe the string table.
+ */
+struct symtab_command {
+ uint32_t cmd; /* LC_SYMTAB */
+ uint32_t cmdsize; /* sizeof(struct symtab_command) */
+ uint32_t symoff; /* symbol table offset */
+ uint32_t nsyms; /* number of symbol table entries */
+ uint32_t stroff; /* string table offset */
+ uint32_t strsize; /* string table size in bytes */
+};
+
+/*
+ * This is the second set of the symbolic information which is used to support
+ * the data structures for the dynamic link editor.
+ *
+ * The original set of symbolic information in the symtab_command which contains
+ * the symbol and string tables must also be present when this load command is
+ * present. When this load command is present the symbol table is organized
+ * into three groups of symbols:
+ * local symbols (static and debugging symbols) - grouped by module
+ * defined external symbols - grouped by module (sorted by name if not lib)
+ * undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
+ * and in order they were seen by the static
+ * linker if MH_BINDATLOAD is set)
+ * In this load command there are offsets and counts to each of the three groups
+ * of symbols.
+ *
+ * This load command contains the offsets and sizes of the following new
+ * symbolic information tables:
+ * table of contents
+ * module table
+ * reference symbol table
+ * indirect symbol table
+ * The first three tables above (the table of contents, module table and
+ * reference symbol table) are only present if the file is a dynamically linked
+ * shared library. For executable and object modules, which are files
+ * containing only one module, the information that would be in these three
+ * tables is determined as follows:
+ * table of contents - the defined external symbols are sorted by name
+ * module table - the file contains only one module so everything in the
+ * file is part of the module.
+ * reference symbol table - is the defined and undefined external symbols
+ *
+ * For dynamically linked shared library files this load command also contains
+ * offsets and sizes to the pool of relocation entries for all sections
+ * separated into two groups:
+ * external relocation entries
+ * local relocation entries
+ * For executable and object modules the relocation entries continue to hang
+ * off the section structures.
+ */
+struct dysymtab_command {
+ uint32_t cmd; /* LC_DYSYMTAB */
+ uint32_t cmdsize; /* sizeof(struct dysymtab_command) */
+
+ /*
+ * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
+ * are grouped into the following three groups:
+ * local symbols (further grouped by the module they are from)
+ * defined external symbols (further grouped by the module they are from)
+ * undefined symbols
+ *
+ * The local symbols are used only for debugging. The dynamic binding
+ * process may have to use them to indicate to the debugger the local
+ * symbols for a module that is being bound.
+ *
+ * The last two groups are used by the dynamic binding process to do the
+ * binding (indirectly through the module table and the reference symbol
+ * table when this is a dynamically linked shared library file).
+ */
+ uint32_t ilocalsym; /* index to local symbols */
+ uint32_t nlocalsym; /* number of local symbols */
+
+ uint32_t iextdefsym;/* index to externally defined symbols */
+ uint32_t nextdefsym;/* number of externally defined symbols */
+
+ uint32_t iundefsym; /* index to undefined symbols */
+ uint32_t nundefsym; /* number of undefined symbols */
+
+ /*
+ * For the dynamic binding process to find which module a symbol
+ * is defined in, the table of contents is used (analogous to the ranlib
+ * structure in an archive) which maps defined external symbols to modules
+ * they are defined in. This exists only in a dynamically linked shared
+ * library file. For executable and object modules the defined external
+ * symbols are sorted by name and are used as the table of contents.
+ */
+ uint32_t tocoff; /* file offset to table of contents */
+ uint32_t ntoc; /* number of entries in table of contents */
+
+ /*
+ * To support dynamic binding of "modules" (whole object files) the symbol
+ * table must reflect the modules that the file was created from. This is
+ * done by having a module table that has indexes and counts into the merged
+ * tables for each module. The module structure that these two entries
+ * refer to is described below. This exists only in a dynamically linked
+ * shared library file. For executable and object modules the file only
+ * contains one module so everything in the file belongs to the module.
+ */
+ uint32_t modtaboff; /* file offset to module table */
+ uint32_t nmodtab; /* number of module table entries */
+
+ /*
+ * To support dynamic module binding the module structure for each module
+ * indicates the external references (defined and undefined) each module
+ * makes. For each module there is an offset and a count into the
+ * reference symbol table for the symbols that the module references.
+ * This exists only in a dynamically linked shared library file. For
+ * executable and object modules the defined external symbols and the
+ * undefined external symbols indicate the external references.
+ */
+ uint32_t extrefsymoff; /* offset to referenced symbol table */
+ uint32_t nextrefsyms; /* number of referenced symbol table entries */
+
+ /*
+ * The sections that contain "symbol pointers" and "routine stubs" have
+ * indexes and (implied counts based on the size of the section and fixed
+ * size of the entry) into the "indirect symbol" table for each pointer
+ * and stub. For every section of these two types the index into the
+ * indirect symbol table is stored in the section header in the field
+ * reserved1. An indirect symbol table entry is simply a 32bit index into
+ * the symbol table to the symbol that the pointer or stub is referring to.
+ * The indirect symbol table is ordered to match the entries in the section.
+ */
+ uint32_t indirectsymoff; /* file offset to the indirect symbol table */
+ uint32_t nindirectsyms; /* number of indirect symbol table entries */
+
+ /*
+ * To support relocating an individual module in a library file quickly the
+ * external relocation entries for each module in the library need to be
+ * accessed efficiently. Since the relocation entries can't be accessed
+ * through the section headers for a library file they are separated into
+ * groups of local and external entries further grouped by module. In this
+ * case the presence of this load command whose extreloff, nextrel,
+ * locreloff and nlocrel fields are non-zero indicates that the relocation
+ * entries of non-merged sections are not referenced through the section
+ * structures (and the reloff and nreloc fields in the section headers are
+ * set to zero).
+ *
+ * Since the relocation entries are not accessed through the section headers
+ * this requires the r_address field to be something other than a section
+ * offset to identify the item to be relocated. In this case r_address is
+ * set to the offset from the vmaddr of the first LC_SEGMENT command.
+ * For MH_SPLIT_SEGS images r_address is set to the offset from the
+ * vmaddr of the first read-write LC_SEGMENT command.
+ *
+ * The relocation entries are grouped by module and the module table
+ * entries have indexes and counts into them for the group of external
+ * relocation entries for that module.
+ *
+ * For sections that are merged across modules there must not be any
+ * remaining external relocation entries for them (for merged sections
+ * remaining relocation entries must be local).
+ */
+ uint32_t extreloff; /* offset to external relocation entries */
+ uint32_t nextrel; /* number of external relocation entries */
+
+ /*
+ * All the local relocation entries are grouped together (they are not
+ * grouped by their module since they are only used if the object is moved
+ * from its statically link edited address).
+ */
+ uint32_t locreloff; /* offset to local relocation entries */
+ uint32_t nlocrel; /* number of local relocation entries */
+
+};
+
+/*
+ * An indirect symbol table entry is simply a 32bit index into the symbol table
+ * to the symbol that the pointer or stub is referring to, unless it is for a
+ * non-lazy symbol pointer section for a defined symbol which strip(1) has
+ * removed, in which case it has the value INDIRECT_SYMBOL_LOCAL. If the
+ * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that.
+ */
+#define INDIRECT_SYMBOL_LOCAL 0x80000000
+#define INDIRECT_SYMBOL_ABS 0x40000000
+
+
+/* a table of contents entry: pairs a defined external symbol with the module that defines it */
+struct dylib_table_of_contents {
+ uint32_t symbol_index; /* the defined external symbol
+ (index into the symbol table) */
+ uint32_t module_index; /* index into the module table this symbol
+ is defined in */
+};
+
+/* a module table entry (objc_module_info_addr is 32 bits wide here) */
+struct dylib_module {
+ uint32_t module_name; /* the module name (index into string table) */
+
+ uint32_t iextdefsym; /* index into externally defined symbols */
+ uint32_t nextdefsym; /* number of externally defined symbols */
+ uint32_t irefsym; /* index into reference symbol table */
+ uint32_t nrefsym; /* number of reference symbol table entries */
+ uint32_t ilocalsym; /* index into symbols for local symbols */
+ uint32_t nlocalsym; /* number of local symbols */
+
+ uint32_t iextrel; /* index into external relocation entries */
+ uint32_t nextrel; /* number of external relocation entries */
+
+ uint32_t iinit_iterm; /* low 16 bits are the index into the init
+ section, high 16 bits are the index into
+ the term section */
+ uint32_t ninit_nterm; /* low 16 bits are the number of init section
+ entries, high 16 bits are the number of
+ term section entries */
+
+ uint32_t /* for this module, address of the start of */
+ objc_module_info_addr; /* the (__OBJC,__module_info) section */
+ uint32_t /* for this module, size of */
+ objc_module_info_size; /* the (__OBJC,__module_info) section */
+};
+
+/* a 64-bit module table entry; unlike dylib_module, objc_module_info_size comes before objc_module_info_addr, and the address is 64 bits wide */
+struct dylib_module_64 {
+ uint32_t module_name; /* the module name (index into string table) */
+
+ uint32_t iextdefsym; /* index into externally defined symbols */
+ uint32_t nextdefsym; /* number of externally defined symbols */
+ uint32_t irefsym; /* index into reference symbol table */
+ uint32_t nrefsym; /* number of reference symbol table entries */
+ uint32_t ilocalsym; /* index into symbols for local symbols */
+ uint32_t nlocalsym; /* number of local symbols */
+
+ uint32_t iextrel; /* index into external relocation entries */
+ uint32_t nextrel; /* number of external relocation entries */
+
+ uint32_t iinit_iterm; /* low 16 bits are the index into the init
+ section, high 16 bits are the index into
+ the term section */
+ uint32_t ninit_nterm; /* low 16 bits are the number of init section
+ entries, high 16 bits are the number of
+ term section entries */
+
+ uint32_t /* for this module, size of */
+ objc_module_info_size; /* the (__OBJC,__module_info) section */
+ uint64_t /* for this module, address of the start of */
+ objc_module_info_addr; /* the (__OBJC,__module_info) section */
+};
+
+/*
+ * The entries in the reference symbol table are used when loading the module
+ * (both by the static and dynamic link editors) and if the module is unloaded
+ * or replaced. Therefore all external symbols (defined and undefined) are
+ * listed in the module's reference table. The flags describe the type of
+ * reference that is being made. The constants for the flags are defined in
+ * <mach-o/nlist.h> as they are also used for symbol table entries.
+ * Each entry packs a 24-bit symbol index and 8 bits of flags into one uint32_t.
+ */
+struct dylib_reference {
+ uint32_t isym:24, /* index into the symbol table */
+ flags:8; /* flags to indicate the type of reference */
+};
+
+/*
+ * The twolevel_hints_command contains the offset and number of hints in the
+ * two-level namespace lookup hints table. The table entries are
+ * struct twolevel_hint.
+ */
+struct twolevel_hints_command {
+ uint32_t cmd; /* LC_TWOLEVEL_HINTS */
+ uint32_t cmdsize; /* sizeof(struct twolevel_hints_command) */
+ uint32_t offset; /* offset to the hint table */
+ uint32_t nhints; /* number of hints in the hint table */
+};
+
+/*
+ * The entries in the two-level namespace lookup hints table are twolevel_hint
+ * structs. These provide hints to the dynamic link editor where to start
+ * looking for an undefined symbol in a two-level namespace image. The
+ * isub_image field is an index into the sub-images (sub-frameworks and
+ * sub-umbrellas list) that made up the two-level image that the undefined
+ * symbol was found in when it was built by the static link editor. If
+ * isub_image is 0 the symbol is expected to be defined in library and not
+ * in the sub-images. If isub_image is non-zero it is an index into the array
+ * of sub-images for the umbrella with the first index in the sub-images being
+ * 1. The array of sub-images is the ordered list of sub-images of the umbrella
+ * that would be searched for a symbol that has the umbrella recorded as its
+ * primary library. The table of contents index (itoc) is an index into the
+ * library's table of contents. This is used as the starting point of the
+ * binary search or a directed linear search.
+ */
+struct twolevel_hint {
+ uint32_t
+ isub_image:8, /* index into the sub images */
+ itoc:24; /* index into the table of contents */
+};
+
+/*
+ * The prebind_cksum_command contains the value of the original check sum for
+ * prebound files or zero. When a prebound file is first created or modified
+ * for other than updating its prebinding information the value of the check sum
+ * is set to zero. When the file has its prebinding re-done and if the value of
+ * the check sum is zero the original check sum is calculated and stored in
+ * cksum field of this load command in the output file. If when the prebinding
+ * is re-done and the cksum field is non-zero it is left unchanged from the
+ * input file.
+ */
+struct prebind_cksum_command {
+ uint32_t cmd; /* LC_PREBIND_CKSUM */
+ uint32_t cmdsize; /* sizeof(struct prebind_cksum_command) */
+ uint32_t cksum; /* the check sum or zero */
+};
+
+/*
+ * The uuid load command contains a single 128-bit unique random number that
+ * identifies an object produced by the static link editor.
+ */
+struct uuid_command {
+ uint32_t cmd; /* LC_UUID */
+ uint32_t cmdsize; /* sizeof(struct uuid_command) */
+ uint8_t uuid[16]; /* the 128-bit uuid, stored as 16 bytes */
+};
+
+/*
+ * The rpath_command contains a path which at runtime should be added to
+ * the current run path used to find @rpath prefixed dylibs.
+ */
+struct rpath_command {
+ uint32_t cmd; /* LC_RPATH */
+ uint32_t cmdsize; /* size of this command; includes path string */
+ union lc_str path; /* path to add to run path */
+};
+
+/*
+ * The linkedit_data_command contains the offset and size of a blob
+ * of data in the __LINKEDIT segment. The same layout is shared by all of
+ * the load commands listed on the cmd field.
+ */
+struct linkedit_data_command {
+ uint32_t cmd; /* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO,
+ LC_FUNCTION_STARTS, LC_DATA_IN_CODE,
+ LC_DYLIB_CODE_SIGN_DRS or
+ LC_LINKER_OPTIMIZATION_HINT. */
+ uint32_t cmdsize; /* sizeof(struct linkedit_data_command) */
+ uint32_t dataoff; /* file offset of data in __LINKEDIT segment */
+ uint32_t datasize; /* file size of data in __LINKEDIT segment */
+};
+
+/*
+ * The encryption_info_command contains the file offset and size of
+ * an encrypted segment.
+ */
+struct encryption_info_command {
+ uint32_t cmd; /* LC_ENCRYPTION_INFO */
+ uint32_t cmdsize; /* sizeof(struct encryption_info_command) */
+ uint32_t cryptoff; /* file offset of encrypted range */
+ uint32_t cryptsize; /* file size of encrypted range */
+ uint32_t cryptid; /* which encryption system,
+ 0 means not-encrypted yet */
+};
+
+/*
+ * The encryption_info_command_64 contains the file offset and size of
+ * an encrypted segment (for use in x86_64 targets).
+ */
+struct encryption_info_command_64 {
+ uint32_t cmd; /* LC_ENCRYPTION_INFO_64 */
+ uint32_t cmdsize; /* sizeof(struct encryption_info_command_64) */
+ uint32_t cryptoff; /* file offset of encrypted range */
+ uint32_t cryptsize; /* file size of encrypted range */
+ uint32_t cryptid; /* which encryption system,
+ 0 means not-encrypted yet */
+ uint32_t pad; /* padding to make this struct's size a multiple
+ of 8 bytes */
+};
+
+/*
+ * The version_min_command contains the min OS version on which this
+ * binary was built to run.
+ */
+struct version_min_command {
+ uint32_t cmd; /* LC_VERSION_MIN_MACOSX or
+ LC_VERSION_MIN_IPHONEOS or
+ LC_VERSION_MIN_WATCHOS or
+ LC_VERSION_MIN_TVOS */
+ uint32_t cmdsize; /* sizeof(struct version_min_command) */
+ uint32_t version; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+ uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+};
+
+/*
+ * The build_version_command contains the min OS version on which this
+ * binary was built to run for its platform. The lists of known platform and
+ * tool values follow it.
+ */
+struct build_version_command {
+ uint32_t cmd; /* LC_BUILD_VERSION */
+ uint32_t cmdsize; /* sizeof(struct build_version_command) plus */
+ /* ntools * sizeof(struct build_tool_version) */
+ uint32_t platform; /* platform (one of the PLATFORM_* values) */
+ uint32_t minos; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+ uint32_t sdk; /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+ uint32_t ntools; /* number of tool entries following this */
+};
+
+struct build_tool_version {
+ uint32_t tool; /* enum for the tool (one of the TOOL_* values) */
+ uint32_t version; /* version number of the tool */
+};
+
+/* Known values for the platform field of struct build_version_command. */
+#define PLATFORM_MACOS 1
+#define PLATFORM_IOS 2
+#define PLATFORM_TVOS 3
+#define PLATFORM_WATCHOS 4
+
+/* Known values for the tool field of struct build_tool_version. */
+#define TOOL_CLANG 1
+#define TOOL_SWIFT 2
+#define TOOL_LD 3
+
+/*
+ * The dyld_info_command contains the file offsets and sizes of
+ * the new compressed form of the information dyld needs to
+ * load the image. This information is used by dyld on Mac OS X
+ * 10.6 and later. All information pointed to by this command
+ * is encoded using byte streams, so no endian swapping is needed
+ * to interpret it.
+ */
+struct dyld_info_command {
+ uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */
+ uint32_t cmdsize; /* sizeof(struct dyld_info_command) */
+
+ /*
+ * Dyld rebases an image whenever dyld loads it at an address different
+ * from its preferred address. The rebase information is a stream
+ * of byte sized opcodes whose symbolic names start with REBASE_OPCODE_.
+ * Conceptually the rebase information is a table of tuples:
+ * <seg-index, seg-offset, type>
+ * The opcodes are a compressed way to encode the table by only
+ * encoding when a column changes. In addition simple patterns
+ * like "every n'th offset for m times" can be encoded in a few
+ * bytes.
+ */
+ uint32_t rebase_off; /* file offset to rebase info */
+ uint32_t rebase_size; /* size of rebase info */
+
+ /*
+ * Dyld binds an image during the loading process, if the image
+ * requires any pointers to be initialized to symbols in other images.
+ * The bind information is a stream of byte sized
+ * opcodes whose symbolic names start with BIND_OPCODE_.
+ * Conceptually the bind information is a table of tuples:
+ * <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend>
+ * The opcodes are a compressed way to encode the table by only
+ * encoding when a column changes. In addition simple patterns
+ * like for runs of pointers initialzed to the same value can be
+ * encoded in a few bytes.
+ */
+ uint32_t bind_off; /* file offset to binding info */
+ uint32_t bind_size; /* size of binding info */
+
+ /*
+ * Some C++ programs require dyld to unique symbols so that all
+ * images in the process use the same copy of some code/data.
+ * This step is done after binding. The content of the weak_bind
+ * info is an opcode stream like the bind_info. But it is sorted
+ * alphabetically by symbol name. This enable dyld to walk
+ * all images with weak binding information in order and look
+ * for collisions. If there are no collisions, dyld does
+ * no updating. That means that some fixups are also encoded
+ * in the bind_info. For instance, all calls to "operator new"
+ * are first bound to libstdc++.dylib using the information
+ * in bind_info. Then if some image overrides operator new
+ * that is detected when the weak_bind information is processed
+ * and the call to operator new is then rebound.
+ */
+ uint32_t weak_bind_off; /* file offset to weak binding info */
+ uint32_t weak_bind_size; /* size of weak binding info */
+
+ /*
+ * Some uses of external symbols do not need to be bound immediately.
+ * Instead they can be lazily bound on first use. The lazy_bind
+ * are contains a stream of BIND opcodes to bind all lazy symbols.
+ * Normal use is that dyld ignores the lazy_bind section when
+ * loading an image. Instead the static linker arranged for the
+ * lazy pointer to initially point to a helper function which
+ * pushes the offset into the lazy_bind area for the symbol
+ * needing to be bound, then jumps to dyld which simply adds
+ * the offset to lazy_bind_off to get the information on what
+ * to bind.
+ */
+ uint32_t lazy_bind_off; /* file offset to lazy binding info */
+ uint32_t lazy_bind_size; /* size of lazy binding infs */
+
+ /*
+ * The symbols exported by a dylib are encoded in a trie. This
+ * is a compact representation that factors out common prefixes.
+ * It also reduces LINKEDIT pages in RAM because it encodes all
+ * information (name, address, flags) in one small, contiguous range.
+ * The export area is a stream of nodes. The first node sequentially
+ * is the start node for the trie.
+ *
+ * Nodes for a symbol start with a uleb128 that is the length of
+ * the exported symbol information for the string so far.
+ * If there is no exported symbol, the node starts with a zero byte.
+ * If there is exported info, it follows the length.
+ *
+ * First is a uleb128 containing flags. Normally, it is followed by
+ * a uleb128 encoded offset which is location of the content named
+ * by the symbol from the mach_header for the image. If the flags
+ * is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
+ * a uleb128 encoded library ordinal, then a zero terminated
+ * UTF8 string. If the string is zero length, then the symbol
+ * is re-exported from the specified dylib with the same name.
+ * If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following
+ * the flags is two uleb128s: the stub offset and the resolver offset.
+ * The stub is used by non-lazy pointers. The resolver is used
+ * by lazy pointers and must be called to get the actual address to use.
+ *
+ * After the optional exported symbol information is a byte of
+ * how many edges (0-255) that this node has leaving it,
+ * followed by each edge.
+ * Each edge is a zero terminated UTF8 of the additional chars
+ * in the symbol, followed by a uleb128 offset for the node that
+ * edge points to.
+ *
+ */
+ uint32_t export_off; /* file offset to export info */
+ uint32_t export_size; /* size of export info */
+};
+
+/*
+ * The following are used to encode rebasing information
+ */
+#define REBASE_TYPE_POINTER 1
+#define REBASE_TYPE_TEXT_ABSOLUTE32 2
+#define REBASE_TYPE_TEXT_PCREL32 3
+
+#define REBASE_OPCODE_MASK 0xF0 /* high 4 bits: one of the REBASE_OPCODE_* values below */
+#define REBASE_IMMEDIATE_MASK 0x0F /* low 4 bits: immediate operand */
+#define REBASE_OPCODE_DONE 0x00
+#define REBASE_OPCODE_SET_TYPE_IMM 0x10
+#define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x20
+#define REBASE_OPCODE_ADD_ADDR_ULEB 0x30
+#define REBASE_OPCODE_ADD_ADDR_IMM_SCALED 0x40
+#define REBASE_OPCODE_DO_REBASE_IMM_TIMES 0x50
+#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES 0x60
+#define REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB 0x70
+#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB 0x80
+
+
+/*
+ * The following are used to encode binding information
+ */
+#define BIND_TYPE_POINTER 1
+#define BIND_TYPE_TEXT_ABSOLUTE32 2
+#define BIND_TYPE_TEXT_PCREL32 3
+
+#define BIND_SPECIAL_DYLIB_SELF 0
+#define BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE -1
+#define BIND_SPECIAL_DYLIB_FLAT_LOOKUP -2
+
+#define BIND_SYMBOL_FLAGS_WEAK_IMPORT 0x1
+#define BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION 0x8
+
+#define BIND_OPCODE_MASK 0xF0 /* high 4 bits: one of the BIND_OPCODE_* values below */
+#define BIND_IMMEDIATE_MASK 0x0F /* low 4 bits: immediate operand */
+#define BIND_OPCODE_DONE 0x00
+#define BIND_OPCODE_SET_DYLIB_ORDINAL_IMM 0x10
+#define BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB 0x20
+#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 0x30
+#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM 0x40
+#define BIND_OPCODE_SET_TYPE_IMM 0x50
+#define BIND_OPCODE_SET_ADDEND_SLEB 0x60
+#define BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x70
+#define BIND_OPCODE_ADD_ADDR_ULEB 0x80
+#define BIND_OPCODE_DO_BIND 0x90
+#define BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB 0xA0
+#define BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED 0xB0
+#define BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB 0xC0
+
+
+/*
+ * The following are used on the flags byte of a terminal node
+ * in the export information.
+ */
+#define EXPORT_SYMBOL_FLAGS_KIND_MASK 0x03 /* low 2 bits: one of the KIND_* values below */
+#define EXPORT_SYMBOL_FLAGS_KIND_REGULAR 0x00
+#define EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL 0x01
+#define EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION 0x04
+#define EXPORT_SYMBOL_FLAGS_REEXPORT 0x08
+#define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER 0x10
+
+/*
+ * The linker_option_command contains linker options embedded in object files.
+ */
+struct linker_option_command {
+ uint32_t cmd; /* LC_LINKER_OPTION only used in MH_OBJECT filetypes */
+ uint32_t cmdsize; /* NOTE(review): presumably covers the trailing strings too -- confirm */
+ uint32_t count; /* number of strings */
+ /* concatenation of zero terminated UTF8 strings.
+ Zero filled at end to align */
+};
+
+/*
+ * The symseg_command contains the offset and size of the GNU style
+ * symbol table information as described in the header file <symseg.h>.
+ * The symbol roots of the symbol segments must also be aligned properly
+ * in the file. So the requirement of keeping the offsets aligned to a
+ * multiple of 4 bytes translates to the length field of the symbol
+ * roots also being a multiple of a long. Also the padding must again be
+ * zeroed. (THIS IS OBSOLETE and no longer supported).
+ */
+struct symseg_command {
+ uint32_t cmd; /* LC_SYMSEG */
+ uint32_t cmdsize; /* sizeof(struct symseg_command) */
+ uint32_t offset; /* symbol segment offset */
+ uint32_t size; /* symbol segment size in bytes */
+};
+
+/*
+ * The ident_command contains a free format string table following the
+ * ident_command structure. The strings are null terminated and the size of
+ * the command is padded out with zero bytes to a multiple of 4 bytes.
+ * (THIS IS OBSOLETE and no longer supported).
+ */
+struct ident_command {
+ uint32_t cmd; /* LC_IDENT */
+ uint32_t cmdsize; /* strings that follow this command */
+};
+
+/*
+ * The fvmfile_command contains a reference to a file to be loaded at the
+ * specified virtual address. (Presently, this command is reserved for
+ * internal use. The kernel ignores this command when loading a program into
+ * memory).
+ */
+struct fvmfile_command {
+ uint32_t cmd; /* LC_FVMFILE */
+ uint32_t cmdsize; /* includes pathname string */
+ union lc_str name; /* file's pathname */
+ uint32_t header_addr; /* file's virtual address */
+};
+
+
+/*
+ * The entry_point_command is a replacement for thread_command.
+ * It is used for main executables to specify the location (file offset)
+ * of main(). If -stack_size was used at link time, the stacksize
+ * field will contain the stack size needed for the main thread.
+ */
+struct entry_point_command {
+ uint32_t cmd; /* LC_MAIN only used in MH_EXECUTE filetypes */
+ uint32_t cmdsize; /* 24 */
+ uint64_t entryoff; /* file (__TEXT) offset of main() */
+ uint64_t stacksize;/* if not zero, initial stack size */
+};
+
+
+/*
+ * The source_version_command is an optional load command containing
+ * the version of the sources used to build the binary.
+ */
+struct source_version_command {
+ uint32_t cmd; /* LC_SOURCE_VERSION */
+ uint32_t cmdsize; /* 16 */
+ uint64_t version; /* A.B.C.D.E packed as a24.b10.c10.d10.e10 (24 + 4*10 = 64 bits) */
+};
+
+
+/*
+ * The LC_DATA_IN_CODE load command uses a linkedit_data_command
+ * to point to an array of data_in_code_entry entries. Each entry
+ * describes a range of data in a code section.
+ */
+struct data_in_code_entry {
+ uint32_t offset; /* from mach_header to start of data range */
+ uint16_t length; /* number of bytes in data range */
+ uint16_t kind; /* a DICE_KIND_* value */
+};
+#define DICE_KIND_DATA 0x0001
+#define DICE_KIND_JUMP_TABLE8 0x0002
+#define DICE_KIND_JUMP_TABLE16 0x0003
+#define DICE_KIND_JUMP_TABLE32 0x0004
+#define DICE_KIND_ABS_JUMP_TABLE32 0x0005
+
+
+
+/*
+ * Sections of type S_THREAD_LOCAL_VARIABLES contain an array
+ * of tlv_descriptor structures.
+ */
+struct tlv_descriptor
+{
+ void* (*thunk)(struct tlv_descriptor*); /* function pointer: takes a descriptor, returns void* */
+ unsigned long key; /* NOTE(review): meaning not documented in this header -- confirm against dyld */
+ unsigned long offset; /* NOTE(review): presumably an offset into the thread-local storage -- confirm */
+};
+
+/*
+ * LC_NOTE commands describe a region of arbitrary data included in a Mach-O
+ * file. Its initial use is to record extra data in MH_CORE files.
+ */
+struct note_command {
+ uint32_t cmd; /* LC_NOTE */
+ uint32_t cmdsize; /* sizeof(struct note_command) */
+ char data_owner[16]; /* owner name for this LC_NOTE (fixed 16-byte field) */
+ uint64_t offset; /* file offset of this data */
+ uint64_t size; /* length of data region */
+};
+
+#endif /* _MACHO_LOADER_H_ */
diff --git a/third_party/darwin_xnu_macho/mach-o/nlist.h b/third_party/darwin_xnu_macho/mach-o/nlist.h
new file mode 100644
index 0000000..133e36b
--- /dev/null
+++ b/third_party/darwin_xnu_macho/mach-o/nlist.h
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#ifndef _MACHO_NLIST_H_
+#define _MACHO_NLIST_H_
+/* $NetBSD: nlist.h,v 1.5 1994/10/26 00:56:11 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nlist.h 8.2 (Berkeley) 1/21/94
+ */
+#include <stdint.h>
+
+/*
+ * Format of a symbol table entry of a Mach-O file for 32-bit architectures.
+ * Modified from the BSD format. The modifications from the original format
+ * were changing n_other (an unused field) to n_sect and the addition of the
+ * N_SECT type. These modifications are required to support symbols in a larger
+ * number of sections not just the three sections (text, data and bss) in a BSD
+ * file.
+ */
+struct nlist {
+ union {
+#ifndef __LP64__
+ char *n_name; /* for use when in-core; not declared when __LP64__ is defined */
+#endif
+ uint32_t n_strx; /* index into the string table */
+ } n_un;
+ uint8_t n_type; /* type flag, see below */
+ uint8_t n_sect; /* section number or NO_SECT */
+ int16_t n_desc; /* see <mach-o/stab.h>; signed here, unsigned in struct nlist_64 */
+ uint32_t n_value; /* value of this symbol (or stab offset) */
+};
+
+/*
+ * This is the symbol table entry structure for 64-bit architectures.
+ */
+struct nlist_64 {
+ union {
+ uint32_t n_strx; /* index into the string table */
+ } n_un;
+ uint8_t n_type; /* type flag, see below */
+ uint8_t n_sect; /* section number or NO_SECT */
+ uint16_t n_desc; /* see <mach-o/stab.h>; unsigned here, signed in struct nlist */
+ uint64_t n_value; /* value of this symbol (or stab offset); 64 bits wide here */
+};
+
+/*
+ * Symbols with a index into the string table of zero (n_un.n_strx == 0) are
+ * defined to have a null, "", name. Therefore all string indexes to non null
+ * names must not have a zero string index. This is a bit of historical information
+ * that has never been well documented.
+ */
+
+/*
+ * The n_type field really contains four fields:
+ * unsigned char N_STAB:3,
+ * N_PEXT:1,
+ * N_TYPE:3,
+ * N_EXT:1;
+ * which are used via the following masks.
+ */
+#define N_STAB 0xe0 /* if any of these bits set, a symbolic debugging entry */
+#define N_PEXT 0x10 /* private external symbol bit */
+#define N_TYPE 0x0e /* mask for the type bits (N_UNDF, N_ABS, N_SECT, N_PBUD, N_INDR) */
+#define N_EXT 0x01 /* external symbol bit, set for external symbols */
+
+/*
+ * Only symbolic debugging entries have some of the N_STAB bits set and if any
+ * of these bits are set then it is a symbolic debugging entry (a stab). In
+ * which case then the values of the n_type field (the entire field) are given
+ * in <mach-o/stab.h>
+ */
+
+/*
+ * Values for N_TYPE bits of the n_type field.
+ */
+#define N_UNDF 0x0 /* undefined, n_sect == NO_SECT */
+#define N_ABS 0x2 /* absolute, n_sect == NO_SECT */
+#define N_SECT 0xe /* defined in section number n_sect */
+#define N_PBUD 0xc /* prebound undefined (defined in a dylib) */
+#define N_INDR 0xa /* indirect: n_value is a string table index of the other symbol's name */
+
+/*
+ * If the type is N_INDR then the symbol is defined to be the same as another
+ * symbol. In this case the n_value field is an index into the string table
+ * of the other symbol's name. When the other symbol is defined then they both
+ * take on the defined type and value.
+ */
+
+/*
+ * If the type is N_SECT then the n_sect field contains an ordinal of the
+ * section the symbol is defined in. The sections are numbered from 1 and
+ * refer to sections in order they appear in the load commands for the file
+ * they are in. This means the same ordinal may very well refer to different
+ * sections in different files.
+ *
+ * The n_value field for all symbol table entries (including N_STAB's) gets
+ * updated by the link editor based on the value of its n_sect field and where
+ * the section n_sect references gets relocated. If the value of the n_sect
+ * field is NO_SECT then its n_value field is not changed by the link editor.
+ */
+#define NO_SECT 0 /* symbol is not in any section */
+#define MAX_SECT 255 /* 1 thru 255 inclusive */
+
+/*
+ * Common symbols are represented by undefined (N_UNDF) external (N_EXT) types
+ * whose values (n_value) are non-zero. In which case the value of the n_value
+ * field is the size (in bytes) of the common symbol. The n_sect field is set
+ * to NO_SECT. The alignment of a common symbol may be set as a power of 2
+ * between 2^1 and 2^15 as part of the n_desc field using the macros below. If
+ * the alignment is not set (a value of zero) then natural alignment based on
+ * the size is used.
+ */
+#define GET_COMM_ALIGN(n_desc) (((n_desc) >> 8) & 0x0f)
+#define SET_COMM_ALIGN(n_desc,align) \
+ (n_desc) = (((n_desc) & 0xf0ff) | (((align) & 0x0f) << 8))
+
+/*
+ * To support the lazy binding of undefined symbols in the dynamic link-editor,
+ * the undefined symbols in the symbol table (the nlist structures) are marked
+ * with the indication if the undefined reference is a lazy reference or
+ * non-lazy reference. If both a non-lazy reference and a lazy reference is
+ * made to the same symbol the non-lazy reference takes precedence. A reference
+ * is lazy only when all references to that symbol are made through a symbol
+ * pointer in a lazy symbol pointer section.
+ *
+ * The implementation of marking nlist structures in the symbol table for
+ * undefined symbols will be to use some of the bits of the n_desc field as a
+ * reference type. The mask REFERENCE_TYPE will be applied to the n_desc field
+ * of an nlist structure for an undefined symbol to determine the type of
+ * undefined reference (lazy or non-lazy).
+ *
+ * The constants for the REFERENCE FLAGS are propagated to the reference table
+ * in a shared library file. In that case the constant for a defined symbol,
+ * REFERENCE_FLAG_DEFINED, is also used.
+ */
+/* Reference type bits of the n_desc field of undefined symbols */
+#define REFERENCE_TYPE 0x7 /* mask: low 3 bits of n_desc */
+/* types of references (values of n_desc & REFERENCE_TYPE) */
+#define REFERENCE_FLAG_UNDEFINED_NON_LAZY 0
+#define REFERENCE_FLAG_UNDEFINED_LAZY 1
+#define REFERENCE_FLAG_DEFINED 2
+#define REFERENCE_FLAG_PRIVATE_DEFINED 3
+#define REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY 4
+#define REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY 5
+
+/*
+ * To simplify stripping of objects that are used with the dynamic link
+ * editor, the static link editor marks the symbols defined in an object that are
+ * referenced by a dynamically bound object (dynamic shared libraries, bundles).
+ * With this marking strip knows not to strip these symbols.
+ */
+#define REFERENCED_DYNAMICALLY 0x0010
+
+/*
+ * For images created by the static link editor with the -twolevel_namespace
+ * option in effect the flags field of the mach header is marked with
+ * MH_TWOLEVEL. And the binding of the undefined references of the image are
+ * determined by the static link editor. Which library an undefined symbol is
+ * bound to is recorded by the static linker in the high 8 bits of the n_desc
+ * field using the SET_LIBRARY_ORDINAL macro below. The ordinal recorded
+ * references the libraries listed in the Mach-O's LC_LOAD_DYLIB,
+ * LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB, and
+ * LC_LAZY_LOAD_DYLIB, etc. load commands in the order they appear in the
+ * headers. The library ordinals start from 1.
+ * For a dynamic library that is built as a two-level namespace image the
+ * undefined references from module defined in another use the same nlist struct
+ * and in that case SELF_LIBRARY_ORDINAL is used as the library ordinal. For
+ * defined symbols in all images they also must have the library ordinal set to
+ * SELF_LIBRARY_ORDINAL. The EXECUTABLE_ORDINAL refers to the executable
+ * image for references from plugins that refer to the executable that loads
+ * them.
+ *
+ * The DYNAMIC_LOOKUP_ORDINAL is for undefined symbols in a two-level namespace
+ * image that are looked up by the dynamic linker with flat namespace semantics.
+ * This ordinal was added as a feature in Mac OS X 10.3 by reducing the
+ * value of MAX_LIBRARY_ORDINAL by one. So it is legal for existing binaries
+ * or binaries built with older tools to have 0xfe (254) dynamic libraries. In
+ * this case the ordinal value 0xfe (254) must be treated as a library ordinal
+ * for compatibility.
+ */
+#define GET_LIBRARY_ORDINAL(n_desc) (((n_desc) >> 8) & 0xff)
+#define SET_LIBRARY_ORDINAL(n_desc,ordinal) \
+ (n_desc) = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8))
+#define SELF_LIBRARY_ORDINAL 0x0 /* symbol belongs to this image itself */
+#define MAX_LIBRARY_ORDINAL 0xfd
+#define DYNAMIC_LOOKUP_ORDINAL 0xfe /* looked up with flat namespace semantics */
+#define EXECUTABLE_ORDINAL 0xff /* the executable that loads a plugin */
+
+/*
+ * The bit 0x0020 of the n_desc field is used for two non-overlapping purposes
+ * and has two different symbolic names, N_NO_DEAD_STRIP and N_DESC_DISCARDED.
+ */
+
+/*
+ * The N_NO_DEAD_STRIP bit of the n_desc field only ever appears in a
+ * relocatable .o file (MH_OBJECT filetype). And is used to indicate to the
+ * static link editor it is never to dead strip the symbol.
+ */
+#define N_NO_DEAD_STRIP 0x0020 /* symbol is not to be dead stripped */
+
+/*
+ * The N_DESC_DISCARDED bit of the n_desc field never appears in linked image.
+ * But is used in very rare cases by the dynamic link editor to mark an in
+ * memory symbol as discarded and no longer used for linking.
+ */
+#define N_DESC_DISCARDED 0x0020 /* symbol is discarded */
+
+/*
+ * The N_WEAK_REF bit of the n_desc field indicates to the dynamic linker that
+ * the undefined symbol is allowed to be missing and is to have the address of
+ * zero when missing.
+ */
+#define N_WEAK_REF 0x0040 /* symbol is weak referenced */
+
+/*
+ * The N_WEAK_DEF bit of the n_desc field indicates to the static and dynamic
+ * linkers that the symbol definition is weak, allowing a non-weak symbol to
+ * also be used which causes the weak definition to be discarded. Currently this
+ * is only supported for symbols in coalesced sections.
+ */
+#define N_WEAK_DEF 0x0080 /* coalesced symbol is a weak definition */
+
+/*
+ * The N_REF_TO_WEAK bit of the n_desc field indicates to the dynamic linker
+ * that the undefined symbol should be resolved using flat namespace searching.
+ */
+#define N_REF_TO_WEAK 0x0080 /* reference to a weak symbol (same bit value as N_WEAK_DEF) */
+
+/*
+ * The N_ARM_THUMB_DEF bit of the n_desc field indicates that the symbol is
+ * a definition of a Thumb function.
+ */
+#define N_ARM_THUMB_DEF 0x0008 /* symbol is a Thumb function (ARM) */
+
+/*
+ * The N_SYMBOL_RESOLVER bit of the n_desc field indicates
+ * that the function is actually a resolver function and should
+ * be called to get the address of the real function to use.
+ * This bit is only available in .o files (MH_OBJECT filetype)
+ */
+#define N_SYMBOL_RESOLVER 0x0100
+
+/*
+ * The N_ALT_ENTRY bit of the n_desc field indicates that the
+ * symbol is pinned to the previous content.
+ */
+#define N_ALT_ENTRY 0x0200
+
+#ifndef __STRICT_BSD__
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+/*
+ * The function nlist(3) from the C library (given C linkage under C++).
+ */
+extern int nlist (const char *filename, struct nlist *list);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* __STRICT_BSD__ */
+
+#endif /* _MACHO_NLIST_H_ */
diff --git a/third_party/darwin_xnu_macho/mach-o/reloc.h b/third_party/darwin_xnu_macho/mach-o/reloc.h
new file mode 100644
index 0000000..d5741ef
--- /dev/null
+++ b/third_party/darwin_xnu_macho/mach-o/reloc.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* $NetBSD: exec.h,v 1.6 1994/10/27 04:16:05 cgd Exp $ */
+
+/*
+ * Copyright (c) 1993 Christopher G. Demetriou
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MACHO_RELOC_H_
+#define _MACHO_RELOC_H_
+#include <stdint.h>
+
+/*
+ * Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD
+ * format. The modifications from the original format were changing the value
+ * of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
+ * This modification is required to support symbols in an arbitrary number of
+ * sections not just the three sections (text, data and bss) in a 4.3BSD file.
+ * Also the last 4 bits have had the r_type tag added to them.
+ */
+struct relocation_info {
+ int32_t r_address; /* offset in the section to what is being
+ relocated; see R_SCATTERED for its high bit */
+ uint32_t r_symbolnum:24, /* symbol index if r_extern == 1 or section
+ ordinal if r_extern == 0 */
+ r_pcrel:1, /* was relocated pc relative already */
+ r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */
+ r_extern:1, /* does not include value of sym referenced */
+ r_type:4; /* if not 0, machine specific relocation type */
+};
+#define R_ABS 0 /* absolute relocation type for Mach-O files */
+
+/*
+ * The r_address is not really the address as its name indicates but an offset.
+ * In 4.3BSD a.out objects this offset is from the start of the "segment" for
+ * which relocation entry is for (text or data). For Mach-O object files it is
+ * also an offset but from the start of the "section" for which the relocation
+ * entry is for. See comments in <mach-o/loader.h> about the r_address field
+ * in images used with the dynamic linker.
+ *
+ * In 4.3BSD a.out objects if r_extern is zero then r_symbolnum is an ordinal
+ * for the segment the symbol being relocated is in. These ordinals are the
+ * symbol types N_TEXT, N_DATA, N_BSS or N_ABS. In Mach-O object files these
+ * ordinals refer to the sections in the object file in the order their section
+ * structures appear in the headers of the object file they are in. The first
+ * section has the ordinal 1, the second 2, and so on. This means that the
+ * same ordinal in two different object files could refer to two different
+ * sections. And further could have still different ordinals when combined
+ * by the link-editor. The value R_ABS is used for relocation entries for
+ * absolute symbols which need no further relocation.
+ */
+
+/*
+ * For RISC machines some of the references are split across two instructions
+ * and the instruction does not contain the complete value of the reference.
+ * In these cases a second, or paired relocation entry, follows each of these
+ * relocation entries, using a PAIR r_type, which contains the other part of the
+ * reference not contained in the instruction. This other part is stored in the
+ * pair's r_address field. The exact number of bits of the other part of the
+ * reference store in the r_address field is dependent on the particular
+ * relocation type for the particular architecture.
+ */
+
+/*
+ * To make scattered loading by the link editor work correctly "local"
+ * relocation entries can't be used when the item to be relocated is the value
+ * of a symbol plus an offset (where the resulting expression is outside the
+ * block the link editor is moving, as blocks are divided at symbol addresses).
+ * In this case, where the item is a symbol value plus offset, the link editor
+ * needs to know more than just the section the symbol was defined. What is
+ * needed is the actual value of the symbol without the offset so it can do the
+ * relocation correctly based on where the value of the symbol got relocated to
+ * not the value of the expression (with the offset added to the symbol value).
+ * So for the NeXT 2.0 release no "local" relocation entries are ever used when
+ * there is a non-zero offset added to a symbol. The "external" and "local"
+ * relocation entries remain unchanged.
+ *
+ * The implementation is quite messy given the compatibility with the existing
+ * relocation entry format. The ASSUMPTION is that a section will never be
+ * bigger than 2**24 - 1 (0x00ffffff or 16,777,215) bytes. This assumption
+ * allows the r_address (which is really an offset) to fit in 24 bits and high
+ * bit of the r_address field in the relocation_info structure to indicate
+ * it is really a scattered_relocation_info structure. Since these are only
+ * used in places where "local" relocation entries are used and not where
+ * "external" relocation entries are used the r_extern field has been removed.
+ *
+ * For scattered loading to work on a RISC machine where some of the references
+ * are split across two instructions the link editor needs to be assured that
+ * each reference has a unique 32 bit reference (that more than one reference is
+ * NOT sharing the same high 16 bits for example) so it can move each referenced
+ * item independent of each other. Some compilers guarantee this but others
+ * don't, so scattered loading can be done on those that do guarantee
+ * this.
+ */
+#if defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__)
+/*
+ * The reason for the ifdef's of __BIG_ENDIAN__ and __LITTLE_ENDIAN__ is that
+ * when scattered relocation entries were added the mistake of using a mask
+ * against a structure that is made up of bit fields was made. To make this
+ * design work this structure must be laid out in memory the same way so the
+ * mask can be applied to check the same bit each time (r_scattered).
+ */
+#endif /* defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__) */
+#define R_SCATTERED 0x80000000 /* mask to be applied to the r_address field
+ of a relocation_info structure to tell that
+ it is really a scattered_relocation_info
+ structure */
+struct scattered_relocation_info { /* members are declared only under __BIG_ENDIAN__ / __LITTLE_ENDIAN__ */
+#ifdef __BIG_ENDIAN__
+ uint32_t r_scattered:1, /* 1=scattered, 0=non-scattered (see above) */
+ r_pcrel:1, /* was relocated pc relative already */
+ r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */
+ r_type:4, /* if not 0, machine specific relocation type */
+ r_address:24; /* offset in the section to what is being
+ relocated */
+ int32_t r_value; /* the value the item to be relocated is
+ referring to (without any offset added) */
+#endif /* __BIG_ENDIAN__ */
+#ifdef __LITTLE_ENDIAN__
+ uint32_t
+ r_address:24, /* offset in the section to what is being
+ relocated */
+ r_type:4, /* if not 0, machine specific relocation type */
+ r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */
+ r_pcrel:1, /* was relocated pc relative already */
+ r_scattered:1; /* 1=scattered, 0=non-scattered (see above) */
+ int32_t r_value; /* the value the item to be relocated is
+ referring to (without any offset added) */
+#endif /* __LITTLE_ENDIAN__ */
+};
+
+/*
+ * Relocation types used in a generic implementation. Relocation entries for
+ * normal things use the generic relocation as described above and their r_type
+ * is GENERIC_RELOC_VANILLA (a value of zero).
+ *
+ * Another type of generic relocation, GENERIC_RELOC_SECTDIFF, is to support
+ * the difference of two symbols defined in different sections. That is the
+ * expression "symbol1 - symbol2 + constant" is a relocatable expression when
+ * both symbols are defined in some section. For this type of relocation
+ * both relocation entries are scattered relocation entries. The value of
+ * symbol1 is stored in the first relocation entry's r_value field and the
+ * value of symbol2 is stored in the pair's r_value field.
+ *
+ * A special case for a prebound lazy pointer is needed to be able to set the
+ * value of the lazy pointer back to its non-prebound state. This is done
+ * using the GENERIC_RELOC_PB_LA_PTR r_type. This is a scattered relocation
+ * entry where the r_value field is the value of the lazy pointer not prebound.
+ */
+enum reloc_type_generic
+{
+ GENERIC_RELOC_VANILLA, /* generic relocation as described above */
+ GENERIC_RELOC_PAIR, /* Only follows a GENERIC_RELOC_SECTDIFF */
+ GENERIC_RELOC_SECTDIFF,
+ GENERIC_RELOC_PB_LA_PTR, /* prebound lazy pointer */
+ GENERIC_RELOC_LOCAL_SECTDIFF,
+ GENERIC_RELOC_TLV /* thread local variables */
+};
+
+#endif /* _MACHO_RELOC_H_ */
diff --git a/third_party/darwin_xnu_macho/mach/machine.h b/third_party/darwin_xnu_macho/mach/machine.h
new file mode 100644
index 0000000..f8561a7
--- /dev/null
+++ b/third_party/darwin_xnu_macho/mach/machine.h
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2007-2016 Apple, Inc. All rights reserved.
+ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/* File: machine.h
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1986
+ *
+ * Machine independent machine abstraction.
+ */
+
+#ifndef _MACH_MACHINE_H_
+#define _MACH_MACHINE_H_
+
+#ifndef __ASSEMBLER__
+
+#include <stdint.h>
+
+typedef int32_t integer_t; /* base 32-bit signed type for the typedefs below */
+
+typedef integer_t cpu_type_t; /* holds a CPU_TYPE_* value */
+typedef integer_t cpu_subtype_t; /* holds a CPU_SUBTYPE_* value */
+typedef integer_t cpu_threadtype_t; /* holds a CPU_THREADTYPE_* value */
+
+#define CPU_STATE_MAX 4
+
+#define CPU_STATE_USER 0
+#define CPU_STATE_SYSTEM 1
+#define CPU_STATE_IDLE 2
+#define CPU_STATE_NICE 3
+
+
+/*
+ * Capability bits used in the definition of cpu_type.
+ */
+#define CPU_ARCH_MASK 0xff000000 /* mask for architecture bits */
+#define CPU_ARCH_ABI64 0x01000000 /* 64 bit ABI */
+
+/*
+ * Machine types known by all.
+ */
+
+#define CPU_TYPE_ANY ((cpu_type_t) -1)
+
+#define CPU_TYPE_VAX ((cpu_type_t) 1)
+/* skip ((cpu_type_t) 2) */
+/* skip ((cpu_type_t) 3) */
+/* skip ((cpu_type_t) 4) */
+/* skip ((cpu_type_t) 5) */
+#define CPU_TYPE_MC680x0 ((cpu_type_t) 6)
+#define CPU_TYPE_X86 ((cpu_type_t) 7)
+#define CPU_TYPE_I386 CPU_TYPE_X86 /* compatibility */
+#define CPU_TYPE_X86_64 (CPU_TYPE_X86 | CPU_ARCH_ABI64) /* 0x01000007 */
+
+/* skip CPU_TYPE_MIPS ((cpu_type_t) 8) */
+/* skip ((cpu_type_t) 9) */
+#define CPU_TYPE_MC98000 ((cpu_type_t) 10)
+#define CPU_TYPE_HPPA ((cpu_type_t) 11)
+#define CPU_TYPE_ARM ((cpu_type_t) 12)
+#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) /* 0x0100000c */
+#define CPU_TYPE_MC88000 ((cpu_type_t) 13)
+#define CPU_TYPE_SPARC ((cpu_type_t) 14)
+#define CPU_TYPE_I860 ((cpu_type_t) 15)
+/* skip CPU_TYPE_ALPHA ((cpu_type_t) 16) */
+/* skip ((cpu_type_t) 17) */
+#define CPU_TYPE_POWERPC ((cpu_type_t) 18)
+#define CPU_TYPE_POWERPC64 (CPU_TYPE_POWERPC | CPU_ARCH_ABI64) /* 0x01000012 */
+
+/*
+ * Machine subtypes (these are defined here, instead of in a machine
+ * dependent directory, so that any program can get all definitions
+ * regardless of where it is compiled).
+ */
+
+/*
+ * Capability bits used in the definition of cpu_subtype.
+ */
+#define CPU_SUBTYPE_MASK 0xff000000 /* mask for feature flags */
+#define CPU_SUBTYPE_LIB64 0x80000000 /* 64 bit libraries */
+
+
+/*
+ * Object files that are hand-crafted to run on any
+ * implementation of an architecture are tagged with
+ * CPU_SUBTYPE_MULTIPLE. This functions essentially the same as
+ * the "ALL" subtype of an architecture except that it allows us
+ * to easily find object files that may need to be modified
+ * whenever a new implementation of an architecture comes out.
+ *
+ * It is the responsibility of the implementor to make sure the
+ * software handles unsupported implementations elegantly.
+ */
+#define CPU_SUBTYPE_MULTIPLE ((cpu_subtype_t) -1)
+#define CPU_SUBTYPE_LITTLE_ENDIAN ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_BIG_ENDIAN ((cpu_subtype_t) 1)
+
+/*
+ * Machine threadtypes.
+ * This is none - not defined - for most machine types/subtypes.
+ */
+#define CPU_THREADTYPE_NONE ((cpu_threadtype_t) 0)
+
+/*
+ * VAX subtypes (these do *not* necessarily conform to the actual cpu
+ * ID assigned by DEC available via the SID register).
+ */
+
+#define CPU_SUBTYPE_VAX_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_VAX780 ((cpu_subtype_t) 1)
+#define CPU_SUBTYPE_VAX785 ((cpu_subtype_t) 2)
+#define CPU_SUBTYPE_VAX750 ((cpu_subtype_t) 3)
+#define CPU_SUBTYPE_VAX730 ((cpu_subtype_t) 4)
+#define CPU_SUBTYPE_UVAXI ((cpu_subtype_t) 5)
+#define CPU_SUBTYPE_UVAXII ((cpu_subtype_t) 6)
+#define CPU_SUBTYPE_VAX8200 ((cpu_subtype_t) 7)
+#define CPU_SUBTYPE_VAX8500 ((cpu_subtype_t) 8)
+#define CPU_SUBTYPE_VAX8600 ((cpu_subtype_t) 9)
+#define CPU_SUBTYPE_VAX8650 ((cpu_subtype_t) 10)
+#define CPU_SUBTYPE_VAX8800 ((cpu_subtype_t) 11)
+#define CPU_SUBTYPE_UVAXIII ((cpu_subtype_t) 12)
+
+/*
+ * 680x0 subtypes
+ *
+ * The subtype definitions here are unusual for historical reasons.
+ * NeXT used to consider 68030 code as generic 68000 code. For
+ * backwards compatibility:
+ *
+ * CPU_SUBTYPE_MC68030 symbol has been preserved for source code
+ * compatibility.
+ *
+ * CPU_SUBTYPE_MC680x0_ALL has been defined to be the same
+ * subtype as CPU_SUBTYPE_MC68030 for binary compatibility.
+ *
+ * CPU_SUBTYPE_MC68030_ONLY has been added to allow new object
+ * files to be tagged as containing 68030-specific instructions.
+ */
+
+#define CPU_SUBTYPE_MC680x0_ALL ((cpu_subtype_t) 1)
+#define CPU_SUBTYPE_MC68030 ((cpu_subtype_t) 1) /* compat */
+#define CPU_SUBTYPE_MC68040 ((cpu_subtype_t) 2)
+#define CPU_SUBTYPE_MC68030_ONLY ((cpu_subtype_t) 3)
+
+/*
+ * I386 subtypes
+ */
+
+#define CPU_SUBTYPE_INTEL(f, m) ((cpu_subtype_t) (f) + ((m) << 4)) /* family f plus model m shifted left by 4 */
+
+#define CPU_SUBTYPE_I386_ALL CPU_SUBTYPE_INTEL(3, 0)
+#define CPU_SUBTYPE_386 CPU_SUBTYPE_INTEL(3, 0) /* same value as CPU_SUBTYPE_I386_ALL */
+#define CPU_SUBTYPE_486 CPU_SUBTYPE_INTEL(4, 0)
+#define CPU_SUBTYPE_486SX CPU_SUBTYPE_INTEL(4, 8) /* 4 + (8 << 4) = 4 + 128 = 132 */
+#define CPU_SUBTYPE_586 CPU_SUBTYPE_INTEL(5, 0)
+#define CPU_SUBTYPE_PENT CPU_SUBTYPE_INTEL(5, 0) /* same value as CPU_SUBTYPE_586 */
+#define CPU_SUBTYPE_PENTPRO CPU_SUBTYPE_INTEL(6, 1)
+#define CPU_SUBTYPE_PENTII_M3 CPU_SUBTYPE_INTEL(6, 3)
+#define CPU_SUBTYPE_PENTII_M5 CPU_SUBTYPE_INTEL(6, 5)
+#define CPU_SUBTYPE_CELERON CPU_SUBTYPE_INTEL(7, 6)
+#define CPU_SUBTYPE_CELERON_MOBILE CPU_SUBTYPE_INTEL(7, 7)
+#define CPU_SUBTYPE_PENTIUM_3 CPU_SUBTYPE_INTEL(8, 0)
+#define CPU_SUBTYPE_PENTIUM_3_M CPU_SUBTYPE_INTEL(8, 1)
+#define CPU_SUBTYPE_PENTIUM_3_XEON CPU_SUBTYPE_INTEL(8, 2)
+#define CPU_SUBTYPE_PENTIUM_M CPU_SUBTYPE_INTEL(9, 0)
+#define CPU_SUBTYPE_PENTIUM_4 CPU_SUBTYPE_INTEL(10, 0)
+#define CPU_SUBTYPE_PENTIUM_4_M CPU_SUBTYPE_INTEL(10, 1)
+#define CPU_SUBTYPE_ITANIUM CPU_SUBTYPE_INTEL(11, 0)
+#define CPU_SUBTYPE_ITANIUM_2 CPU_SUBTYPE_INTEL(11, 1)
+#define CPU_SUBTYPE_XEON CPU_SUBTYPE_INTEL(12, 0)
+#define CPU_SUBTYPE_XEON_MP CPU_SUBTYPE_INTEL(12, 1)
+
+#define CPU_SUBTYPE_INTEL_FAMILY(x) ((x) & 15) /* extracts the low 4 bits (the f argument of CPU_SUBTYPE_INTEL) */
+#define CPU_SUBTYPE_INTEL_FAMILY_MAX 15 /* largest value the 4-bit family mask can yield */
+
+#define CPU_SUBTYPE_INTEL_MODEL(x) ((x) >> 4) /* extracts the bits above the family (the m argument) */
+#define CPU_SUBTYPE_INTEL_MODEL_ALL 0
+
+/*
+ * X86 subtypes.
+ */
+
+#define CPU_SUBTYPE_X86_ALL ((cpu_subtype_t)3)
+#define CPU_SUBTYPE_X86_64_ALL ((cpu_subtype_t)3)
+#define CPU_SUBTYPE_X86_ARCH1 ((cpu_subtype_t)4)
+#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell feature subset */
+
+
+#define CPU_THREADTYPE_INTEL_HTT ((cpu_threadtype_t) 1)
+
+/*
+ * Mips subtypes.
+ */
+
+#define CPU_SUBTYPE_MIPS_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_MIPS_R2300 ((cpu_subtype_t) 1)
+#define CPU_SUBTYPE_MIPS_R2600 ((cpu_subtype_t) 2)
+#define CPU_SUBTYPE_MIPS_R2800 ((cpu_subtype_t) 3)
+#define CPU_SUBTYPE_MIPS_R2000a ((cpu_subtype_t) 4) /* pmax */
+#define CPU_SUBTYPE_MIPS_R2000 ((cpu_subtype_t) 5)
+#define CPU_SUBTYPE_MIPS_R3000a ((cpu_subtype_t) 6) /* 3max */
+#define CPU_SUBTYPE_MIPS_R3000 ((cpu_subtype_t) 7)
+
+/*
+ * MC98000 (PowerPC) subtypes
+ */
+#define CPU_SUBTYPE_MC98000_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_MC98601 ((cpu_subtype_t) 1)
+
+/*
+ * HPPA subtypes for Hewlett-Packard HP-PA family of
+ * risc processors. Port by NeXT to 700 series.
+ */
+
+#define CPU_SUBTYPE_HPPA_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_HPPA_7100 ((cpu_subtype_t) 0) /* compat */
+#define CPU_SUBTYPE_HPPA_7100LC ((cpu_subtype_t) 1)
+
+/*
+ * MC88000 subtypes.
+ */
+#define CPU_SUBTYPE_MC88000_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_MC88100 ((cpu_subtype_t) 1)
+#define CPU_SUBTYPE_MC88110 ((cpu_subtype_t) 2)
+
+/*
+ * SPARC subtypes
+ */
+#define CPU_SUBTYPE_SPARC_ALL ((cpu_subtype_t) 0)
+
+/*
+ * I860 subtypes
+ */
+#define CPU_SUBTYPE_I860_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_I860_860 ((cpu_subtype_t) 1)
+
+/*
+ * PowerPC subtypes
+ */
+#define CPU_SUBTYPE_POWERPC_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_POWERPC_601 ((cpu_subtype_t) 1)
+#define CPU_SUBTYPE_POWERPC_602 ((cpu_subtype_t) 2)
+#define CPU_SUBTYPE_POWERPC_603 ((cpu_subtype_t) 3)
+#define CPU_SUBTYPE_POWERPC_603e ((cpu_subtype_t) 4)
+#define CPU_SUBTYPE_POWERPC_603ev ((cpu_subtype_t) 5)
+#define CPU_SUBTYPE_POWERPC_604 ((cpu_subtype_t) 6)
+#define CPU_SUBTYPE_POWERPC_604e ((cpu_subtype_t) 7)
+#define CPU_SUBTYPE_POWERPC_620 ((cpu_subtype_t) 8)
+#define CPU_SUBTYPE_POWERPC_750 ((cpu_subtype_t) 9)
+#define CPU_SUBTYPE_POWERPC_7400 ((cpu_subtype_t) 10)
+#define CPU_SUBTYPE_POWERPC_7450 ((cpu_subtype_t) 11)
+#define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100)
+
+/*
+ * ARM subtypes
+ */
+#define CPU_SUBTYPE_ARM_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_ARM_V4T ((cpu_subtype_t) 5)
+#define CPU_SUBTYPE_ARM_V6 ((cpu_subtype_t) 6)
+#define CPU_SUBTYPE_ARM_V5TEJ ((cpu_subtype_t) 7)
+#define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8)
+#define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9)
+#define CPU_SUBTYPE_ARM_V7F ((cpu_subtype_t) 10) /* Cortex A9 */
+#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t) 11) /* Swift */
+#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12)
+#define CPU_SUBTYPE_ARM_V6M ((cpu_subtype_t) 14) /* Not meant to be run under xnu */
+#define CPU_SUBTYPE_ARM_V7M ((cpu_subtype_t) 15) /* Not meant to be run under xnu */
+#define CPU_SUBTYPE_ARM_V7EM ((cpu_subtype_t) 16) /* Not meant to be run under xnu */
+
+#define CPU_SUBTYPE_ARM_V8 ((cpu_subtype_t) 13)
+
+/*
+ * ARM64 subtypes
+ */
+#define CPU_SUBTYPE_ARM64_ALL ((cpu_subtype_t) 0)
+#define CPU_SUBTYPE_ARM64_V8 ((cpu_subtype_t) 1)
+
+#endif /* !__ASSEMBLER__ */
+
+/*
+ * CPU families (sysctl hw.cpufamily)
+ *
+ * These are meant to identify the CPU's marketing name - an
+ * application can map these to (possibly) localized strings.
+ * NB: the encodings of the CPU families are intentionally arbitrary.
+ * There is no ordering, and you should never try to deduce whether
+ * or not some feature is available based on the family.
+ * Use feature flags (eg, hw.optional.altivec) to test for optional
+ * functionality.
+ */
+#define CPUFAMILY_UNKNOWN 0
+#define CPUFAMILY_POWERPC_G3 0xcee41549
+#define CPUFAMILY_POWERPC_G4 0x77c184ae
+#define CPUFAMILY_POWERPC_G5 0xed76d8aa
+#define CPUFAMILY_INTEL_6_13 0xaa33392b
+#define CPUFAMILY_INTEL_PENRYN 0x78ea4fbc
+#define CPUFAMILY_INTEL_NEHALEM 0x6b5a4cd2
+#define CPUFAMILY_INTEL_WESTMERE 0x573b5eec
+#define CPUFAMILY_INTEL_SANDYBRIDGE 0x5490b78c
+#define CPUFAMILY_INTEL_IVYBRIDGE 0x1f65e835
+#define CPUFAMILY_INTEL_HASWELL 0x10b282dc
+#define CPUFAMILY_INTEL_BROADWELL 0x582ed09c
+#define CPUFAMILY_INTEL_SKYLAKE 0x37fc219f
+#define CPUFAMILY_INTEL_KABYLAKE 0x0f817246
+#define CPUFAMILY_ARM_9 0xe73283ae
+#define CPUFAMILY_ARM_11 0x8ff620d8
+#define CPUFAMILY_ARM_XSCALE 0x53b005f5
+#define CPUFAMILY_ARM_12 0xbd1b0ae9
+#define CPUFAMILY_ARM_13 0x0cc90e64
+#define CPUFAMILY_ARM_14 0x96077ef1
+#define CPUFAMILY_ARM_15 0xa8511bca
+#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
+#define CPUFAMILY_ARM_CYCLONE 0x37a09642
+#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
+#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
+#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
+
+/* The following synonyms are deprecated: */
+#define CPUFAMILY_INTEL_6_23 CPUFAMILY_INTEL_PENRYN
+#define CPUFAMILY_INTEL_6_26 CPUFAMILY_INTEL_NEHALEM
+
+
+#endif /* _MACH_MACHINE_H_ */
diff --git a/third_party/darwin_xnu_macho/mach/vm_prot.h b/third_party/darwin_xnu_macho/mach/vm_prot.h
new file mode 100644
index 0000000..6998a31
--- /dev/null
+++ b/third_party/darwin_xnu_macho/mach/vm_prot.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ */
+/*
+ * File: mach/vm_prot.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Virtual memory protection definitions.
+ *
+ */
+
+#ifndef _MACH_VM_PROT_H_
+#define _MACH_VM_PROT_H_
+
+/*
+ * Types defined:
+ *
+ * vm_prot_t VM protection values.
+ */
+
+typedef int vm_prot_t;
+
+/*
+ * Protection values, defined as bits within the vm_prot_t type
+ */
+
+#define VM_PROT_NONE ((vm_prot_t) 0x00) /* no permission bits set */
+
+#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */
+#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */
+#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */
+
+/*
+ * The default protection for newly-created virtual memory
+ */
+
+#define VM_PROT_DEFAULT (VM_PROT_READ|VM_PROT_WRITE) /* 0x03 */
+
+/*
+ * The maximum privileges possible, for parameter checking.
+ */
+
+#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) /* 0x07 */
+
+/*
+ * An invalid protection value.
+ * Used only by memory_object_lock_request to indicate no change
+ * to page locks. Using -1 here is a bad idea because it
+ * looks like VM_PROT_ALL and then some.
+ */
+
+#define VM_PROT_NO_CHANGE ((vm_prot_t) 0x08)
+
+/*
+ * When a caller finds that he cannot obtain write permission on a
+ * mapped entry, the following flag can be used. The entry will
+ * be made "needs copy" effectively copying the object (using COW),
+ * and write permission will be added to the maximum protections
+ * for the associated entry.
+ */
+
+#define VM_PROT_COPY ((vm_prot_t) 0x10)
+
+
+/*
+ * Another invalid protection value.
+ * Used only by memory_object_data_request upon an object
+ * which has specified a copy_call copy strategy. It is used
+ * when the kernel wants a page belonging to a copy of the
+ * object, and is only asking the object as a result of
+ * following a shadow chain. This solves the race between pages
+ * being pushed up by the memory manager and the kernel
+ * walking down the shadow chain.
+ */
+
+#define VM_PROT_WANTS_COPY ((vm_prot_t) 0x10) /* NOTE: same bit value as VM_PROT_COPY */
+
+#ifdef PRIVATE
+/*
+ * The caller wants this memory region treated as if it had a valid
+ * code signature.
+ */
+
+#define VM_PROT_TRUSTED ((vm_prot_t) 0x20)
+#endif /* PRIVATE */
+
+/*
+ * Another invalid protection value.
+ * Indicates that the other protection bits are to be applied as a mask
+ * against the actual protection bits of the map entry.
+ */
+#define VM_PROT_IS_MASK ((vm_prot_t) 0x40)
+
+/*
+ * Another invalid protection value to support execute-only protection.
+ * VM_PROT_STRIP_READ is a special marker that tells mprotect to not
+ * set VM_PROT_READ. We have to do it this way because existing code
+ * expects the system to set VM_PROT_READ if VM_PROT_EXECUTE is set.
+ * VM_PROT_EXECUTE_ONLY is just a convenience value to indicate that
+ * the memory should be executable and explicitly not readable. It will
+ * be ignored on platforms that do not support this type of protection.
+ */
+#define VM_PROT_STRIP_READ ((vm_prot_t) 0x80) /* marker bit, see comment above */
+#define VM_PROT_EXECUTE_ONLY (VM_PROT_EXECUTE|VM_PROT_STRIP_READ) /* 0x84 */
+
+#endif /* _MACH_VM_PROT_H_ */
diff --git a/third_party/demumble b/third_party/demumble
new file mode 160000
+Subproject 01098eab821b33bd31b9778aea38565cd796aa8
diff --git a/third_party/freebsd_elf/LICENSE b/third_party/freebsd_elf/LICENSE
new file mode 100644
index 0000000..94046dd
--- /dev/null
+++ b/third_party/freebsd_elf/LICENSE
@@ -0,0 +1,126 @@
+# $FreeBSD$
+# @(#)COPYRIGHT 8.2 (Berkeley) 3/21/94
+
+The compilation of software known as FreeBSD is distributed under the
+following terms:
+
+Copyright (c) 1992-2016 The FreeBSD Project. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+The 4.4BSD and 4.4BSD-Lite software is distributed under the following
+terms:
+
+All of the documentation and software included in the 4.4BSD and 4.4BSD-Lite
+Releases is copyrighted by The Regents of the University of California.
+
+Copyright 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
+ The Regents of the University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. All advertising materials mentioning features or use of this software
+ must display the following acknowledgement:
+This product includes software developed by the University of
+California, Berkeley and its contributors.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+The Institute of Electrical and Electronics Engineers and the American
+National Standards Committee X3, on Information Processing Systems have
+given us permission to reprint portions of their documentation.
+
+In the following statement, the phrase ``this text'' refers to portions
+of the system documentation.
+
+Portions of this text are reprinted and reproduced in electronic form in
+the second BSD Networking Software Release, from IEEE Std 1003.1-1988, IEEE
+Standard Portable Operating System Interface for Computer Environments
+(POSIX), copyright C 1988 by the Institute of Electrical and Electronics
+Engineers, Inc. In the event of any discrepancy between these versions
+and the original IEEE Standard, the original IEEE Standard is the referee
+document.
+
+In the following statement, the phrase ``This material'' refers to portions
+of the system documentation.
+
+This material is reproduced with permission from American National
+Standards Committee X3, on Information Processing Systems. Computer and
+Business Equipment Manufacturers Association (CBEMA), 311 First St., NW,
+Suite 500, Washington, DC 20001-2178. The developmental work of
+Programming Language C was completed by the X3J11 Technical Committee.
+
+The views and conclusions contained in the software and documentation are
+those of the authors and should not be interpreted as representing official
+policies, either expressed or implied, of the Regents of the University
+of California.
+
+
+NOTE: The copyright of UC Berkeley's Berkeley Software Distribution ("BSD")
+source has been updated. The copyright addendum may be found at
+ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change and is
+included below.
+
+July 22, 1999
+
+To All Licensees, Distributors of Any Version of BSD:
+
+As you know, certain of the Berkeley Software Distribution ("BSD") source
+code files require that further distributions of products containing all or
+portions of the software, acknowledge within their advertising materials
+that such products contain software developed by UC Berkeley and its
+contributors.
+
+Specifically, the provision reads:
+
+" * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors."
+
+Effective immediately, licensees and distributors are no longer required to
+include the acknowledgement within advertising materials. Accordingly, the
+foregoing paragraph of those BSD Unix files containing it is hereby deleted
+in its entirety.
+
+William Hoskins
+Director, Office of Technology Licensing
+University of California, Berkeley
diff --git a/third_party/freebsd_elf/METADATA b/third_party/freebsd_elf/METADATA
new file mode 100644
index 0000000..e006afc
--- /dev/null
+++ b/third_party/freebsd_elf/METADATA
@@ -0,0 +1,26 @@
+name: "FreeBSD ELF headers"
+description:
+ "A few header files from the FreeBSD project. These define the ELF format. "
+ "They only contain struct and constant definitions (no code) and their "
+ "contents are almost entirely derivable from the ELF standard."
+
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://www.freebsd.org"
+ }
+ url {
+ type: GIT
+ value: "https://github.com/freebsd/freebsd"
+ }
+ version: "303d759f5bc75c9f96d98b44b797faadb4a6a1c6"
+ last_upgrade_date {
+ year: 2016
+ month: 10
+ day: 27
+ }
+ local_modifications:
+ "Uniquified header guards and fixed up include paths. "
+ "Also changed to use stdint.h integer types. Removed any "
+ "machine-specific definitions for now."
+}
diff --git a/third_party/freebsd_elf/elf.h b/third_party/freebsd_elf/elf.h
new file mode 100644
index 0000000..1a9a6f4
--- /dev/null
+++ b/third_party/freebsd_elf/elf.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2001 David E. O'Brien.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * This is a Solaris compatibility header
+ */
+
+#ifndef _FREEBSD_ELF_SYS_ELF_H_
+#define _FREEBSD_ELF_SYS_ELF_H_
+
+/* Google-specific modifications follow. */
+
+/* We'll use stdint.h for fixed-size integers. */
+/* #include <sys/types.h> */
+
+/* If we need any machine-specific definitions later we can import them here. */
+/* #include <machine/elf.h> */
+
+#include "third_party/freebsd_elf/elf32.h"
+#include "third_party/freebsd_elf/elf64.h"
+
+#endif /* !_FREEBSD_ELF_SYS_ELF_H_ */
diff --git a/third_party/freebsd_elf/elf32.h b/third_party/freebsd_elf/elf32.h
new file mode 100644
index 0000000..a4d4d6a
--- /dev/null
+++ b/third_party/freebsd_elf/elf32.h
@@ -0,0 +1,259 @@
+/*-
+ * Copyright (c) 1996-1998 John D. Polstra.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <stdint.h>
+
+#ifndef _FREEBSD_ELF_SYS_ELF32_H_
+#define _FREEBSD_ELF_SYS_ELF32_H_
+
+#include "third_party/freebsd_elf/elf_common.h"
+
+/*
+ * ELF definitions common to all 32-bit architectures.
+ */
+
+typedef uint32_t Elf32_Addr;
+typedef uint16_t Elf32_Half;
+typedef uint32_t Elf32_Off;
+typedef int32_t Elf32_Sword;
+typedef uint32_t Elf32_Word;
+typedef uint64_t Elf32_Lword;
+
+typedef Elf32_Word Elf32_Hashelt;
+
+/* Non-standard class-dependent datatype used for abstraction. */
+typedef Elf32_Word Elf32_Size;
+typedef Elf32_Sword Elf32_Ssize;
+
+/*
+ * ELF header.
+ */
+
+typedef struct {
+ unsigned char e_ident[EI_NIDENT]; /* File identification. */
+ Elf32_Half e_type; /* File type. */
+ Elf32_Half e_machine; /* Machine architecture. */
+ Elf32_Word e_version; /* ELF format version. */
+ Elf32_Addr e_entry; /* Entry point. */
+ Elf32_Off e_phoff; /* Program header file offset. */
+ Elf32_Off e_shoff; /* Section header file offset. */
+ Elf32_Word e_flags; /* Architecture-specific flags. */
+ Elf32_Half e_ehsize; /* Size of ELF header in bytes. */
+ Elf32_Half e_phentsize; /* Size of program header entry. */
+ Elf32_Half e_phnum; /* Number of program header entries. */
+ Elf32_Half e_shentsize; /* Size of section header entry. */
+ Elf32_Half e_shnum; /* Number of section header entries. */
+ Elf32_Half e_shstrndx; /* Section name strings section. */
+} Elf32_Ehdr;
+
+/*
+ * Shared object information, found in SHT_MIPS_LIBLIST.
+ */
+
+typedef struct {
+ Elf32_Word l_name; /* The name of a shared object. */
+ Elf32_Word l_time_stamp; /* 32-bit timestamp. */
+ Elf32_Word l_checksum; /* Checksum of visible symbols, sizes. */
+ Elf32_Word l_version; /* Interface version string index. */
+ Elf32_Word l_flags; /* Flags (LL_*). */
+} Elf32_Lib;
+
+/*
+ * Section header.
+ */
+
+typedef struct {
+ Elf32_Word sh_name; /* Section name (index into the
+ section header string table). */
+ Elf32_Word sh_type; /* Section type. */
+ Elf32_Word sh_flags; /* Section flags. */
+ Elf32_Addr sh_addr; /* Address in memory image. */
+ Elf32_Off sh_offset; /* Offset in file. */
+ Elf32_Word sh_size; /* Size in bytes. */
+ Elf32_Word sh_link; /* Index of a related section. */
+ Elf32_Word sh_info; /* Depends on section type. */
+ Elf32_Word sh_addralign; /* Alignment in bytes. */
+ Elf32_Word sh_entsize; /* Size of each entry in section. */
+} Elf32_Shdr;
+
+/*
+ * Program header.
+ */
+
+typedef struct {
+ Elf32_Word p_type; /* Entry type. */
+ Elf32_Off p_offset; /* File offset of contents. */
+ Elf32_Addr p_vaddr; /* Virtual address in memory image. */
+ Elf32_Addr p_paddr; /* Physical address (not used). */
+ Elf32_Word p_filesz; /* Size of contents in file. */
+ Elf32_Word p_memsz; /* Size of contents in memory. */
+ Elf32_Word p_flags; /* Access permission flags. */
+ Elf32_Word p_align; /* Alignment in memory and file. */
+} Elf32_Phdr;
+
+/*
+ * Dynamic structure. The ".dynamic" section contains an array of them.
+ */
+
+typedef struct {
+ Elf32_Sword d_tag; /* Entry type. */
+ union {
+ Elf32_Word d_val; /* Integer value. */
+ Elf32_Addr d_ptr; /* Address value. */
+ } d_un;
+} Elf32_Dyn;
+
+/*
+ * Relocation entries.
+ */
+
+/* Relocations that don't need an addend field. */
+typedef struct {
+ Elf32_Addr r_offset; /* Location to be relocated. */
+ Elf32_Word r_info; /* Relocation type and symbol index. */
+} Elf32_Rel;
+
+/* Relocations that need an addend field. */
+typedef struct {
+ Elf32_Addr r_offset; /* Location to be relocated. */
+ Elf32_Word r_info; /* Relocation type and symbol index. */
+ Elf32_Sword r_addend; /* Addend. */
+} Elf32_Rela;
+
+/* Macros for accessing the fields of r_info. */
+#define ELF32_R_SYM(info) ((info) >> 8)
+#define ELF32_R_TYPE(info) ((unsigned char)(info))
+
+/* Macro for constructing r_info from field values. */
+#define ELF32_R_INFO(sym, type) (((sym) << 8) + (unsigned char)(type))
+
+/*
+ * Note entry header
+ */
+typedef Elf_Note Elf32_Nhdr;
+
+/*
+ * Move entry
+ */
+typedef struct {
+ Elf32_Lword m_value; /* symbol value */
+ Elf32_Word m_info; /* size + index */
+ Elf32_Word m_poffset; /* symbol offset */
+ Elf32_Half m_repeat; /* repeat count */
+ Elf32_Half m_stride; /* stride info */
+} Elf32_Move;
+
+/*
+ * The macros compose and decompose values for Move.m_info
+ *
+ * sym = ELF32_M_SYM(M.m_info)
+ * size = ELF32_M_SIZE(M.m_info)
+ * M.m_info = ELF32_M_INFO(sym, size)
+ */
+#define ELF32_M_SYM(info) ((info)>>8)
+#define ELF32_M_SIZE(info) ((unsigned char)(info))
+#define ELF32_M_INFO(sym, size) (((sym)<<8)+(unsigned char)(size))
+
+/*
+ * Hardware/Software capabilities entry
+ */
+typedef struct {
+ Elf32_Word c_tag; /* how to interpret value */
+ union {
+ Elf32_Word c_val;
+ Elf32_Addr c_ptr;
+ } c_un;
+} Elf32_Cap;
+
+/*
+ * Symbol table entries.
+ */
+
+typedef struct {
+ Elf32_Word st_name; /* String table index of name. */
+ Elf32_Addr st_value; /* Symbol value. */
+ Elf32_Word st_size; /* Size of associated object. */
+ unsigned char st_info; /* Binding (high 4 bits) and type (low 4 bits); see ELF32_ST_BIND/ELF32_ST_TYPE. */
+ unsigned char st_other; /* Visibility in the low 2 bits; see ELF32_ST_VISIBILITY. */
+ Elf32_Half st_shndx; /* Section index of symbol. */
+} Elf32_Sym;
+
+/* Macros for accessing the fields of st_info. */
+#define ELF32_ST_BIND(info) ((info) >> 4)
+#define ELF32_ST_TYPE(info) ((info) & 0xf)
+
+/* Macro for constructing st_info from field values. */
+#define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf))
+
+/* Macro for accessing the fields of st_other. */
+#define ELF32_ST_VISIBILITY(oth) ((oth) & 0x3)
+
+/* Structures used by Sun & GNU symbol versioning. */
+typedef struct
+{
+ Elf32_Half vd_version;
+ Elf32_Half vd_flags;
+ Elf32_Half vd_ndx;
+ Elf32_Half vd_cnt;
+ Elf32_Word vd_hash;
+ Elf32_Word vd_aux;
+ Elf32_Word vd_next;
+} Elf32_Verdef;
+
+typedef struct
+{
+ Elf32_Word vda_name;
+ Elf32_Word vda_next;
+} Elf32_Verdaux;
+
+typedef struct
+{
+ Elf32_Half vn_version;
+ Elf32_Half vn_cnt;
+ Elf32_Word vn_file;
+ Elf32_Word vn_aux;
+ Elf32_Word vn_next;
+} Elf32_Verneed;
+
+typedef struct
+{
+ Elf32_Word vna_hash;
+ Elf32_Half vna_flags;
+ Elf32_Half vna_other;
+ Elf32_Word vna_name;
+ Elf32_Word vna_next;
+} Elf32_Vernaux;
+
+typedef Elf32_Half Elf32_Versym;
+
+typedef struct {
+ Elf32_Half si_boundto; /* direct bindings - symbol bound to */
+ Elf32_Half si_flags; /* per symbol flags */
+} Elf32_Syminfo;
+
+#endif /* !_FREEBSD_ELF_SYS_ELF32_H_ */
diff --git a/third_party/freebsd_elf/elf64.h b/third_party/freebsd_elf/elf64.h
new file mode 100644
index 0000000..c440048
--- /dev/null
+++ b/third_party/freebsd_elf/elf64.h
@@ -0,0 +1,262 @@
+/*-
+ * Copyright (c) 1996-1998 John D. Polstra.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <stdint.h>
+
+#ifndef _FREEBSD_ELF_SYS_ELF64_H_
+#define _FREEBSD_ELF_SYS_ELF64_H_ 1
+
+#include "third_party/freebsd_elf/elf_common.h"
+
+/*
+ * ELF definitions common to all 64-bit architectures.
+ */
+
+typedef uint64_t Elf64_Addr;
+typedef uint16_t Elf64_Half;
+typedef uint64_t Elf64_Off;
+typedef int32_t Elf64_Sword;
+typedef int64_t Elf64_Sxword;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Lword;
+typedef uint64_t Elf64_Xword;
+
+/*
+ * Types of dynamic symbol hash table bucket and chain elements.
+ *
+ * This is inconsistent among 64 bit architectures, so a machine dependent
+ * typedef is required.
+ */
+
+typedef Elf64_Word Elf64_Hashelt;
+
+/* Non-standard class-dependent datatype used for abstraction. */
+typedef Elf64_Xword Elf64_Size;
+typedef Elf64_Sxword Elf64_Ssize;
+
+/*
+ * ELF header.
+ */
+
+typedef struct {
+ unsigned char e_ident[EI_NIDENT]; /* File identification. */
+ Elf64_Half e_type; /* File type. */
+ Elf64_Half e_machine; /* Machine architecture. */
+ Elf64_Word e_version; /* ELF format version. */
+ Elf64_Addr e_entry; /* Entry point. */
+ Elf64_Off e_phoff; /* Program header file offset. */
+ Elf64_Off e_shoff; /* Section header file offset. */
+ Elf64_Word e_flags; /* Architecture-specific flags. */
+ Elf64_Half e_ehsize; /* Size of ELF header in bytes. */
+ Elf64_Half e_phentsize; /* Size of program header entry. */
+ Elf64_Half e_phnum; /* Number of program header entries. */
+ Elf64_Half e_shentsize; /* Size of section header entry. */
+ Elf64_Half e_shnum; /* Number of section header entries. */
+ Elf64_Half e_shstrndx; /* Section name strings section. */
+} Elf64_Ehdr;
+
+/*
+ * Shared object information, found in SHT_MIPS_LIBLIST.
+ */
+
+typedef struct {
+ Elf64_Word l_name; /* The name of a shared object. */
+ Elf64_Word l_time_stamp; /* 32-bit timestamp (Elf64_Word is 32 bits wide). */
+ Elf64_Word l_checksum; /* Checksum of visible symbols, sizes. */
+ Elf64_Word l_version; /* Interface version string index. */
+ Elf64_Word l_flags; /* Flags (LL_*). */
+} Elf64_Lib;
+
+/*
+ * Section header.
+ */
+
+typedef struct {
+ Elf64_Word sh_name; /* Section name (index into the
+ section header string table). */
+ Elf64_Word sh_type; /* Section type. */
+ Elf64_Xword sh_flags; /* Section flags. */
+ Elf64_Addr sh_addr; /* Address in memory image. */
+ Elf64_Off sh_offset; /* Offset in file. */
+ Elf64_Xword sh_size; /* Size in bytes. */
+ Elf64_Word sh_link; /* Index of a related section. */
+ Elf64_Word sh_info; /* Depends on section type. */
+ Elf64_Xword sh_addralign; /* Alignment in bytes. */
+ Elf64_Xword sh_entsize; /* Size of each entry in section. */
+} Elf64_Shdr;
+
+/*
+ * Program header.
+ */
+
+typedef struct {
+ Elf64_Word p_type; /* Entry type. */
+ Elf64_Word p_flags; /* Access permission flags. */
+ Elf64_Off p_offset; /* File offset of contents. */
+ Elf64_Addr p_vaddr; /* Virtual address in memory image. */
+ Elf64_Addr p_paddr; /* Physical address (not used). */
+ Elf64_Xword p_filesz; /* Size of contents in file. */
+ Elf64_Xword p_memsz; /* Size of contents in memory. */
+ Elf64_Xword p_align; /* Alignment in memory and file. */
+} Elf64_Phdr;
+
+/*
+ * Dynamic structure. The ".dynamic" section contains an array of them.
+ */
+
+typedef struct {
+ Elf64_Sxword d_tag; /* Entry type. */
+ union {
+ Elf64_Xword d_val; /* Integer value. */
+ Elf64_Addr d_ptr; /* Address value. */
+ } d_un;
+} Elf64_Dyn;
+
+/*
+ * Relocation entries.
+ */
+
+/* Relocations that don't need an addend field. */
+typedef struct {
+ Elf64_Addr r_offset; /* Location to be relocated. */
+ Elf64_Xword r_info; /* Relocation type and symbol index. */
+} Elf64_Rel;
+
+/* Relocations that need an addend field. */
+typedef struct {
+ Elf64_Addr r_offset; /* Location to be relocated. */
+ Elf64_Xword r_info; /* Relocation type and symbol index. */
+ Elf64_Sxword r_addend; /* Addend. */
+} Elf64_Rela;
+
+/* Macros for accessing the fields of r_info: symbol index in the high 32 bits, type in the low 32 bits. */
+#define ELF64_R_SYM(info) ((info) >> 32)
+#define ELF64_R_TYPE(info) ((info) & 0xffffffffL)
+
+/* Macro for constructing r_info; sym is widened to Elf64_Xword first so the 32-bit shift is defined for int/uint32_t arguments. */
+#define ELF64_R_INFO(sym, type) (((Elf64_Xword)(sym) << 32) + ((type) & 0xffffffffL))
+
+#define ELF64_R_TYPE_DATA(info) (((Elf64_Xword)(info)<<32)>>40)
+#define ELF64_R_TYPE_ID(info) (((Elf64_Xword)(info)<<56)>>56)
+#define ELF64_R_TYPE_INFO(data, type) \
+ (((Elf64_Xword)(data)<<8)+(Elf64_Xword)(type))
+
+/*
+ * Note entry header
+ */
+typedef Elf_Note Elf64_Nhdr;
+
+/*
+ * Move entry
+ */
+typedef struct {
+ Elf64_Lword m_value; /* symbol value */
+ Elf64_Xword m_info; /* size + index */
+ Elf64_Xword m_poffset; /* symbol offset */
+ Elf64_Half m_repeat; /* repeat count */
+ Elf64_Half m_stride; /* stride info */
+} Elf64_Move;
+
+#define ELF64_M_SYM(info) ((info)>>8)
+#define ELF64_M_SIZE(info) ((unsigned char)(info))
+#define ELF64_M_INFO(sym, size) (((sym)<<8)+(unsigned char)(size))
+
+/*
+ * Hardware/Software capabilities entry
+ */
+typedef struct {
+ Elf64_Xword c_tag; /* how to interpret value */
+ union {
+ Elf64_Xword c_val;
+ Elf64_Addr c_ptr;
+ } c_un;
+} Elf64_Cap;
+
+/*
+ * Symbol table entries.
+ */
+
+typedef struct {
+ Elf64_Word st_name; /* String table index of name. */
+ unsigned char st_info; /* Binding (high 4 bits) and type (low 4 bits); see ELF64_ST_BIND/ELF64_ST_TYPE. */
+ unsigned char st_other; /* Visibility in the low 2 bits; see ELF64_ST_VISIBILITY. */
+ Elf64_Half st_shndx; /* Section index of symbol. */
+ Elf64_Addr st_value; /* Symbol value. */
+ Elf64_Xword st_size; /* Size of associated object. */
+} Elf64_Sym;
+
+/* Macros for accessing the fields of st_info. */
+#define ELF64_ST_BIND(info) ((info) >> 4)
+#define ELF64_ST_TYPE(info) ((info) & 0xf)
+
+/* Macro for constructing st_info from field values. */
+#define ELF64_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf))
+
+/* Macro for accessing the fields of st_other. */
+#define ELF64_ST_VISIBILITY(oth) ((oth) & 0x3)
+
+/* Structures used by Sun & GNU-style symbol versioning. */
+typedef struct {
+ Elf64_Half vd_version;
+ Elf64_Half vd_flags;
+ Elf64_Half vd_ndx;
+ Elf64_Half vd_cnt;
+ Elf64_Word vd_hash;
+ Elf64_Word vd_aux;
+ Elf64_Word vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+ Elf64_Word vda_name;
+ Elf64_Word vda_next;
+} Elf64_Verdaux;
+
+typedef struct {
+ Elf64_Half vn_version;
+ Elf64_Half vn_cnt;
+ Elf64_Word vn_file;
+ Elf64_Word vn_aux;
+ Elf64_Word vn_next;
+} Elf64_Verneed;
+
+typedef struct {
+ Elf64_Word vna_hash;
+ Elf64_Half vna_flags;
+ Elf64_Half vna_other;
+ Elf64_Word vna_name;
+ Elf64_Word vna_next;
+} Elf64_Vernaux;
+
+typedef Elf64_Half Elf64_Versym;
+
+typedef struct {
+ Elf64_Half si_boundto; /* direct bindings - symbol bound to */
+ Elf64_Half si_flags; /* per symbol flags */
+} Elf64_Syminfo;
+
+#endif /* !_FREEBSD_ELF_SYS_ELF64_H_ */
diff --git a/third_party/freebsd_elf/elf_common.h b/third_party/freebsd_elf/elf_common.h
new file mode 100644
index 0000000..44a97e0
--- /dev/null
+++ b/third_party/freebsd_elf/elf_common.h
@@ -0,0 +1,1336 @@
+/*-
+ * Copyright (c) 2000, 2001, 2008, 2011, David E. O'Brien
+ * Copyright (c) 1998 John D. Polstra.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <stdint.h>
+
+#ifndef _FREEBSD_ELF_SYS_ELF_COMMON_H_
+#define _FREEBSD_ELF_SYS_ELF_COMMON_H_ 1
+
+/*
+ * ELF definitions that are independent of architecture or word size.
+ */
+
+/*
+ * Note header. The ".note" section contains an array of notes. Each
+ * begins with this header, aligned to a word boundary. Immediately
+ * following the note header is n_namesz bytes of name, padded to the
+ * next word boundary. Then comes n_descsz bytes of descriptor, again
+ * padded to a word boundary. The values of n_namesz and n_descsz do
+ * not include the padding.
+ */
+
+typedef struct {
+ uint32_t n_namesz; /* Length of name. */
+ uint32_t n_descsz; /* Length of descriptor. */
+ uint32_t n_type; /* Type of this note. */
+} Elf_Note;
+
+/*
+ * Option kinds.
+ */
+#define ODK_NULL 0 /* undefined */
+#define ODK_REGINFO 1 /* register usage info */
+#define ODK_EXCEPTIONS 2 /* exception processing info */
+#define ODK_PAD 3 /* section padding */
+#define ODK_HWPATCH 4 /* hardware patch applied */
+#define ODK_FILL 5 /* fill value used by the linker */
+#define ODK_TAGS 6 /* reserved space for tools */
+#define ODK_HWAND 7 /* hardware AND patch applied */
+#define ODK_HWOR 8 /* hardware OR patch applied */
+#define ODK_GP_GROUP 9 /* GP group for text/data sections */
+#define ODK_IDENT 10 /* ID information */
+#define ODK_PAGESIZE 11 /* page size information */
+
+/*
+ * ODK_EXCEPTIONS info field masks.
+ */
+#define OEX_FPU_MIN 0x0000001f /* min FPU exception required */
+#define OEX_FPU_MAX 0x00001f00 /* max FPU exception allowed */
+#define OEX_PAGE0 0x00010000 /* page zero must be mapped */
+#define OEX_SMM 0x00020000 /* run in sequential memory mode */
+#define OEX_PRECISEFP 0x00040000 /* run in precise FP exception mode */
+#define OEX_DISMISS 0x00080000 /* dismiss invalid address traps */
+
+/*
+ * ODK_PAD info field masks.
+ */
+#define OPAD_PREFIX 0x0001
+#define OPAD_POSTFIX 0x0002
+#define OPAD_SYMBOL 0x0004
+
+/*
+ * ODK_HWPATCH info field masks.
+ */
+#define OHW_R4KEOP 0x00000001 /* patch for R4000 branch at end-of-page bug */
+#define OHW_R8KPFETCH 0x00000002 /* R8000 prefetch bug may occur */
+#define OHW_R5KEOP 0x00000004 /* patch for R5000 branch at end-of-page bug */
+#define OHW_R5KCVTL 0x00000008 /* R5000 cvt.[ds].l bug: clean == 1 */
+#define OHW_R10KLDL 0x00000010UL /* need patch for R10000 misaligned load */
+
+/*
+ * ODK_HWAND/ODK_HWOR info field and hwp_flags[12] masks.
+ */
+#define OHWA0_R4KEOP_CHECKED 0x00000001 /* object checked for R4000 end-of-page bug */
+#define OHWA0_R4KEOP_CLEAN 0x00000002 /* object verified clean for R4000 end-of-page bug */
+#define OHWO0_FIXADE 0x00000001 /* object requires call to fixade */
+
+/*
+ * ODK_IDENT/ODK_GP_GROUP info field masks.
+ */
+#define OGP_GROUP 0x0000ffff /* GP group number */
+#define OGP_SELF 0x00010000 /* GP group is self-contained */
+
+/*
+ * The header for GNU-style hash sections.
+ */
+
+typedef struct {
+ uint32_t gh_nbuckets; /* Number of hash buckets. */
+ uint32_t gh_symndx; /* First visible symbol in .dynsym. */
+ uint32_t gh_maskwords; /* #maskwords used in bloom filter. */
+ uint32_t gh_shift2; /* Bloom filter shift count. */
+} Elf_GNU_Hash_Header;
+
+/* Indexes into the e_ident array. Keep synced with
+ http://www.sco.com/developers/gabi/latest/ch4.eheader.html */
+#define EI_MAG0 0 /* Magic number, byte 0. */
+#define EI_MAG1 1 /* Magic number, byte 1. */
+#define EI_MAG2 2 /* Magic number, byte 2. */
+#define EI_MAG3 3 /* Magic number, byte 3. */
+#define EI_CLASS 4 /* Class of machine. */
+#define EI_DATA 5 /* Data format. */
+#define EI_VERSION 6 /* ELF format version. */
+#define EI_OSABI 7 /* Operating system / ABI identification */
+#define EI_ABIVERSION 8 /* ABI version */
+#define OLD_EI_BRAND 8 /* Start of architecture identification. */
+#define EI_PAD 9 /* Start of padding (per SVR4 ABI). */
+#define EI_NIDENT 16 /* Size of e_ident array. */
+
+/* Values for the magic number bytes. */
+#define ELFMAG0 0x7f
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+#define ELFMAG "\177ELF" /* magic string */
+#define SELFMAG 4 /* magic string size */
+
+/* Values for e_ident[EI_VERSION] and e_version. */
+#define EV_NONE 0
+#define EV_CURRENT 1
+
+/* Values for e_ident[EI_CLASS]. */
+#define ELFCLASSNONE 0 /* Unknown class. */
+#define ELFCLASS32 1 /* 32-bit architecture. */
+#define ELFCLASS64 2 /* 64-bit architecture. */
+
+/* Values for e_ident[EI_DATA]. */
+#define ELFDATANONE 0 /* Unknown data format. */
+#define ELFDATA2LSB 1 /* 2's complement little-endian. */
+#define ELFDATA2MSB 2 /* 2's complement big-endian. */
+
+/* Values for e_ident[EI_OSABI]. */
+#define ELFOSABI_NONE 0 /* UNIX System V ABI */
+#define ELFOSABI_HPUX 1 /* HP-UX operating system */
+#define ELFOSABI_NETBSD 2 /* NetBSD */
+#define ELFOSABI_LINUX 3 /* GNU/Linux */
+#define ELFOSABI_HURD 4 /* GNU/Hurd */
+#define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */
+#define ELFOSABI_SOLARIS 6 /* Solaris */
+#define ELFOSABI_AIX 7 /* AIX */
+#define ELFOSABI_IRIX 8 /* IRIX */
+#define ELFOSABI_FREEBSD 9 /* FreeBSD */
+#define ELFOSABI_TRU64 10 /* TRU64 UNIX */
+#define ELFOSABI_MODESTO 11 /* Novell Modesto */
+#define ELFOSABI_OPENBSD 12 /* OpenBSD */
+#define ELFOSABI_OPENVMS 13 /* Open VMS */
+#define ELFOSABI_NSK 14 /* HP Non-Stop Kernel */
+#define ELFOSABI_AROS 15 /* Amiga Research OS */
+#define ELFOSABI_FENIXOS 16 /* FenixOS */
+#define ELFOSABI_CLOUDABI 17 /* Nuxi CloudABI */
+#define ELFOSABI_ARM_AEABI 64 /* ARM EABI */
+#define ELFOSABI_ARM 97 /* ARM */
+#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
+
+#define ELFOSABI_SYSV ELFOSABI_NONE /* symbol used in old spec */
+#define ELFOSABI_MONTEREY ELFOSABI_AIX /* Monterey */
+#define ELFOSABI_GNU ELFOSABI_LINUX
+
+/* e_ident */
+#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
+ (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
+ (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
+ (ehdr).e_ident[EI_MAG3] == ELFMAG3)
+
+/* Values for e_type. */
+#define ET_NONE 0 /* Unknown type. */
+#define ET_REL 1 /* Relocatable. */
+#define ET_EXEC 2 /* Executable. */
+#define ET_DYN 3 /* Shared object. */
+#define ET_CORE 4 /* Core file. */
+#define ET_LOOS 0xfe00 /* First operating system specific. */
+#define ET_HIOS 0xfeff /* Last operating system-specific. */
+#define ET_LOPROC 0xff00 /* First processor-specific. */
+#define ET_HIPROC 0xffff /* Last processor-specific. */
+
+/* Values for e_machine. */
+#define EM_NONE 0 /* Unknown machine. */
+#define EM_M32 1 /* AT&T WE32100. */
+#define EM_SPARC 2 /* Sun SPARC. */
+#define EM_386 3 /* Intel i386. */
+#define EM_68K 4 /* Motorola 68000. */
+#define EM_88K 5 /* Motorola 88000. */
+#define EM_IAMCU 6 /* Intel MCU. */
+#define EM_860 7 /* Intel i860. */
+#define EM_MIPS 8 /* MIPS R3000 Big-Endian only. */
+#define EM_S370 9 /* IBM System/370. */
+#define EM_MIPS_RS3_LE 10 /* MIPS R3000 Little-Endian. */
+#define EM_PARISC 15 /* HP PA-RISC. */
+#define EM_VPP500 17 /* Fujitsu VPP500. */
+#define EM_SPARC32PLUS 18 /* SPARC v8plus. */
+#define EM_960 19 /* Intel 80960. */
+#define EM_PPC 20 /* PowerPC 32-bit. */
+#define EM_PPC64 21 /* PowerPC 64-bit. */
+#define EM_S390 22 /* IBM System/390. */
+#define EM_V800 36 /* NEC V800. */
+#define EM_FR20 37 /* Fujitsu FR20. */
+#define EM_RH32 38 /* TRW RH-32. */
+#define EM_RCE 39 /* Motorola RCE. */
+#define EM_ARM 40 /* ARM. */
+#define EM_SH 42 /* Hitachi SH. */
+#define EM_SPARCV9 43 /* SPARC v9 64-bit. */
+#define EM_TRICORE 44 /* Siemens TriCore embedded processor. */
+#define EM_ARC 45 /* Argonaut RISC Core. */
+#define EM_H8_300 46 /* Hitachi H8/300. */
+#define EM_H8_300H 47 /* Hitachi H8/300H. */
+#define EM_H8S 48 /* Hitachi H8S. */
+#define EM_H8_500 49 /* Hitachi H8/500. */
+#define EM_IA_64 50 /* Intel IA-64 Processor. */
+#define EM_MIPS_X 51 /* Stanford MIPS-X. */
+#define EM_COLDFIRE 52 /* Motorola ColdFire. */
+#define EM_68HC12 53 /* Motorola M68HC12. */
+#define EM_MMA 54 /* Fujitsu MMA. */
+#define EM_PCP 55 /* Siemens PCP. */
+#define EM_NCPU 56 /* Sony nCPU. */
+#define EM_NDR1 57 /* Denso NDR1 microprocessor. */
+#define EM_STARCORE 58 /* Motorola Star*Core processor. */
+#define EM_ME16 59 /* Toyota ME16 processor. */
+#define EM_ST100 60 /* STMicroelectronics ST100 processor. */
+#define EM_TINYJ 61 /* Advanced Logic Corp. TinyJ processor. */
+#define EM_X86_64 62 /* Advanced Micro Devices x86-64 */
+#define EM_AMD64 EM_X86_64 /* Advanced Micro Devices x86-64 (compat) */
+#define EM_PDSP 63 /* Sony DSP Processor. */
+#define EM_FX66 66 /* Siemens FX66 microcontroller. */
+#define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16
+ microcontroller. */
+#define EM_ST7 68 /* STMicroelectronics ST7 8-bit
+ microcontroller. */
+#define EM_68HC16 69 /* Motorola MC68HC16 microcontroller. */
+#define EM_68HC11 70 /* Motorola MC68HC11 microcontroller. */
+#define EM_68HC08 71 /* Motorola MC68HC08 microcontroller. */
+#define EM_68HC05 72 /* Motorola MC68HC05 microcontroller. */
+#define EM_SVX 73 /* Silicon Graphics SVx. */
+#define EM_ST19 74 /* STMicroelectronics ST19 8-bit mc. */
+#define EM_VAX 75 /* Digital VAX. */
+#define EM_CRIS 76 /* Axis Communications 32-bit embedded
+ processor. */
+#define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded
+ processor. */
+#define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor. */
+#define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor. */
+#define EM_MMIX 80 /* Donald Knuth's educational 64-bit proc. */
+#define EM_HUANY 81 /* Harvard University machine-independent
+ object files. */
+#define EM_PRISM 82 /* SiTera Prism. */
+#define EM_AVR 83 /* Atmel AVR 8-bit microcontroller. */
+#define EM_FR30 84 /* Fujitsu FR30. */
+#define EM_D10V 85 /* Mitsubishi D10V. */
+#define EM_D30V 86 /* Mitsubishi D30V. */
+#define EM_V850 87 /* NEC v850. */
+#define EM_M32R 88 /* Mitsubishi M32R. */
+#define EM_MN10300 89 /* Matsushita MN10300. */
+#define EM_MN10200 90 /* Matsushita MN10200. */
+#define EM_PJ 91 /* picoJava. */
+#define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor. */
+#define EM_ARC_A5 93 /* ARC Cores Tangent-A5. */
+#define EM_XTENSA 94 /* Tensilica Xtensa Architecture. */
+#define EM_VIDEOCORE 95 /* Alphamosaic VideoCore processor. */
+#define EM_TMM_GPP 96 /* Thompson Multimedia General Purpose
+ Processor. */
+#define EM_NS32K 97 /* National Semiconductor 32000 series. */
+#define EM_TPC 98 /* Tenor Network TPC processor. */
+#define EM_SNP1K 99 /* Trebia SNP 1000 processor. */
+#define EM_ST200 100 /* STMicroelectronics ST200 microcontroller. */
+#define EM_IP2K 101 /* Ubicom IP2xxx microcontroller family. */
+#define EM_MAX 102 /* MAX Processor. */
+#define EM_CR 103 /* National Semiconductor CompactRISC
+ microprocessor. */
+#define EM_F2MC16 104 /* Fujitsu F2MC16. */
+#define EM_MSP430 105 /* Texas Instruments embedded microcontroller
+ msp430. */
+#define EM_BLACKFIN 106 /* Analog Devices Blackfin (DSP) processor. */
+#define EM_SE_C33 107 /* S1C33 Family of Seiko Epson processors. */
+#define EM_SEP 108 /* Sharp embedded microprocessor. */
+#define EM_ARCA 109 /* Arca RISC Microprocessor. */
+#define EM_UNICORE 110 /* Microprocessor series from PKU-Unity Ltd.
+ and MPRC of Peking University */
+#define EM_AARCH64 183 /* AArch64 (64-bit ARM) */
+#define EM_RISCV 243 /* RISC-V */
+
+/* Non-standard or deprecated. */
+#define EM_486 6 /* Intel i486. */
+#define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */
+#define EM_ALPHA_STD 41 /* Digital Alpha (standard value). */
+#define EM_ALPHA 0x9026 /* Alpha (written in the absence of an ABI) */
+
+/**
+ * e_flags
+ */
+#define EF_ARM_RELEXEC 0x1
+#define EF_ARM_HASENTRY 0x2
+#define EF_ARM_SYMSARESORTED 0x4
+#define EF_ARM_DYNSYMSUSESEGIDX 0x8
+#define EF_ARM_MAPSYMSFIRST 0x10
+#define EF_ARM_LE8 0x00400000
+#define EF_ARM_BE8 0x00800000
+#define EF_ARM_EABIMASK 0xFF000000
+#define EF_ARM_EABI_UNKNOWN 0x00000000
+#define EF_ARM_EABI_VER1 0x01000000
+#define EF_ARM_EABI_VER2 0x02000000
+#define EF_ARM_EABI_VER3 0x03000000
+#define EF_ARM_EABI_VER4 0x04000000
+#define EF_ARM_EABI_VER5 0x05000000
+#define EF_ARM_INTERWORK 0x00000004
+#define EF_ARM_APCS_26 0x00000008
+#define EF_ARM_APCS_FLOAT 0x00000010
+#define EF_ARM_PIC 0x00000020
+#define EF_ARM_ALIGN8 0x00000040
+#define EF_ARM_NEW_ABI 0x00000080
+#define EF_ARM_OLD_ABI 0x00000100
+#define EF_ARM_SOFT_FLOAT 0x00000200
+#define EF_ARM_VFP_FLOAT 0x00000400
+#define EF_ARM_MAVERICK_FLOAT 0x00000800
+
+#define EF_MIPS_NOREORDER 0x00000001
+#define EF_MIPS_PIC 0x00000002 /* Contains PIC code */
+#define EF_MIPS_CPIC 0x00000004 /* STD PIC calling sequence */
+#define EF_MIPS_UCODE 0x00000010
+#define EF_MIPS_ABI2 0x00000020 /* N32 */
+#define EF_MIPS_OPTIONS_FIRST 0x00000080
+#define EF_MIPS_ARCH_ASE 0x0F000000 /* Architectural extensions */
+#define EF_MIPS_ARCH_ASE_MDMX 0x08000000 /* MDMX multimedia extension */
+#define EF_MIPS_ARCH_ASE_M16 0x04000000 /* MIPS-16 ISA extensions */
+#define EF_MIPS_ARCH 0xF0000000 /* Architecture field */
+
+#define EF_PPC_EMB 0x80000000
+#define EF_PPC_RELOCATABLE 0x00010000
+#define EF_PPC_RELOCATABLE_LIB 0x00008000
+
+#define EF_SPARC_EXT_MASK 0x00ffff00 /* Vendor extension mask. */
+#define EF_SPARC_32PLUS 0x00000100 /* Generic V8+ features. */
+#define EF_SPARC_SUN_US1 0x00000200 /* Sun UltraSPARC 1 extensions. */
+#define EF_SPARC_HAL_R1 0x00000400 /* HAL R1 extensions; own bit, distinct from EF_SPARC_SUN_US1. */
+#define EF_SPARC_SUN_US3 0x00000800 /* Sun UltraSPARC 3 extensions. */
+
+#define EF_SPARCV9_MM 0x00000003
+#define EF_SPARCV9_TSO 0x00000000
+#define EF_SPARCV9_PSO 0x00000001
+#define EF_SPARCV9_RMO 0x00000002
+
+/* Special section indexes. */
+#define SHN_UNDEF 0 /* Undefined, missing, irrelevant. */
+#define SHN_LORESERVE 0xff00 /* First of reserved range. */
+#define SHN_LOPROC 0xff00 /* First processor-specific. */
+#define SHN_HIPROC 0xff1f /* Last processor-specific. */
+#define SHN_LOOS 0xff20 /* First operating system-specific. */
+#define SHN_FBSD_CACHED SHN_LOOS /* Transient, for sys/kern/link_elf_obj
+ linker only: Cached global in local
+ symtab. */
+#define SHN_HIOS 0xff3f /* Last operating system-specific. */
+#define SHN_ABS 0xfff1 /* Absolute values. */
+#define SHN_COMMON 0xfff2 /* Common data. */
+#define SHN_XINDEX 0xffff /* Escape -- index stored elsewhere. */
+#define SHN_HIRESERVE 0xffff /* Last of reserved range. */
+
+/* sh_type */
+#define SHT_NULL 0 /* inactive */
+#define SHT_PROGBITS 1 /* program defined information */
+#define SHT_SYMTAB 2 /* symbol table section */
+#define SHT_STRTAB 3 /* string table section */
+#define SHT_RELA 4 /* relocation section with addends */
+#define SHT_HASH 5 /* symbol hash table section */
+#define SHT_DYNAMIC 6 /* dynamic section */
+#define SHT_NOTE 7 /* note section */
+#define SHT_NOBITS 8 /* no space section */
+#define SHT_REL 9 /* relocation section - no addends */
+#define SHT_SHLIB 10 /* reserved - purpose unknown */
+#define SHT_DYNSYM 11 /* dynamic symbol table section */
+#define SHT_INIT_ARRAY 14 /* Initialization function pointers. */
+#define SHT_FINI_ARRAY 15 /* Termination function pointers. */
+#define SHT_PREINIT_ARRAY 16 /* Pre-initialization function ptrs. */
+#define SHT_GROUP 17 /* Section group. */
+#define SHT_SYMTAB_SHNDX 18 /* Section indexes (see SHN_XINDEX). */
+#define SHT_LOOS 0x60000000 /* First of OS specific semantics */
+#define SHT_LOSUNW 0x6ffffff4
+#define SHT_SUNW_dof 0x6ffffff4
+#define SHT_SUNW_cap 0x6ffffff5
+#define SHT_GNU_ATTRIBUTES 0x6ffffff5
+#define SHT_SUNW_SIGNATURE 0x6ffffff6
+#define SHT_GNU_HASH 0x6ffffff6
+#define SHT_GNU_LIBLIST 0x6ffffff7
+#define SHT_SUNW_ANNOTATE 0x6ffffff7
+#define SHT_SUNW_DEBUGSTR 0x6ffffff8
+#define SHT_SUNW_DEBUG 0x6ffffff9
+#define SHT_SUNW_move 0x6ffffffa
+#define SHT_SUNW_COMDAT 0x6ffffffb
+#define SHT_SUNW_syminfo 0x6ffffffc
+#define SHT_SUNW_verdef 0x6ffffffd
+#define SHT_GNU_verdef 0x6ffffffd /* Symbol versions provided */
+#define SHT_SUNW_verneed 0x6ffffffe
+#define SHT_GNU_verneed 0x6ffffffe /* Symbol versions required */
+#define SHT_SUNW_versym 0x6fffffff
+#define SHT_GNU_versym 0x6fffffff /* Symbol version table */
+#define SHT_HISUNW 0x6fffffff
+#define SHT_HIOS 0x6fffffff /* Last of OS specific semantics */
+#define SHT_LOPROC 0x70000000 /* First of the range reserved for processor-specific section header types (ends at SHT_HIPROC). */
+#define SHT_X86_64_UNWIND 0x70000001 /* unwind information */
+#define SHT_AMD64_UNWIND SHT_X86_64_UNWIND /* Alias for SHT_X86_64_UNWIND. */
+
+#define SHT_ARM_EXIDX 0x70000001 /* Exception index table. */
+#define SHT_ARM_PREEMPTMAP 0x70000002 /* BPABI DLL dynamic linking
+ pre-emption map. */
+#define SHT_ARM_ATTRIBUTES 0x70000003 /* Object file compatibility
+ attributes. */
+#define SHT_ARM_DEBUGOVERLAY 0x70000004 /* See DBGOVL for details. */
+#define SHT_ARM_OVERLAYSECTION 0x70000005 /* See DBGOVL for details. */
+#define SHT_MIPS_LIBLIST 0x70000000
+#define SHT_MIPS_MSYM 0x70000001
+#define SHT_MIPS_CONFLICT 0x70000002
+#define SHT_MIPS_GPTAB 0x70000003
+#define SHT_MIPS_UCODE 0x70000004
+#define SHT_MIPS_DEBUG 0x70000005
+#define SHT_MIPS_REGINFO 0x70000006
+#define SHT_MIPS_PACKAGE 0x70000007
+#define SHT_MIPS_PACKSYM 0x70000008
+#define SHT_MIPS_RELD 0x70000009
+#define SHT_MIPS_IFACE 0x7000000b
+#define SHT_MIPS_CONTENT 0x7000000c
+#define SHT_MIPS_OPTIONS 0x7000000d
+#define SHT_MIPS_DELTASYM 0x7000001b
+#define SHT_MIPS_DELTAINST 0x7000001c
+#define SHT_MIPS_DELTACLASS 0x7000001d
+#define SHT_MIPS_DWARF 0x7000001e /* MIPS gcc uses MIPS_DWARF */
+#define SHT_MIPS_DELTADECL 0x7000001f
+#define SHT_MIPS_SYMBOL_LIB 0x70000020
+#define SHT_MIPS_EVENTS 0x70000021
+#define SHT_MIPS_TRANSLATE 0x70000022
+#define SHT_MIPS_PIXIE 0x70000023
+#define SHT_MIPS_XLATE 0x70000024
+#define SHT_MIPS_XLATE_DEBUG 0x70000025
+#define SHT_MIPS_WHIRL 0x70000026
+#define SHT_MIPS_EH_REGION 0x70000027
+#define SHT_MIPS_XLATE_OLD 0x70000028
+#define SHT_MIPS_PDR_EXCEPTION 0x70000029
+#define SHT_MIPS_ABIFLAGS 0x7000002a
+
+#define SHT_SPARC_GOTDATA 0x70000000
+
+#define SHTORDERED /* NOTE(review): defined with no value; its purpose is not evident from this file -- confirm against upstream. */
+#define SHT_HIPROC 0x7fffffff /* Last of the range reserved for processor-specific section header types. */
+#define SHT_LOUSER 0x80000000 /* First of the range reserved for application-specific indexes. */
+#define SHT_HIUSER 0xffffffff /* Last of the range reserved for application-specific indexes. */
+
+/* Flags for sh_flags. */
+#define SHF_WRITE 0x1 /* Section contains writable data. */
+#define SHF_ALLOC 0x2 /* Section occupies memory. */
+#define SHF_EXECINSTR 0x4 /* Section contains instructions. */
+#define SHF_MERGE 0x10 /* Section may be merged. */
+#define SHF_STRINGS 0x20 /* Section contains strings. */
+#define SHF_INFO_LINK 0x40 /* sh_info holds section index. */
+#define SHF_LINK_ORDER 0x80 /* Special ordering requirements. */
+#define SHF_OS_NONCONFORMING 0x100 /* OS-specific processing required. */
+#define SHF_GROUP 0x200 /* Member of section group. */
+#define SHF_TLS 0x400 /* Section contains TLS data. */
+#define SHF_COMPRESSED 0x800 /* Section contains compressed data. */
+#define SHF_MASKOS 0x0ff00000 /* OS-specific semantics. */
+#define SHF_MASKPROC 0xf0000000 /* Processor-specific semantics. */
+
+/* Flags for section groups. */
+#define GRP_COMDAT 0x1 /* COMDAT semantics. */
+
+/*
+ * Flags / mask for .gnu.versym sections.
+ */
+#define VERSYM_VERSION 0x7fff
+#define VERSYM_HIDDEN 0x8000
+
+/* Values for p_type. */
+#define PT_NULL 0 /* Unused entry. */
+#define PT_LOAD 1 /* Loadable segment. */
+#define PT_DYNAMIC 2 /* Dynamic linking information segment. */
+#define PT_INTERP 3 /* Pathname of interpreter. */
+#define PT_NOTE 4 /* Auxiliary information. */
+#define PT_SHLIB 5 /* Reserved (not used). */
+#define PT_PHDR 6 /* Location of program header itself. */
+#define PT_TLS 7 /* Thread local storage segment */
+#define PT_LOOS 0x60000000 /* First OS-specific. */
+#define PT_SUNW_UNWIND 0x6464e550 /* amd64 UNWIND program header */
+#define PT_GNU_EH_FRAME 0x6474e550
+#define PT_GNU_STACK 0x6474e551
+#define PT_GNU_RELRO 0x6474e552
+#define PT_DUMP_DELTA 0x6fb5d000 /* va->pa map for kernel dumps
+ (currently arm). */
+#define PT_LOSUNW 0x6ffffffa
+#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */
+#define PT_SUNWSTACK 0x6ffffffb /* describes the stack segment */
+#define PT_SUNWDTRACE 0x6ffffffc /* private */
+#define PT_SUNWCAP 0x6ffffffd /* hard/soft capabilities segment */
+#define PT_HISUNW 0x6fffffff
+#define PT_HIOS 0x6fffffff /* Last OS-specific. */
+#define PT_LOPROC 0x70000000 /* First processor-specific type. */
+#define PT_ARM_ARCHEXT 0x70000000 /* ARM arch compat information. */
+#define PT_ARM_EXIDX 0x70000001 /* ARM exception unwind tables. */
+#define PT_HIPROC 0x7fffffff /* Last processor-specific type. */
+
+/* Values for p_flags. */
+#define PF_X 0x1 /* Executable. */
+#define PF_W 0x2 /* Writable. */
+#define PF_R 0x4 /* Readable. */
+#define PF_MASKOS 0x0ff00000 /* Operating system-specific. */
+#define PF_MASKPROC 0xf0000000 /* Processor-specific. */
+
+/* Extended program header index. */
+#define PN_XNUM 0xffff
+
+/* Values for d_tag. */
+#define DT_NULL 0 /* Terminating entry. */
+#define DT_NEEDED 1 /* String table offset of a needed shared
+ library. */
+#define DT_PLTRELSZ 2 /* Total size in bytes of PLT relocations. */
+#define DT_PLTGOT 3 /* Processor-dependent address. */
+#define DT_HASH 4 /* Address of symbol hash table. */
+#define DT_STRTAB 5 /* Address of string table. */
+#define DT_SYMTAB 6 /* Address of symbol table. */
+#define DT_RELA 7 /* Address of ElfNN_Rela relocations. */
+#define DT_RELASZ 8 /* Total size of ElfNN_Rela relocations. */
+#define DT_RELAENT 9 /* Size of each ElfNN_Rela relocation entry. */
+#define DT_STRSZ 10 /* Size of string table. */
+#define DT_SYMENT 11 /* Size of each symbol table entry. */
+#define DT_INIT 12 /* Address of initialization function. */
+#define DT_FINI 13 /* Address of finalization function. */
+#define DT_SONAME 14 /* String table offset of shared object
+ name. */
+#define DT_RPATH 15 /* String table offset of library path. [sup] */
+#define DT_SYMBOLIC 16 /* Indicates "symbolic" linking. [sup] */
+#define DT_REL 17 /* Address of ElfNN_Rel relocations. */
+#define DT_RELSZ 18 /* Total size of ElfNN_Rel relocations. */
+#define DT_RELENT 19 /* Size of each ElfNN_Rel relocation. */
+#define DT_PLTREL 20 /* Type of relocation used for PLT. */
+#define DT_DEBUG 21 /* Reserved (not used). */
+#define DT_TEXTREL 22 /* Indicates there may be relocations in
+ non-writable segments. [sup] */
+#define DT_JMPREL 23 /* Address of PLT relocations. */
+#define DT_BIND_NOW 24 /* [sup] */
+#define DT_INIT_ARRAY 25 /* Address of the array of pointers to
+ initialization functions */
+#define DT_FINI_ARRAY 26 /* Address of the array of pointers to
+ termination functions */
+#define DT_INIT_ARRAYSZ 27 /* Size in bytes of the array of
+ initialization functions. */
+#define DT_FINI_ARRAYSZ 28 /* Size in bytes of the array of
+ termination functions. */
+#define DT_RUNPATH 29 /* String table offset of a null-terminated
+ library search path string. */
+#define DT_FLAGS 30 /* Object specific flag values. */
+#define DT_ENCODING 32 /* Values greater than or equal to DT_ENCODING
+ and less than DT_LOOS follow the rules for
+ the interpretation of the d_un union
+ as follows: even == 'd_ptr', odd == 'd_val'
+ or none. Same value as DT_PREINIT_ARRAY. */
+#define DT_PREINIT_ARRAY 32 /* Address of the array of pointers to
+ pre-initialization functions. */
+#define DT_PREINIT_ARRAYSZ 33 /* Size in bytes of the array of
+ pre-initialization functions. */
+#define DT_MAXPOSTAGS 34 /* Number of positive tags; one past DT_PREINIT_ARRAYSZ. */
+#define DT_LOOS 0x6000000d /* First OS-specific */
+#define DT_SUNW_AUXILIARY 0x6000000d /* symbol auxiliary name */
+#define DT_SUNW_RTLDINF 0x6000000e /* ld.so.1 info (private) */
+#define DT_SUNW_FILTER 0x6000000f /* symbol filter name */
+#define DT_SUNW_CAP 0x60000010 /* hardware/software */
+#define DT_HIOS 0x6ffff000 /* Last OS-specific */
+
+/*
+ * DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the
+ * Dyn.d_un.d_val field of the Elf*_Dyn structure.
+ */
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_GNU_PRELINKED 0x6ffffdf5 /* prelinking timestamp */
+#define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* size of conflict section */
+#define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* size of library list */
+#define DT_CHECKSUM 0x6ffffdf8 /* elf checksum */
+#define DT_PLTPADSZ 0x6ffffdf9 /* pltpadding size */
+#define DT_MOVEENT 0x6ffffdfa /* move table entry size */
+#define DT_MOVESZ 0x6ffffdfb /* move table size */
+#define DT_FEATURE 0x6ffffdfc /* feature holder */
+#define DT_FEATURE_1 DT_FEATURE
+#define DT_POSFLAG_1 0x6ffffdfd /* flags for DT_* entries, affecting */
+ /* the following DT_* entry. */
+ /* See DF_P1_* definitions */
+#define DT_SYMINSZ 0x6ffffdfe /* syminfo table size (in bytes) */
+#define DT_SYMINENT 0x6ffffdff /* syminfo entry size (in bytes) */
+#define DT_VALRNGHI 0x6ffffdff
+
+/*
+ * DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the
+ * Dyn.d_un.d_ptr field of the Elf*_Dyn structure.
+ *
+ * If any adjustment is made to the ELF object after it has been
+ * built, these entries will need to be adjusted.
+ */
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_HASH 0x6ffffef5 /* GNU-style hash table */
+#define DT_TLSDESC_PLT 0x6ffffef6 /* loc. of PLT for tlsdesc resolver */
+#define DT_TLSDESC_GOT 0x6ffffef7 /* loc. of GOT for tlsdesc resolver */
+#define DT_GNU_CONFLICT 0x6ffffef8 /* address of conflict section */
+#define DT_GNU_LIBLIST 0x6ffffef9 /* address of library list */
+#define DT_CONFIG 0x6ffffefa /* configuration information */
+#define DT_DEPAUDIT 0x6ffffefb /* dependency auditing */
+#define DT_AUDIT 0x6ffffefc /* object auditing */
+#define DT_PLTPAD 0x6ffffefd /* pltpadding (sparcv9) */
+#define DT_MOVETAB 0x6ffffefe /* move table */
+#define DT_SYMINFO 0x6ffffeff /* syminfo table */
+#define DT_ADDRRNGHI 0x6ffffeff
+
+#define DT_VERSYM 0x6ffffff0 /* Address of versym section. */
+#define DT_RELACOUNT 0x6ffffff9 /* number of RELATIVE relocations */
+#define DT_RELCOUNT 0x6ffffffa /* number of RELATIVE relocations */
+#define DT_FLAGS_1 0x6ffffffb /* state flags - see DF_1_* defs */
+#define DT_VERDEF 0x6ffffffc /* Address of verdef section. */
+#define DT_VERDEFNUM 0x6ffffffd /* Number of elems in verdef section */
+#define DT_VERNEED 0x6ffffffe /* Address of verneed section. */
+#define DT_VERNEEDNUM 0x6fffffff /* Number of elems in verneed section */
+
+#define DT_LOPROC 0x70000000 /* First processor-specific type. */
+
+#define DT_ARM_SYMTABSZ 0x70000001
+#define DT_ARM_PREEMPTMAP 0x70000002
+
+#define DT_SPARC_REGISTER 0x70000001
+#define DT_DEPRECATED_SPARC_REGISTER 0x7000001 /* NOTE(review): seven hex digits (0x07000001), so it lies below DT_LOPROC; presumably the historical deprecated value -- confirm before "correcting" it. */
+
+#define DT_MIPS_RLD_VERSION 0x70000001
+#define DT_MIPS_TIME_STAMP 0x70000002
+#define DT_MIPS_ICHECKSUM 0x70000003
+#define DT_MIPS_IVERSION 0x70000004
+#define DT_MIPS_FLAGS 0x70000005
+#define DT_MIPS_BASE_ADDRESS 0x70000006
+#define DT_MIPS_CONFLICT 0x70000008
+#define DT_MIPS_LIBLIST 0x70000009
+#define DT_MIPS_LOCAL_GOTNO 0x7000000a
+#define DT_MIPS_CONFLICTNO 0x7000000b
+#define DT_MIPS_LIBLISTNO 0x70000010
+#define DT_MIPS_SYMTABNO 0x70000011
+#define DT_MIPS_UNREFEXTNO 0x70000012
+#define DT_MIPS_GOTSYM 0x70000013
+#define DT_MIPS_HIPAGENO 0x70000014
+#define DT_MIPS_RLD_MAP 0x70000016
+#define DT_MIPS_DELTA_CLASS 0x70000017
+#define DT_MIPS_DELTA_CLASS_NO 0x70000018
+#define DT_MIPS_DELTA_INSTANCE 0x70000019
+#define DT_MIPS_DELTA_INSTANCE_NO 0x7000001A
+#define DT_MIPS_DELTA_RELOC 0x7000001B
+#define DT_MIPS_DELTA_RELOC_NO 0x7000001C
+#define DT_MIPS_DELTA_SYM 0x7000001D
+#define DT_MIPS_DELTA_SYM_NO 0x7000001E
+#define DT_MIPS_DELTA_CLASSSYM 0x70000020
+#define DT_MIPS_DELTA_CLASSSYM_NO 0x70000021
+#define DT_MIPS_CXX_FLAGS 0x70000022
+#define DT_MIPS_PIXIE_INIT 0x70000023
+#define DT_MIPS_SYMBOL_LIB 0x70000024
+#define DT_MIPS_LOCALPAGE_GOTIDX 0x70000025
+#define DT_MIPS_LOCAL_GOTIDX 0x70000026
+#define DT_MIPS_HIDDEN_GOTIDX 0x70000027
+#define DT_MIPS_PROTECTED_GOTIDX 0x70000028
+#define DT_MIPS_OPTIONS 0x70000029
+#define DT_MIPS_INTERFACE 0x7000002A
+#define DT_MIPS_DYNSTR_ALIGN 0x7000002B
+#define DT_MIPS_INTERFACE_SIZE 0x7000002C
+#define DT_MIPS_RLD_TEXT_RESOLVE_ADDR 0x7000002D
+#define DT_MIPS_PERF_SUFFIX 0x7000002E
+#define DT_MIPS_COMPACT_SIZE 0x7000002F
+#define DT_MIPS_GP_VALUE 0x70000030
+#define DT_MIPS_AUX_DYNAMIC 0x70000031
+#define DT_MIPS_PLTGOT 0x70000032
+#define DT_MIPS_RLD_OBJ_UPDATE 0x70000033
+#define DT_MIPS_RWPLT 0x70000034
+
+#define DT_PPC_GOT 0x70000000
+#define DT_PPC_TLSOPT 0x70000001
+
+#define DT_PPC64_GLINK 0x70000000
+#define DT_PPC64_OPD 0x70000001
+#define DT_PPC64_OPDSZ 0x70000002
+#define DT_PPC64_TLSOPT 0x70000003
+
+#define DT_AUXILIARY 0x7ffffffd /* shared library auxiliary name */
+#define DT_USED 0x7ffffffe /* ignored - same as needed */
+#define DT_FILTER 0x7fffffff /* shared library filter name */
+#define DT_HIPROC 0x7fffffff /* Last processor-specific type. */
+
+/* Values for DT_FLAGS */
+#define DF_ORIGIN 0x0001 /* Indicates that the object being loaded may
+ make reference to the $ORIGIN substitution
+ string */
+#define DF_SYMBOLIC 0x0002 /* Indicates "symbolic" linking. */
+#define DF_TEXTREL 0x0004 /* Indicates there may be relocations in
+ non-writable segments. */
+#define DF_BIND_NOW 0x0008 /* Indicates that the dynamic linker should
+ process all relocations for the object
+ containing this entry before transferring
+ control to the program. */
+#define DF_STATIC_TLS 0x0010 /* Indicates that the shared object or
+ executable contains code using a static
+ thread-local storage scheme. */
+
+/* Values for DT_FLAGS_1 */
+#define DF_1_BIND_NOW 0x00000001 /* Same as DF_BIND_NOW */
+#define DF_1_GLOBAL 0x00000002 /* Set the RTLD_GLOBAL for object */
+#define DF_1_NODELETE 0x00000008 /* Set the RTLD_NODELETE for object */
+#define DF_1_LOADFLTR 0x00000010 /* Immediate loading of filtees */
+#define DF_1_NOOPEN 0x00000040 /* Do not allow loading on dlopen() */
+#define DF_1_ORIGIN 0x00000080 /* Process $ORIGIN */
+#define DF_1_INTERPOSE 0x00000400 /* Interpose all objects but main */
+#define DF_1_NODEFLIB 0x00000800 /* Do not search default paths */
+
+/* Values for l_flags. */
+#define LL_NONE 0x0 /* no flags */
+#define LL_EXACT_MATCH 0x1 /* require an exact match */
+#define LL_IGNORE_INT_VER 0x2 /* ignore version incompatibilities */
+#define LL_REQUIRE_MINOR 0x4
+#define LL_EXPORTS 0x8
+#define LL_DELAY_LOAD 0x10
+#define LL_DELTA 0x20
+
+/* Values for n_type used in executables. */
+#define NT_FREEBSD_ABI_TAG 1
+#define NT_FREEBSD_NOINIT_TAG 2
+#define NT_FREEBSD_ARCH_TAG 3
+
+/* Values for n_type. Used in core files. */
+#define NT_PRSTATUS 1 /* Process status. */
+#define NT_FPREGSET 2 /* Floating point registers. */
+#define NT_PRPSINFO 3 /* Process state info. */
+#define NT_THRMISC 7 /* Thread miscellaneous info. */
+#define NT_PROCSTAT_PROC 8 /* Procstat proc data. */
+#define NT_PROCSTAT_FILES 9 /* Procstat files data. */
+#define NT_PROCSTAT_VMMAP 10 /* Procstat vmmap data. */
+#define NT_PROCSTAT_GROUPS 11 /* Procstat groups data. */
+#define NT_PROCSTAT_UMASK 12 /* Procstat umask data. */
+#define NT_PROCSTAT_RLIMIT 13 /* Procstat rlimit data. */
+#define NT_PROCSTAT_OSREL 14 /* Procstat osreldate data. */
+#define NT_PROCSTAT_PSSTRINGS 15 /* Procstat ps_strings data. */
+#define NT_PROCSTAT_AUXV 16 /* Procstat auxv data. */
+#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state. */
+
+/* Symbol Binding - ELFNN_ST_BIND - st_info */
+#define STB_LOCAL 0 /* Local symbol */
+#define STB_GLOBAL 1 /* Global symbol */
+#define STB_WEAK 2 /* like global - lower precedence */
+#define STB_LOOS 10 /* Start of operating system reserved range. */
+#define STB_GNU_UNIQUE 10 /* Unique symbol (GNU); same value as STB_LOOS. */
+#define STB_HIOS 12 /* End of operating system reserved range. */
+#define STB_LOPROC 13 /* Start of processor reserved range. */
+#define STB_HIPROC 15 /* End of processor reserved range. */
+
+/* Symbol type - ELFNN_ST_TYPE - st_info */
+#define STT_NOTYPE 0 /* Unspecified type. */
+#define STT_OBJECT 1 /* Data object. */
+#define STT_FUNC 2 /* Function. */
+#define STT_SECTION 3 /* Section. */
+#define STT_FILE 4 /* Source file. */
+#define STT_COMMON 5 /* Uninitialized common block. */
+#define STT_TLS 6 /* TLS object. */
+#define STT_NUM 7 /* One past STT_TLS, the last type listed above. */
+#define STT_LOOS 10 /* Start of operating system reserved range. */
+#define STT_GNU_IFUNC 10 /* Same value as STT_LOOS. */
+#define STT_HIOS 12 /* End of operating system reserved range. */
+#define STT_LOPROC 13 /* Start of processor reserved range. */
+#define STT_SPARC_REGISTER 13 /* SPARC register information. */
+#define STT_HIPROC 15 /* End of processor reserved range. */
+
+/* Symbol visibility - ELFNN_ST_VISIBILITY - st_other */
+#define STV_DEFAULT 0x0 /* Default visibility (see binding). */
+#define STV_INTERNAL 0x1 /* Special meaning in relocatable objects. */
+#define STV_HIDDEN 0x2 /* Not visible. */
+#define STV_PROTECTED 0x3 /* Visible but not preemptible. */
+#define STV_EXPORTED 0x4
+#define STV_SINGLETON 0x5
+#define STV_ELIMINATE 0x6
+
+/* Special symbol table indexes. */
+#define STN_UNDEF 0 /* Undefined symbol index. */
+
+/* Symbol versioning flags. */
+#define VER_DEF_CURRENT 1
+#define VER_DEF_IDX(x) VER_NDX(x) /* x with bit 15 cleared (see VER_NDX). */
+
+#define VER_FLG_BASE 0x01
+#define VER_FLG_WEAK 0x02
+
+#define VER_NEED_CURRENT 1
+#define VER_NEED_WEAK (1u << 15) /* Bit 15; same bit as VER_NDX_HIDDEN. */
+#define VER_NEED_HIDDEN VER_NDX_HIDDEN /* Alias for VER_NDX_HIDDEN. */
+#define VER_NEED_IDX(x) VER_NDX(x) /* x with bit 15 cleared (see VER_NDX). */
+
+#define VER_NDX_LOCAL 0
+#define VER_NDX_GLOBAL 1
+#define VER_NDX_GIVEN 2
+
+#define VER_NDX_HIDDEN (1u << 15) /* Bit 15 (0x8000), the same bit as VERSYM_HIDDEN. */
+#define VER_NDX(x) ((x) & ~(1u << 15)) /* Clears bit 15 (VER_NDX_HIDDEN) from x; all other bits are kept. */
+
+#define CA_SUNW_NULL 0
+#define CA_SUNW_HW_1 1 /* first hardware capabilities entry */
+#define CA_SUNW_SF_1 2 /* first software capabilities entry */
+
+/*
+ * Syminfo flag values
+ */
+#define SYMINFO_FLG_DIRECT 0x0001 /* symbol ref has direct association */
+ /* to object containing defn. */
+#define SYMINFO_FLG_PASSTHRU 0x0002 /* ignored - see SYMINFO_FLG_FILTER */
+#define SYMINFO_FLG_COPY 0x0004 /* symbol is a copy-reloc */
+#define SYMINFO_FLG_LAZYLOAD 0x0008 /* object containing defn should be */
+ /* lazily-loaded */
+#define SYMINFO_FLG_DIRECTBIND 0x0010 /* ref should be bound directly to */
+ /* object containing defn. */
+#define SYMINFO_FLG_NOEXTDIRECT 0x0020 /* don't let an external reference */
+ /* directly bind to this symbol */
+#define SYMINFO_FLG_FILTER 0x0002 /* symbol ref is associated with a standard filter; same value as SYMINFO_FLG_PASSTHRU */
+#define SYMINFO_FLG_AUXILIARY 0x0040 /* symbol ref is associated with an auxiliary filter */
+
+/*
+ * Syminfo.si_boundto values.
+ */
+#define SYMINFO_BT_SELF 0xffff /* symbol bound to self */
+#define SYMINFO_BT_PARENT 0xfffe /* symbol bound to parent */
+#define SYMINFO_BT_NONE 0xfffd /* no special symbol binding */
+#define SYMINFO_BT_EXTERN 0xfffc /* symbol defined as external */
+#define SYMINFO_BT_LOWRESERVE 0xff00 /* beginning of reserved entries */
+
+/*
+ * Syminfo version values.
+ */
+#define SYMINFO_NONE 0 /* Syminfo version */
+#define SYMINFO_CURRENT 1
+#define SYMINFO_NUM 2
+
+/*
+ * Relocation types.
+ *
+ * All machine architectures are defined here to allow tools on one to
+ * handle others.
+ */
+
+#define R_386_NONE 0 /* No relocation. */
+#define R_386_32 1 /* Add symbol value. */
+#define R_386_PC32 2 /* Add PC-relative symbol value. */
+#define R_386_GOT32 3 /* Add PC-relative GOT offset. */
+#define R_386_PLT32 4 /* Add PC-relative PLT offset. */
+#define R_386_COPY 5 /* Copy data from shared object. */
+#define R_386_GLOB_DAT 6 /* Set GOT entry to data address. */
+#define R_386_JMP_SLOT 7 /* Set GOT entry to code address. */
+#define R_386_RELATIVE 8 /* Add load address of shared object. */
+#define R_386_GOTOFF 9 /* Add GOT-relative symbol address. */
+#define R_386_GOTPC 10 /* Add PC-relative GOT table address. */
+#define R_386_TLS_TPOFF 14 /* Negative offset in static TLS block */
+#define R_386_TLS_IE 15 /* Absolute address of GOT for -ve static TLS */
+#define R_386_TLS_GOTIE 16 /* GOT entry for negative static TLS block */
+#define R_386_TLS_LE 17 /* Negative offset relative to static TLS */
+#define R_386_TLS_GD 18 /* 32 bit offset to GOT (index,off) pair */
+#define R_386_TLS_LDM 19 /* 32 bit offset to GOT (index,zero) pair */
+#define R_386_TLS_GD_32 24 /* 32 bit offset to GOT (index,off) pair */
+#define R_386_TLS_GD_PUSH 25 /* pushl instruction for Sun ABI GD sequence */
+#define R_386_TLS_GD_CALL 26 /* call instruction for Sun ABI GD sequence */
+#define R_386_TLS_GD_POP 27 /* popl instruction for Sun ABI GD sequence */
+#define R_386_TLS_LDM_32 28 /* 32 bit offset to GOT (index,zero) pair */
+#define R_386_TLS_LDM_PUSH 29 /* pushl instruction for Sun ABI LD sequence */
+#define R_386_TLS_LDM_CALL 30 /* call instruction for Sun ABI LD sequence */
+#define R_386_TLS_LDM_POP 31 /* popl instruction for Sun ABI LD sequence */
+#define R_386_TLS_LDO_32 32 /* 32 bit offset from start of TLS block */
+#define R_386_TLS_IE_32 33 /* 32 bit offset to GOT static TLS offset entry */
+#define R_386_TLS_LE_32 34 /* 32 bit offset within static TLS block */
+#define R_386_TLS_DTPMOD32 35 /* GOT entry containing TLS index */
+#define R_386_TLS_DTPOFF32 36 /* GOT entry containing TLS offset */
+#define R_386_TLS_TPOFF32 37 /* GOT entry of -ve static TLS offset */
+#define R_386_IRELATIVE 42 /* PLT entry resolved indirectly at runtime */
+
+#define R_AARCH64_NONE 0 /* No relocation */
+#define R_AARCH64_ABS64 257 /* Absolute offset */
+#define R_AARCH64_ABS32 258 /* Absolute, 32-bit overflow check */
+#define R_AARCH64_ABS16 259 /* Absolute, 16-bit overflow check */
+#define R_AARCH64_PREL64 260 /* PC relative */
+#define R_AARCH64_PREL32 261 /* PC relative, 32-bit overflow check */
+#define R_AARCH64_PREL16 262 /* PC relative, 16-bit overflow check */
+#define R_AARCH64_COPY 1024 /* Copy data from shared object */
+#define R_AARCH64_GLOB_DAT 1025 /* Set GOT entry to data address */
+#define R_AARCH64_JUMP_SLOT 1026 /* Set GOT entry to code address */
+#define R_AARCH64_RELATIVE 1027 /* Add load address of shared object */
+#define R_AARCH64_TLS_DTPREL64 1028
+#define R_AARCH64_TLS_DTPMOD64 1029
+#define R_AARCH64_TLS_TPREL64 1030
+#define R_AARCH64_TLSDESC 1031 /* Identify the TLS descriptor */
+#define R_AARCH64_IRELATIVE 1032
+
+#define R_ARM_NONE 0 /* No relocation. */
+#define R_ARM_PC24 1
+#define R_ARM_ABS32 2
+#define R_ARM_REL32 3
+#define R_ARM_PC13 4
+#define R_ARM_ABS16 5
+#define R_ARM_ABS12 6
+#define R_ARM_THM_ABS5 7
+#define R_ARM_ABS8 8
+#define R_ARM_SBREL32 9
+#define R_ARM_THM_PC22 10
+#define R_ARM_THM_PC8 11
+#define R_ARM_AMP_VCALL9 12
+#define R_ARM_SWI24 13
+#define R_ARM_THM_SWI8 14
+#define R_ARM_XPC25 15
+#define R_ARM_THM_XPC22 16
+/* TLS relocations */
+#define R_ARM_TLS_DTPMOD32 17 /* ID of module containing symbol */
+#define R_ARM_TLS_DTPOFF32 18 /* Offset in TLS block */
+#define R_ARM_TLS_TPOFF32 19 /* Offset in static TLS block */
+#define R_ARM_COPY 20 /* Copy data from shared object. */
+#define R_ARM_GLOB_DAT 21 /* Set GOT entry to data address. */
+#define R_ARM_JUMP_SLOT 22 /* Set GOT entry to code address. */
+#define R_ARM_RELATIVE 23 /* Add load address of shared object. */
+#define R_ARM_GOTOFF 24 /* Add GOT-relative symbol address. */
+#define R_ARM_GOTPC 25 /* Add PC-relative GOT table address. */
+#define R_ARM_GOT32 26 /* Add PC-relative GOT offset. */
+#define R_ARM_PLT32 27 /* Add PC-relative PLT offset. */
+#define R_ARM_GNU_VTENTRY 100
+#define R_ARM_GNU_VTINHERIT 101
+#define R_ARM_RSBREL32 250
+#define R_ARM_THM_RPC22 251
+#define R_ARM_RREL32 252
+#define R_ARM_RABS32 253
+#define R_ARM_RPC24 254
+#define R_ARM_RBASE 255
+
+/* Name Value Field Calculation */
+#define R_IA_64_NONE 0 /* None */
+#define R_IA_64_IMM14 0x21 /* immediate14 S + A */
+#define R_IA_64_IMM22 0x22 /* immediate22 S + A */
+#define R_IA_64_IMM64 0x23 /* immediate64 S + A */
+#define R_IA_64_DIR32MSB 0x24 /* word32 MSB S + A */
+#define R_IA_64_DIR32LSB 0x25 /* word32 LSB S + A */
+#define R_IA_64_DIR64MSB 0x26 /* word64 MSB S + A */
+#define R_IA_64_DIR64LSB 0x27 /* word64 LSB S + A */
+#define R_IA_64_GPREL22 0x2a /* immediate22 @gprel(S + A) */
+#define R_IA_64_GPREL64I 0x2b /* immediate64 @gprel(S + A) */
+#define R_IA_64_GPREL32MSB 0x2c /* word32 MSB @gprel(S + A) */
+#define R_IA_64_GPREL32LSB 0x2d /* word32 LSB @gprel(S + A) */
+#define R_IA_64_GPREL64MSB 0x2e /* word64 MSB @gprel(S + A) */
+#define R_IA_64_GPREL64LSB 0x2f /* word64 LSB @gprel(S + A) */
+#define R_IA_64_LTOFF22 0x32 /* immediate22 @ltoff(S + A) */
+#define R_IA_64_LTOFF64I 0x33 /* immediate64 @ltoff(S + A) */
+#define R_IA_64_PLTOFF22 0x3a /* immediate22 @pltoff(S + A) */
+#define R_IA_64_PLTOFF64I 0x3b /* immediate64 @pltoff(S + A) */
+#define R_IA_64_PLTOFF64MSB 0x3e /* word64 MSB @pltoff(S + A) */
+#define R_IA_64_PLTOFF64LSB 0x3f /* word64 LSB @pltoff(S + A) */
+#define R_IA_64_FPTR64I 0x43 /* immediate64 @fptr(S + A) */
+#define R_IA_64_FPTR32MSB 0x44 /* word32 MSB @fptr(S + A) */
+#define R_IA_64_FPTR32LSB 0x45 /* word32 LSB @fptr(S + A) */
+#define R_IA_64_FPTR64MSB 0x46 /* word64 MSB @fptr(S + A) */
+#define R_IA_64_FPTR64LSB 0x47 /* word64 LSB @fptr(S + A) */
+#define R_IA_64_PCREL60B 0x48 /* immediate60 form1 S + A - P */
+#define R_IA_64_PCREL21B 0x49 /* immediate21 form1 S + A - P */
+#define R_IA_64_PCREL21M 0x4a /* immediate21 form2 S + A - P */
+#define R_IA_64_PCREL21F 0x4b /* immediate21 form3 S + A - P */
+#define R_IA_64_PCREL32MSB 0x4c /* word32 MSB S + A - P */
+#define R_IA_64_PCREL32LSB 0x4d /* word32 LSB S + A - P */
+#define R_IA_64_PCREL64MSB 0x4e /* word64 MSB S + A - P */
+#define R_IA_64_PCREL64LSB 0x4f /* word64 LSB S + A - P */
+#define R_IA_64_LTOFF_FPTR22 0x52 /* immediate22 @ltoff(@fptr(S + A)) */
+#define R_IA_64_LTOFF_FPTR64I 0x53 /* immediate64 @ltoff(@fptr(S + A)) */
+#define R_IA_64_LTOFF_FPTR32MSB 0x54 /* word32 MSB @ltoff(@fptr(S + A)) */
+#define R_IA_64_LTOFF_FPTR32LSB 0x55 /* word32 LSB @ltoff(@fptr(S + A)) */
+#define R_IA_64_LTOFF_FPTR64MSB 0x56 /* word64 MSB @ltoff(@fptr(S + A)) */
+#define R_IA_64_LTOFF_FPTR64LSB 0x57 /* word64 LSB @ltoff(@fptr(S + A)) */
+#define R_IA_64_SEGREL32MSB 0x5c /* word32 MSB @segrel(S + A) */
+#define R_IA_64_SEGREL32LSB 0x5d /* word32 LSB @segrel(S + A) */
+#define R_IA_64_SEGREL64MSB 0x5e /* word64 MSB @segrel(S + A) */
+#define R_IA_64_SEGREL64LSB 0x5f /* word64 LSB @segrel(S + A) */
+#define R_IA_64_SECREL32MSB 0x64 /* word32 MSB @secrel(S + A) */
+#define R_IA_64_SECREL32LSB 0x65 /* word32 LSB @secrel(S + A) */
+#define R_IA_64_SECREL64MSB 0x66 /* word64 MSB @secrel(S + A) */
+#define R_IA_64_SECREL64LSB 0x67 /* word64 LSB @secrel(S + A) */
+#define R_IA_64_REL32MSB 0x6c /* word32 MSB BD + A */
+#define R_IA_64_REL32LSB 0x6d /* word32 LSB BD + A */
+#define R_IA_64_REL64MSB 0x6e /* word64 MSB BD + A */
+#define R_IA_64_REL64LSB 0x6f /* word64 LSB BD + A */
+#define R_IA_64_LTV32MSB 0x74 /* word32 MSB S + A */
+#define R_IA_64_LTV32LSB 0x75 /* word32 LSB S + A */
+#define R_IA_64_LTV64MSB 0x76 /* word64 MSB S + A */
+#define R_IA_64_LTV64LSB 0x77 /* word64 LSB S + A */
+#define R_IA_64_PCREL21BI 0x79 /* immediate21 form1 S + A - P */
+#define R_IA_64_PCREL22 0x7a /* immediate22 S + A - P */
+#define R_IA_64_PCREL64I 0x7b /* immediate64 S + A - P */
+#define R_IA_64_IPLTMSB 0x80 /* function descriptor MSB special */
+#define R_IA_64_IPLTLSB 0x81 /* function descriptor LSB special */
+#define R_IA_64_SUB 0x85 /* immediate64 A - S */
+#define R_IA_64_LTOFF22X 0x86 /* immediate22 special */
+#define R_IA_64_LDXMOV 0x87 /* immediate22 special */
+#define R_IA_64_TPREL14 0x91 /* imm14 @tprel(S + A) */
+#define R_IA_64_TPREL22 0x92 /* imm22 @tprel(S + A) */
+#define R_IA_64_TPREL64I 0x93 /* imm64 @tprel(S + A) */
+#define R_IA_64_TPREL64MSB 0x96 /* word64 MSB @tprel(S + A) */
+#define R_IA_64_TPREL64LSB 0x97 /* word64 LSB @tprel(S + A) */
+#define R_IA_64_LTOFF_TPREL22 0x9a /* imm22 @ltoff(@tprel(S+A)) */
+#define R_IA_64_DTPMOD64MSB 0xa6 /* word64 MSB @dtpmod(S + A) */
+#define R_IA_64_DTPMOD64LSB 0xa7 /* word64 LSB @dtpmod(S + A) */
+#define R_IA_64_LTOFF_DTPMOD22 0xaa /* imm22 @ltoff(@dtpmod(S+A)) */
+#define R_IA_64_DTPREL14 0xb1 /* imm14 @dtprel(S + A) */
+#define R_IA_64_DTPREL22 0xb2 /* imm22 @dtprel(S + A) */
+#define R_IA_64_DTPREL64I 0xb3 /* imm64 @dtprel(S + A) */
+#define R_IA_64_DTPREL32MSB 0xb4 /* word32 MSB @dtprel(S + A) */
+#define R_IA_64_DTPREL32LSB 0xb5 /* word32 LSB @dtprel(S + A) */
+#define R_IA_64_DTPREL64MSB 0xb6 /* word64 MSB @dtprel(S + A) */
+#define R_IA_64_DTPREL64LSB 0xb7 /* word64 LSB @dtprel(S + A) */
+#define R_IA_64_LTOFF_DTPREL22 0xba /* imm22 @ltoff(@dtprel(S+A)) */
+
+#define R_MIPS_NONE 0 /* No reloc */
+#define R_MIPS_16 1 /* Direct 16 bit */
+#define R_MIPS_32 2 /* Direct 32 bit */
+#define R_MIPS_REL32 3 /* PC relative 32 bit */
+#define R_MIPS_26 4 /* Direct 26 bit shifted */
+#define R_MIPS_HI16 5 /* High 16 bit */
+#define R_MIPS_LO16 6 /* Low 16 bit */
+#define R_MIPS_GPREL16 7 /* GP relative 16 bit */
+#define R_MIPS_LITERAL 8 /* 16 bit literal entry */
+#define R_MIPS_GOT16 9 /* 16 bit GOT entry */
+#define R_MIPS_PC16 10 /* PC relative 16 bit */
+#define R_MIPS_CALL16 11 /* 16 bit GOT entry for function */
+#define R_MIPS_GPREL32 12 /* GP relative 32 bit */
+#define R_MIPS_64 18 /* Direct 64 bit */
+#define R_MIPS_GOT_DISP 19
+#define R_MIPS_GOT_PAGE 20
+#define R_MIPS_GOT_OFST 21
+#define R_MIPS_GOT_HI16 22 /* GOT HI 16 bit */
+#define R_MIPS_GOT_LO16 23 /* GOT LO 16 bit */
+#define R_MIPS_SUB 24
+#define R_MIPS_CALLHI16 30 /* upper 16 bit GOT entry for function */
+#define R_MIPS_CALLLO16 31 /* lower 16 bit GOT entry for function */
+#define R_MIPS_JALR 37
+#define R_MIPS_TLS_GD 42
+
+#define R_PPC_NONE 0 /* No relocation. */
+#define R_PPC_ADDR32 1
+#define R_PPC_ADDR24 2
+#define R_PPC_ADDR16 3
+#define R_PPC_ADDR16_LO 4
+#define R_PPC_ADDR16_HI 5
+#define R_PPC_ADDR16_HA 6
+#define R_PPC_ADDR14 7
+#define R_PPC_ADDR14_BRTAKEN 8
+#define R_PPC_ADDR14_BRNTAKEN 9
+#define R_PPC_REL24 10
+#define R_PPC_REL14 11
+#define R_PPC_REL14_BRTAKEN 12
+#define R_PPC_REL14_BRNTAKEN 13
+#define R_PPC_GOT16 14
+#define R_PPC_GOT16_LO 15
+#define R_PPC_GOT16_HI 16
+#define R_PPC_GOT16_HA 17
+#define R_PPC_PLTREL24 18
+#define R_PPC_COPY 19
+#define R_PPC_GLOB_DAT 20
+#define R_PPC_JMP_SLOT 21
+#define R_PPC_RELATIVE 22
+#define R_PPC_LOCAL24PC 23
+#define R_PPC_UADDR32 24
+#define R_PPC_UADDR16 25
+#define R_PPC_REL32 26
+#define R_PPC_PLT32 27
+#define R_PPC_PLTREL32 28
+#define R_PPC_PLT16_LO 29
+#define R_PPC_PLT16_HI 30
+#define R_PPC_PLT16_HA 31
+#define R_PPC_SDAREL16 32
+#define R_PPC_SECTOFF 33
+#define R_PPC_SECTOFF_LO 34
+#define R_PPC_SECTOFF_HI 35
+#define R_PPC_SECTOFF_HA 36
+
+/*
+ * 64-bit relocations
+ */
+#define R_PPC64_ADDR64 38
+#define R_PPC64_ADDR16_HIGHER 39
+#define R_PPC64_ADDR16_HIGHERA 40
+#define R_PPC64_ADDR16_HIGHEST 41
+#define R_PPC64_ADDR16_HIGHESTA 42
+#define R_PPC64_UADDR64 43
+#define R_PPC64_REL64 44
+#define R_PPC64_PLT64 45
+#define R_PPC64_PLTREL64 46
+#define R_PPC64_TOC16 47
+#define R_PPC64_TOC16_LO 48
+#define R_PPC64_TOC16_HI 49
+#define R_PPC64_TOC16_HA 50
+#define R_PPC64_TOC 51
+#define R_PPC64_DTPMOD64 68
+#define R_PPC64_TPREL64 73
+#define R_PPC64_DTPREL64 78
+
+/*
+ * TLS relocations
+ */
+#define R_PPC_TLS 67
+#define R_PPC_DTPMOD32 68
+#define R_PPC_TPREL16 69
+#define R_PPC_TPREL16_LO 70
+#define R_PPC_TPREL16_HI 71
+#define R_PPC_TPREL16_HA 72
+#define R_PPC_TPREL32 73
+#define R_PPC_DTPREL16 74
+#define R_PPC_DTPREL16_LO 75
+#define R_PPC_DTPREL16_HI 76
+#define R_PPC_DTPREL16_HA 77
+#define R_PPC_DTPREL32 78
+#define R_PPC_GOT_TLSGD16 79
+#define R_PPC_GOT_TLSGD16_LO 80
+#define R_PPC_GOT_TLSGD16_HI 81
+#define R_PPC_GOT_TLSGD16_HA 82
+#define R_PPC_GOT_TLSLD16 83
+#define R_PPC_GOT_TLSLD16_LO 84
+#define R_PPC_GOT_TLSLD16_HI 85
+#define R_PPC_GOT_TLSLD16_HA 86
+#define R_PPC_GOT_TPREL16 87
+#define R_PPC_GOT_TPREL16_LO 88
+#define R_PPC_GOT_TPREL16_HI 89
+#define R_PPC_GOT_TPREL16_HA 90
+
+/*
+ * The remaining relocs are from the Embedded ELF ABI, and are not in the
+ * SVR4 ELF ABI.
+ */
+
+#define R_PPC_EMB_NADDR32 101
+#define R_PPC_EMB_NADDR16 102
+#define R_PPC_EMB_NADDR16_LO 103
+#define R_PPC_EMB_NADDR16_HI 104
+#define R_PPC_EMB_NADDR16_HA 105
+#define R_PPC_EMB_SDAI16 106
+#define R_PPC_EMB_SDA2I16 107
+#define R_PPC_EMB_SDA2REL 108
+#define R_PPC_EMB_SDA21 109
+#define R_PPC_EMB_MRKREF 110
+#define R_PPC_EMB_RELSEC16 111
+#define R_PPC_EMB_RELST_LO 112
+#define R_PPC_EMB_RELST_HI 113
+#define R_PPC_EMB_RELST_HA 114
+#define R_PPC_EMB_BIT_FLD 115
+#define R_PPC_EMB_RELSDA 116
+
+/*
+ * RISC-V relocation types.
+ */
+
+/* Relocation types used by the dynamic linker. */
+#define R_RISCV_NONE 0
+#define R_RISCV_32 1
+#define R_RISCV_64 2
+#define R_RISCV_RELATIVE 3
+#define R_RISCV_COPY 4
+#define R_RISCV_JUMP_SLOT 5
+#define R_RISCV_TLS_DTPMOD32 6
+#define R_RISCV_TLS_DTPMOD64 7
+#define R_RISCV_TLS_DTPREL32 8
+#define R_RISCV_TLS_DTPREL64 9
+#define R_RISCV_TLS_TPREL32 10
+#define R_RISCV_TLS_TPREL64 11
+
+/* Relocation types not used by the dynamic linker. */
+#define R_RISCV_BRANCH 16
+#define R_RISCV_JAL 17
+#define R_RISCV_CALL 18
+#define R_RISCV_CALL_PLT 19
+#define R_RISCV_GOT_HI20 20
+#define R_RISCV_TLS_GOT_HI20 21
+#define R_RISCV_TLS_GD_HI20 22
+#define R_RISCV_PCREL_HI20 23
+#define R_RISCV_PCREL_LO12_I 24
+#define R_RISCV_PCREL_LO12_S 25
+#define R_RISCV_HI20 26
+#define R_RISCV_LO12_I 27
+#define R_RISCV_LO12_S 28
+#define R_RISCV_TPREL_HI20 29
+#define R_RISCV_TPREL_LO12_I 30
+#define R_RISCV_TPREL_LO12_S 31
+#define R_RISCV_TPREL_ADD 32
+#define R_RISCV_ADD8 33
+#define R_RISCV_ADD16 34
+#define R_RISCV_ADD32 35
+#define R_RISCV_ADD64 36
+#define R_RISCV_SUB8 37
+#define R_RISCV_SUB16 38
+#define R_RISCV_SUB32 39
+#define R_RISCV_SUB64 40
+#define R_RISCV_GNU_VTINHERIT 41
+#define R_RISCV_GNU_VTENTRY 42
+#define R_RISCV_ALIGN 43
+#define R_RISCV_RVC_BRANCH 44
+#define R_RISCV_RVC_JUMP 45
+
+#define R_SPARC_NONE 0
+#define R_SPARC_8 1
+#define R_SPARC_16 2
+#define R_SPARC_32 3
+#define R_SPARC_DISP8 4
+#define R_SPARC_DISP16 5
+#define R_SPARC_DISP32 6
+#define R_SPARC_WDISP30 7
+#define R_SPARC_WDISP22 8
+#define R_SPARC_HI22 9
+#define R_SPARC_22 10
+#define R_SPARC_13 11
+#define R_SPARC_LO10 12
+#define R_SPARC_GOT10 13
+#define R_SPARC_GOT13 14
+#define R_SPARC_GOT22 15
+#define R_SPARC_PC10 16
+#define R_SPARC_PC22 17
+#define R_SPARC_WPLT30 18
+#define R_SPARC_COPY 19
+#define R_SPARC_GLOB_DAT 20
+#define R_SPARC_JMP_SLOT 21
+#define R_SPARC_RELATIVE 22
+#define R_SPARC_UA32 23
+#define R_SPARC_PLT32 24
+#define R_SPARC_HIPLT22 25
+#define R_SPARC_LOPLT10 26
+#define R_SPARC_PCPLT32 27
+#define R_SPARC_PCPLT22 28
+#define R_SPARC_PCPLT10 29
+#define R_SPARC_10 30
+#define R_SPARC_11 31
+#define R_SPARC_64 32
+#define R_SPARC_OLO10 33
+#define R_SPARC_HH22 34
+#define R_SPARC_HM10 35
+#define R_SPARC_LM22 36
+#define R_SPARC_PC_HH22 37
+#define R_SPARC_PC_HM10 38
+#define R_SPARC_PC_LM22 39
+#define R_SPARC_WDISP16 40
+#define R_SPARC_WDISP19 41
+#define R_SPARC_GLOB_JMP 42
+#define R_SPARC_7 43
+#define R_SPARC_5 44
+#define R_SPARC_6 45
+#define R_SPARC_DISP64 46
+#define R_SPARC_PLT64 47
+#define R_SPARC_HIX22 48
+#define R_SPARC_LOX10 49
+#define R_SPARC_H44 50
+#define R_SPARC_M44 51
+#define R_SPARC_L44 52
+#define R_SPARC_REGISTER 53
+#define R_SPARC_UA64 54
+#define R_SPARC_UA16 55
+#define R_SPARC_TLS_GD_HI22 56
+#define R_SPARC_TLS_GD_LO10 57
+#define R_SPARC_TLS_GD_ADD 58
+#define R_SPARC_TLS_GD_CALL 59
+#define R_SPARC_TLS_LDM_HI22 60
+#define R_SPARC_TLS_LDM_LO10 61
+#define R_SPARC_TLS_LDM_ADD 62
+#define R_SPARC_TLS_LDM_CALL 63
+#define R_SPARC_TLS_LDO_HIX22 64
+#define R_SPARC_TLS_LDO_LOX10 65
+#define R_SPARC_TLS_LDO_ADD 66
+#define R_SPARC_TLS_IE_HI22 67
+#define R_SPARC_TLS_IE_LO10 68
+#define R_SPARC_TLS_IE_LD 69
+#define R_SPARC_TLS_IE_LDX 70
+#define R_SPARC_TLS_IE_ADD 71
+#define R_SPARC_TLS_LE_HIX22 72
+#define R_SPARC_TLS_LE_LOX10 73
+#define R_SPARC_TLS_DTPMOD32 74
+#define R_SPARC_TLS_DTPMOD64 75
+#define R_SPARC_TLS_DTPOFF32 76
+#define R_SPARC_TLS_DTPOFF64 77
+#define R_SPARC_TLS_TPOFF32 78
+#define R_SPARC_TLS_TPOFF64 79
+
+#define R_X86_64_NONE 0 /* No relocation. */
+#define R_X86_64_64 1 /* Add 64 bit symbol value. */
+#define R_X86_64_PC32 2 /* PC-relative 32 bit signed sym value. */
+#define R_X86_64_GOT32 3 /* 32 bit offset of the symbol's GOT entry from the GOT base. */
+#define R_X86_64_PLT32 4 /* PC-relative 32 bit PLT offset. */
+#define R_X86_64_COPY 5 /* Copy data from shared object. */
+#define R_X86_64_GLOB_DAT 6 /* Set GOT entry to data address. */
+#define R_X86_64_JMP_SLOT 7 /* Set GOT entry to code address. */
+#define R_X86_64_RELATIVE 8 /* Add load address of shared object. */
+#define R_X86_64_GOTPCREL 9 /* Add 32 bit signed pcrel offset to GOT. */
+#define R_X86_64_32 10 /* Add 32 bit zero extended symbol value */
+#define R_X86_64_32S 11 /* Add 32 bit sign extended symbol value */
+#define R_X86_64_16 12 /* Add 16 bit zero extended symbol value */
+#define R_X86_64_PC16 13 /* Add 16 bit sign extended pc relative symbol value */
+#define R_X86_64_8 14 /* Add 8 bit zero extended symbol value */
+#define R_X86_64_PC8 15 /* Add 8 bit sign extended pc relative symbol value */
+#define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */
+#define R_X86_64_DTPOFF64 17 /* Offset in TLS block */
+#define R_X86_64_TPOFF64 18 /* Offset in static TLS block */
+#define R_X86_64_TLSGD 19 /* PC relative offset to GD GOT entry */
+#define R_X86_64_TLSLD 20 /* PC relative offset to LD GOT entry */
+#define R_X86_64_DTPOFF32 21 /* Offset in TLS block */
+#define R_X86_64_GOTTPOFF 22 /* PC relative offset to IE GOT entry */
+#define R_X86_64_TPOFF32 23 /* Offset in static TLS block */
+#define R_X86_64_PC64 24 /* PC-relative 64 bit signed sym value. */
+#define R_X86_64_GOTOFF64 25
+#define R_X86_64_GOTPC32 26
+#define R_X86_64_GOT64 27
+#define R_X86_64_GOTPCREL64 28
+#define R_X86_64_GOTPC64 29
+#define R_X86_64_GOTPLT64 30
+#define R_X86_64_PLTOFF64 31
+#define R_X86_64_SIZE32 32
+#define R_X86_64_SIZE64 33
+#define R_X86_64_GOTPC32_TLSDESC 34
+#define R_X86_64_TLSDESC_CALL 35
+#define R_X86_64_TLSDESC 36
+#define R_X86_64_IRELATIVE 37
+
+
+#endif /* !_FREEBSD_ELF_SYS_ELF_COMMON_H_ */
diff --git a/third_party/googletest b/third_party/googletest
new file mode 160000
+Subproject 565f1b848215b77c3732bca345fe76a0431d8b3
diff --git a/third_party/protobuf b/third_party/protobuf
new file mode 160000
+Subproject bc1773c42c9c3c522145a3119e989e0dff2a8d5
diff --git a/third_party/re2 b/third_party/re2
new file mode 160000
+Subproject 5bd613749fd530b576b890283bfb6bc6ea6246c