Diffstat (limited to 'abseil-cpp/absl/strings/internal/cord_internal.h')
-rw-r--r-- | abseil-cpp/absl/strings/internal/cord_internal.h | 814 |
1 file changed, 779 insertions, 35 deletions
diff --git a/abseil-cpp/absl/strings/internal/cord_internal.h b/abseil-cpp/absl/strings/internal/cord_internal.h
index d456eef..20dd008 100644
--- a/abseil-cpp/absl/strings/internal/cord_internal.h
+++ b/abseil-cpp/absl/strings/internal/cord_internal.h
@@ -1,4 +1,4 @@
-// Copyright 2020 The Abseil Authors.
+// Copyright 2021 The Abseil Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -21,46 +21,162 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/endian.h"
 #include "absl/base/internal/invoke.h"
+#include "absl/base/optimization.h"
 #include "absl/container/internal/compressed_tuple.h"
+#include "absl/container/internal/container_memory.h"
 #include "absl/meta/type_traits.h"
 #include "absl/strings/string_view.h"
 
+// We can only add poisoning if we can detect consteval executions.
+#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
+    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
+     defined(ABSL_HAVE_MEMORY_SANITIZER))
+#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
+#endif
+
+#define ABSL_CORD_INTERNAL_NO_SANITIZE \
+  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
+
 namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace cord_internal {
 
-// Wraps std::atomic for reference counting.
-class Refcount {
+// The overhead of a vtable is too much for Cord, so we roll our own subclasses
+// using only a single byte to differentiate classes from each other - the "tag"
+// byte. Define the subclasses first so we can provide downcasting helper
+// functions in the base class.
+struct CordRep;
+struct CordRepConcat;
+struct CordRepExternal;
+struct CordRepFlat;
+struct CordRepSubstring;
+struct CordRepCrc;
+class CordRepRing;
+class CordRepBtree;
+
+class CordzInfo;
+
+// Default feature enable states for cord ring buffers
+enum CordFeatureDefaults {
+  kCordEnableRingBufferDefault = false,
+  kCordShallowSubcordsDefault = false
+};
+
+extern std::atomic<bool> cord_ring_buffer_enabled;
+extern std::atomic<bool> shallow_subcords_enabled;
+
+inline void enable_cord_ring_buffer(bool enable) {
+  cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
+}
+
+inline void enable_shallow_subcords(bool enable) {
+  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
+}
+
+enum Constants {
+  // The inlined size to use with absl::InlinedVector.
+  //
+  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
+  // the same value for their inlined size. The fact that they do is historical.
+  // It may be desirable for each to use a different inlined size optimized for
+  // that InlinedVector's usage.
+  //
+  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
+  // the inlined vector size (47 exists for backward compatibility).
+  kInlinedVectorSize = 47,
+
+  // Prefer copying blocks of at most this size, otherwise reference count.
+  kMaxBytesToCopy = 511
+};
+
+// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
+ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);
+
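A quick illustration of how the `kMaxBytesToCopy` threshold is meant to be used; the helper below is hypothetical and only demonstrates the copy-versus-refcount decision the constant encodes:

    #include "absl/strings/internal/cord_internal.h"

    // Hypothetical helper: blocks of at most kMaxBytesToCopy (511) bytes are
    // cheaper to copy outright; larger blocks are shared via reference counts.
    inline bool PreferCopy(size_t n) {
      return n <= absl::cord_internal::kMaxBytesToCopy;
    }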
+// Fast implementation of memmove for up to 15 bytes. This implementation is
+// safe for overlapping regions. If nullify_tail is true, the destination is
+// padded with '\0' up to 15 bytes.
+template <bool nullify_tail = false>
+inline void SmallMemmove(char* dst, const char* src, size_t n) {
+  if (n >= 8) {
+    assert(n <= 15);
+    uint64_t buf1;
+    uint64_t buf2;
+    memcpy(&buf1, src, 8);
+    memcpy(&buf2, src + n - 8, 8);
+    if (nullify_tail) {
+      memset(dst + 7, 0, 8);
+    }
+    memcpy(dst, &buf1, 8);
+    memcpy(dst + n - 8, &buf2, 8);
+  } else if (n >= 4) {
+    uint32_t buf1;
+    uint32_t buf2;
+    memcpy(&buf1, src, 4);
+    memcpy(&buf2, src + n - 4, 4);
+    if (nullify_tail) {
+      memset(dst + 4, 0, 4);
+      memset(dst + 7, 0, 8);
+    }
+    memcpy(dst, &buf1, 4);
+    memcpy(dst + n - 4, &buf2, 4);
+  } else {
+    if (n != 0) {
+      dst[0] = src[0];
+      dst[n / 2] = src[n / 2];
+      dst[n - 1] = src[n - 1];
+    }
+    if (nullify_tail) {
+      memset(dst + 7, 0, 8);
+      memset(dst + n, 0, 8);
+    }
+  }
+}
+
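A usage sketch for `SmallMemmove` under its documented contract (`n` at most 15; with `nullify_tail` the destination needs room for 15 bytes):

    #include <cassert>
    #include "absl/strings/internal/cord_internal.h"

    void SmallMemmoveDemo() {
      char dst[15];
      // Copies the 5 source bytes, then zero-fills dst[5..14] because
      // nullify_tail is true; overlapping src/dst ranges are allowed.
      absl::cord_internal::SmallMemmove<true>(dst, "hello", 5);
      assert(dst[0] == 'h' && dst[14] == '\0');
    }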
+// Compact class for tracking the reference count and state flags for CordRep
+// instances. Data is stored in an atomic int32_t for compactness and speed.
+class RefcountAndFlags {
  public:
-  Refcount() : count_{1} {}
-  ~Refcount() {}
+  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
+  struct Immortal {};
+  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}
 
-  // Increments the reference count by 1. Imposes no memory ordering.
-  inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); }
+  // Increments the reference count. Imposes no memory ordering.
+  inline void Increment() {
+    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
+  }
 
   // Asserts that the current refcount is greater than 0. If the refcount is
-  // greater than 1, decrements the reference count by 1.
+  // greater than 1, decrements the reference count.
   //
   // Returns false if there are no references outstanding; true otherwise.
   // Inserts barriers to ensure that state written before this method returns
   // false will be visible to a thread that just observed this method returning
-  // false.
+  // false. Never returns false when the immortal bit is set.
   inline bool Decrement() {
     int32_t refcount = count_.load(std::memory_order_acquire);
-    assert(refcount > 0);
-    return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1;
+    assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
+    return refcount != kRefIncrement &&
+           (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
+            kHighRefcountMask) != 0;
   }
 
   // Same as Decrement but expect that refcount is greater than 1.
   inline bool DecrementExpectHighRefcount() {
-    int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel);
-    assert(refcount > 0);
-    return refcount != 1;
+    int32_t refcount =
+        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
+    assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
+    return (refcount & kHighRefcountMask) != 0;
  }
 
   // Returns the current reference count using acquire semantics.
-  inline int32_t Get() const { return count_.load(std::memory_order_acquire); }
+  inline size_t Get() const {
+    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
+                               kNumFlags);
+  }
 
   // Returns whether the atomic integer is 1.
   // If the reference count is used in the conventional way, a
@@ -69,51 +185,174 @@ class Refcount {
   // This call performs the test for a reference count of one, and
   // performs the memory barrier needed for the owning thread
   // to act on the object, knowing that it has exclusive access to the
-  // object.
-  inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; }
+  // object. Always returns false when the immortal bit is set.
+  inline bool IsOne() {
+    return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
+           kRefIncrement;
+  }
+
+  bool IsImmortal() const {
+    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
+  }
 
  private:
+  // We reserve the bottom bits for flags.
+  // kImmortalFlag indicates that this entity should never be collected; it is
+  // used for the StringConstant constructor to avoid collecting immutable
+  // constant cords.
+  // kReservedFlag is reserved for future use.
+  enum Flags {
+    kNumFlags = 2,
+
+    kImmortalFlag = 0x1,
+    kReservedFlag = 0x2,
+    kRefIncrement = (1 << kNumFlags),
+
+    // Bitmask to use when checking refcount by equality. This masks out
+    // all flags except kImmortalFlag, which is part of the refcount for
+    // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1
+    // if the immortal bit is set.)
+    kRefcountMask = ~kReservedFlag,
+
+    // Bitmask to use when checking if the refcount is equal to 1 and not
+    // immortal while decrementing the refcount. This masks out kRefIncrement
+    // and all flags except kImmortalFlag. If the masked RefcountAndFlags is 0,
+    // we assume the refcount is equal to 1, since we know it's not immortal
+    // and not greater than 1. If the masked RefcountAndFlags is not 0, we can
+    // assume the refcount is not equal to 1 since either a higher bit in the
+    // refcount is set, or kImmortalFlag is set.
+    kHighRefcountMask = kRefcountMask & ~kRefIncrement,
+  };
+
   std::atomic<int32_t> count_;
 };
 
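The refcount starts at 1 and `Decrement()` returns true for as long as references remain; a sketch of the intended usage pattern:

    #include <cassert>
    #include "absl/strings/internal/cord_internal.h"

    void RefcountDemo() {
      absl::cord_internal::RefcountAndFlags rc;  // constructed with count 1
      rc.Increment();                            // count is now 2
      assert(rc.Decrement());                    // true: a reference remains
      assert(!rc.Decrement());                   // false: last reference gone,
                                                 // the owner destroys the object
    }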
-// The overhead of a vtable is too much for Cord, so we roll our own subclasses
-// using only a single byte to differentiate classes from each other - the "tag"
-// byte. Define the subclasses first so we can provide downcasting helper
-// functions in the base class.
+// Various representations that we allow
+enum CordRepKind {
+  UNUSED_0 = 0,
+  SUBSTRING = 1,
+  CRC = 2,
+  BTREE = 3,
+  RING = 4,
+  EXTERNAL = 5,
 
-struct CordRepConcat;
-struct CordRepSubstring;
-struct CordRepExternal;
+  // We have different tags for different sized flat arrays,
+  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an
+  // allocated range of 32 bytes to 256 KiB. The current granularity is:
+  //   - 8 byte granularity for flat sizes in [32 - 512]
+  //   - 64 byte granularity for flat sizes in (512 - 8 KiB]
+  //   - 4 KiB granularity for flat sizes in (8 KiB, 256 KiB]
+  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
+  // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
+  // represents the minimum flat allocation size. (32 bytes as of now).
+  FLAT = 6,
+  MAX_FLAT_TAG = 248
+};
+
+// There are various locations where we want to check if some rep is a 'plain'
+// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
+// can perform this check in a single branch as 'tag >= EXTERNAL'.
+// Likewise, some locations check for 'ring or external/flat', so RING is
+// aligned directly below EXTERNAL as well.
+// Note that we can leave this optimization to the compiler. The compiler will
+// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
+static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
+static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
+static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
 
 struct CordRep {
+  // Result from an `extract edge` operation. Contains the (possibly changed)
+  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
+  // could be extracted.
+  // On success, the returned `tree` value is null if `extracted` was the only
+  // data edge inside the tree, a data edge if there were only two data edges in
+  // the tree, or the (possibly new / smaller) remaining tree with the extracted
+  // data edge removed.
+  struct ExtractResult {
+    CordRep* tree;
+    CordRep* extracted;
+  };
+
+  CordRep() = default;
+  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
+      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
+
   // The following fields have to be less than 32 bytes since
-  // that is the smallest supported flat node size.
+  // that is the smallest supported flat node size. Some code optimizations rely
+  // on the specific layout of these fields. Notably: the non-trivial field
+  // `refcount` being preceded by `length`, and being tailed by POD data
+  // members only.
+  // # LINT.IfChange
   size_t length;
-  Refcount refcount;
+  RefcountAndFlags refcount;
   // If tag < FLAT, it represents CordRepKind and indicates the type of node.
   // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
   uint8_t tag;
-  char data[1];  // Starting point for flat array: MUST BE LAST FIELD of CordRep
 
-  inline CordRepConcat* concat();
-  inline const CordRepConcat* concat() const;
+  // `storage` serves two main purposes:
+  // - the starting point for FlatCordRep.Data() [flexible-array-member]
+  // - 3 bytes of additional storage for use by derived classes.
+  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
+  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
+  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
+  // allocate room for these in the derived class, as not all compilers reuse
+  // padding space from the base class (clang and gcc do, MSVC does not, etc.)
+  uint8_t storage[3];
+  // # LINT.ThenChange(cord_rep_btree.h:copy_raw)
+
+  // Returns true if this instance's tag matches the requested type.
+  constexpr bool IsRing() const { return tag == RING; }
+  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
+  constexpr bool IsCrc() const { return tag == CRC; }
+  constexpr bool IsExternal() const { return tag == EXTERNAL; }
+  constexpr bool IsFlat() const { return tag >= FLAT; }
+  constexpr bool IsBtree() const { return tag == BTREE; }
+
+  inline CordRepRing* ring();
+  inline const CordRepRing* ring() const;
   inline CordRepSubstring* substring();
   inline const CordRepSubstring* substring() const;
+  inline CordRepCrc* crc();
+  inline const CordRepCrc* crc() const;
   inline CordRepExternal* external();
   inline const CordRepExternal* external() const;
+  inline CordRepFlat* flat();
+  inline const CordRepFlat* flat() const;
+  inline CordRepBtree* btree();
+  inline const CordRepBtree* btree() const;
+
+  // --------------------------------------------------------------------
+  // Memory management
+
+  // Destroys the provided `rep`.
+  static void Destroy(CordRep* rep);
-};
 
-struct CordRepConcat : public CordRep {
-  CordRep* left;
-  CordRep* right;
+  // Increments the reference count of `rep`.
+  // Requires `rep` to be a non-null pointer value.
+  static inline CordRep* Ref(CordRep* rep);
 
-  uint8_t depth() const { return static_cast<uint8_t>(data[0]); }
-  void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); }
+  // Decrements the reference count of `rep`. Destroys rep if count reaches
+  // zero. Requires `rep` to be a non-null pointer value.
+  static inline void Unref(CordRep* rep);
 };
 
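Everything dispatches on the single `tag` byte; a small sketch of the downcast helpers in action (the function itself is illustrative only):

    #include "absl/strings/internal/cord_internal.h"

    // Hypothetical: count how many substring nodes wrap the underlying edge.
    inline size_t SubstringDepth(absl::cord_internal::CordRep* rep) {
      size_t depth = 0;
      while (rep->IsSubstring()) {  // tag == SUBSTRING
        rep = rep->substring()->child;
        ++depth;
      }
      return depth;
    }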
 struct CordRepSubstring : public CordRep {
   size_t start;  // Starting offset of substring in child
   CordRep* child;
+
+  // Creates a substring on `child`, adopting a reference on `child`.
+  // Requires `child` to be either a flat or external node, and `pos` and `n` to
+  // form a non-empty partial sub range of `child`, i.e.:
+  // `n > 0 && n < length && n + pos <= length`
+  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);
+
+  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
+  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
+  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
+  // If `rep` is a substring of a flat or external node, then this method will
+  // return a new substring of that flat or external node with `pos` adjusted
+  // by the original `start` position.
+  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
 };
 
 // Type for function pointer that will invoke the releaser function and also
@@ -124,9 +363,19 @@ using ExternalReleaserInvoker = void (*)(CordRepExternal*);
 // External CordReps are allocated together with a type erased releaser. The
 // releaser is stored in the memory directly following the CordRepExternal.
 struct CordRepExternal : public CordRep {
+  CordRepExternal() = default;
+  explicit constexpr CordRepExternal(absl::string_view str)
+      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
+        base(str.data()),
+        releaser_invoker(nullptr) {}
+
   const char* base;
   // Pointer to function that knows how to call and destroy the releaser.
   ExternalReleaserInvoker releaser_invoker;
+
+  // Deletes (releases) the external rep.
+  // Requires rep != nullptr and rep->IsExternal()
+  static void Delete(CordRep* rep);
 };
 
 struct Rank1 {};
@@ -167,7 +416,502 @@ struct CordRepExternalImpl
   }
 };
 
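The releaser machinery above type-erases an arbitrary callable into storage that lives next to the rep; a self-contained sketch of that pattern (names are invented for illustration, this is not the actual abseil implementation):

    #include <cstddef>
    #include <utility>
    #include "absl/strings/string_view.h"

    struct ErasedExternal {
      const char* base;
      size_t length;
      void (*releaser_invoker)(ErasedExternal*);
    };

    template <typename Releaser>
    struct ErasedExternalImpl : ErasedExternal {
      explicit ErasedExternalImpl(Releaser r) : releaser(std::move(r)) {
        releaser_invoker = [](ErasedExternal* self) {
          auto* impl = static_cast<ErasedExternalImpl*>(self);
          impl->releaser(absl::string_view(impl->base, impl->length));
          delete impl;  // the stored releaser dies together with the rep
        };
      }
      Releaser releaser;
    };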
+inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
+                                                  size_t n) {
+  assert(child != nullptr);
+  assert(n > 0);
+  assert(n < child->length);
+  assert(pos < child->length);
+  assert(n <= child->length - pos);
+
+  // TODO(b/217376272): Harden internal logic.
+  // Move to strategic places inside the Cord logic and make this an assert.
+  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
+    LogFatalNodeType(child);
+  }
+
+  CordRepSubstring* rep = new CordRepSubstring();
+  rep->length = n;
+  rep->tag = SUBSTRING;
+  rep->start = pos;
+  rep->child = child;
+  return rep;
+}
+
+inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
+                                            size_t n) {
+  assert(rep != nullptr);
+  assert(n != 0);
+  assert(pos < rep->length);
+  assert(n <= rep->length - pos);
+  if (n == rep->length) return CordRep::Ref(rep);
+  if (rep->IsSubstring()) {
+    pos += rep->substring()->start;
+    rep = rep->substring()->child;
+  }
+  CordRepSubstring* substr = new CordRepSubstring();
+  substr->length = n;
+  substr->tag = SUBSTRING;
+  substr->start = pos;
+  substr->child = CordRep::Ref(rep);
+  return substr;
+}
+
+inline void CordRepExternal::Delete(CordRep* rep) {
+  assert(rep != nullptr && rep->IsExternal());
+  auto* rep_external = static_cast<CordRepExternal*>(rep);
+  assert(rep_external->releaser_invoker != nullptr);
+  rep_external->releaser_invoker(rep_external);
+}
+
+template <typename Str>
+struct ConstInitExternalStorage {
+  ABSL_CONST_INIT static CordRepExternal value;
+};
+
+template <typename Str>
+ABSL_CONST_INIT CordRepExternal
+    ConstInitExternalStorage<Str>::value(Str::value);
+
+enum {
+  kMaxInline = 15,
+};
+
+constexpr char GetOrNull(absl::string_view data, size_t pos) {
+  return pos < data.size() ? data[pos] : '\0';
+}
+
+// We store cordz_info as 64 bit pointer value in little endian format. This
+// guarantees that the least significant byte of cordz_info matches the first
+// byte of the inline data representation in `data`, which holds the inlined
+// size or the 'is_tree' bit.
+using cordz_info_t = int64_t;
+
+// Assert that the `cordz_info` pointer value perfectly overlaps the first half
+// of `data` and can hold a pointer value.
+static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
+static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
+
+// LittleEndianByte() creates a little endian representation of 'value', i.e.:
+// a little endian value where the first byte in the host's representation
+// holds `value`, with all other bytes being 0.
+static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
+#if defined(ABSL_IS_BIG_ENDIAN)
+  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
+#else
+  return value;
+#endif
+}
+
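A sketch of the ownership rules of `Substring()`, assuming `CordRepFlat::New()` and `Data()` from cord_rep_flat.h (that header is not part of this diff):

    #include <cstring>
    #include "absl/strings/internal/cord_internal.h"
    #include "absl/strings/internal/cord_rep_flat.h"  // assumed companion header

    void SubstringDemo() {
      using absl::cord_internal::CordRep;
      using absl::cord_internal::CordRepFlat;
      using absl::cord_internal::CordRepSubstring;
      CordRepFlat* flat = CordRepFlat::New(7);
      memcpy(flat->Data(), "example", 7);
      flat->length = 7;
      // Substring() does not adopt the caller's reference; it refs `flat`
      // itself, so both `sub` and `flat` must eventually be unreffed.
      CordRep* sub = CordRepSubstring::Substring(flat, /*pos=*/2, /*n=*/3);
      CordRep::Unref(sub);   // drops `sub` and its reference on `flat`
      CordRep::Unref(flat);  // drops the caller's reference
    }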
+class InlineData {
+ public:
+  // DefaultInitType forces the use of the default initialization constructor.
+  enum DefaultInitType { kDefaultInit };
+
+  // kNullCordzInfo holds the little endian representation of intptr_t(1).
+  // This is the 'null' / initial value of 'cordz_info'. The null value is
+  // specifically little endian 1: with 64-bit pointers, the first byte of
+  // cordz_info overlaps with the byte holding the tag.
+  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
+
+  // kTagOffset contains the offset of the control byte / tag. This constant is
+  // intended mostly for debugging purposes: do not remove this constant as it
+  // is actively inspected and used by gdb pretty printing code.
+  static constexpr size_t kTagOffset = 0;
+
+  // Implement `~InlineData()` conditionally: we only need this destructor to
+  // unpoison poisoned instances under *SAN, and it will only compile correctly
+  // if the current compiler supports `absl::is_constant_evaluated()`.
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+  ~InlineData() noexcept { unpoison(); }
+#endif
+
+  constexpr InlineData() noexcept { poison_this(); }
+
+  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
+    poison_this();
+  }
+
+  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
+    ABSL_ASSERT(rep != nullptr);
+  }
+
+  // Explicit constexpr constructor to create a constexpr InlineData
+  // value. Creates an inlined SSO value if `rep` is null, otherwise
+  // creates a tree instance value.
+  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
+      : rep_(rep ? Rep(rep) : Rep(sv)) {
+    poison();
+  }
+
+  constexpr InlineData(const InlineData& rhs) noexcept;
+  InlineData& operator=(const InlineData& rhs) noexcept;
+
+  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+    const Rep l = lhs.rep_.SanitizerSafeCopy();
+    const Rep r = rhs.rep_.SanitizerSafeCopy();
+    return memcmp(&l, &r, sizeof(l)) == 0;
+#else
+    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+#endif
+  }
+  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
+    return !operator==(lhs, rhs);
+  }
+
+  // Poisons the unused inlined SSO data if the current instance
+  // is inlined, else un-poisons the entire instance.
+  constexpr void poison();
+
+  // Un-poisons this instance.
+  constexpr void unpoison();
+
+  // Poisons the current instance. This is used on default initialization.
+  constexpr void poison_this();
+
+  // Returns true if the current instance is empty.
+  // The 'empty value' is an inlined data value of zero length.
+  bool is_empty() const { return rep_.tag() == 0; }
+
+  // Returns true if the current instance holds a tree value.
+  bool is_tree() const { return (rep_.tag() & 1) != 0; }
+
+  // Returns true if the current instance holds a cordz_info value.
+  // Requires the current instance to hold a tree value.
+  bool is_profiled() const {
+    assert(is_tree());
+    return rep_.cordz_info() != kNullCordzInfo;
+  }
+
+  // Returns true if either of the provided instances hold a cordz_info value.
+  // This method is more efficient than the equivalent `data1.is_profiled() ||
+  // data2.is_profiled()`. Requires both arguments to hold a tree.
+  static bool is_either_profiled(const InlineData& data1,
+                                 const InlineData& data2) {
+    assert(data1.is_tree() && data2.is_tree());
+    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
+           kNullCordzInfo;
+  }
+
+  // Returns the cordz_info sampling instance for this instance, or nullptr
+  // if the current instance is not sampled and does not have CordzInfo data.
+  // Requires the current instance to hold a tree value.
+  CordzInfo* cordz_info() const {
+    assert(is_tree());
+    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
+        static_cast<uint64_t>(rep_.cordz_info())));
+    assert(info & 1);
+    return reinterpret_cast<CordzInfo*>(info - 1);
+  }
+
+  // Sets the cordz_info sampling instance for this instance. Passing `nullptr`
+  // marks the instance as not sampled (equivalent to clear_cordz_info()).
+  // Requires the current instance to hold a tree value.
+  void set_cordz_info(CordzInfo* cordz_info) {
+    assert(is_tree());
+    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
+    rep_.set_cordz_info(
+        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
+  }
+
+  // Resets the current cordz_info to null / empty.
+  void clear_cordz_info() {
+    assert(is_tree());
+    rep_.set_cordz_info(kNullCordzInfo);
+  }
+
+  // Returns a read only pointer to the character data inside this instance.
+  // Requires the current instance to hold inline data.
+  const char* as_chars() const {
+    assert(!is_tree());
+    return rep_.as_chars();
+  }
+
+  // Returns a mutable pointer to the character data inside this instance.
+  // Should be used for 'write only' operations setting an inlined value.
+  // Applications can set the value of inlined data either before or after
+  // setting the inlined size, i.e., both of the below are valid:
+  //
+  //   // Set inlined data and inline size
+  //   memcpy(data_.as_chars(), data, size);
+  //   data_.set_inline_size(size);
+  //
+  //   // Set inlined size and inline data
+  //   data_.set_inline_size(size);
+  //   memcpy(data_.as_chars(), data, size);
+  //
+  // It's an error to read from the returned pointer without a preceding write
+  // if the current instance does not hold inline data, i.e.: is_tree() == true.
+  char* as_chars() { return rep_.as_chars(); }
+
+  // Returns the tree value of this instance.
+  // Requires the current instance to hold a tree value.
+  CordRep* as_tree() const {
+    assert(is_tree());
+    return rep_.tree();
+  }
+
+  void set_inline_data(const char* data, size_t n) {
+    ABSL_ASSERT(n <= kMaxInline);
+    unpoison();
+    rep_.set_tag(static_cast<int8_t>(n << 1));
+    SmallMemmove<true>(rep_.as_chars(), data, n);
+    poison();
+  }
+
+  void copy_max_inline_to(char* dst) const {
+    assert(!is_tree());
+    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
+  }
+
+  // Initialize this instance to holding the tree value `rep`,
+  // initializing the cordz_info to null, i.e.: 'not profiled'.
+  void make_tree(CordRep* rep) {
+    unpoison();
+    rep_.make_tree(rep);
+  }
+
+  // Set the tree value of this instance to `rep`.
+  // Requires the current instance to already hold a tree value.
+  // Does not affect the value of cordz_info.
+  void set_tree(CordRep* rep) {
+    assert(is_tree());
+    rep_.set_tree(rep);
+  }
+
+  // Returns the size of the inlined character data inside this instance.
+  // Requires the current instance to hold inline data.
+  size_t inline_size() const { return rep_.inline_size(); }
+
+  // Sets the size of the inlined character data inside this instance.
+  // Requires `size` to be <= kMaxInline.
+  // See the documentation on 'as_chars()' for more information and examples.
+  void set_inline_size(size_t size) {
+    unpoison();
+    rep_.set_inline_size(size);
+    poison();
+  }
+
+  // Compares 'this' inlined data with rhs. The comparison is a straightforward
+  // lexicographic comparison. `Compare()` returns values as follows:
+  //
+  //   -1  'this' InlineData instance is smaller
+  //    0  the InlineData instances are equal
+  //    1  'this' InlineData instance is larger
+  int Compare(const InlineData& rhs) const {
+    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
+  }
+
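A short sketch of the inlined (SSO) side of this API; the size tag and the character data may be written in either order, as documented above:

    #include <cassert>
    #include "absl/strings/internal/cord_internal.h"

    void InlineDataDemo() {
      absl::cord_internal::InlineData a, b;
      a.set_inline_data("abc", 3);  // stores the bytes and sets the size tag
      b.set_inline_data("abd", 3);
      assert(!a.is_tree() && a.inline_size() == 3);
      assert(a.Compare(b) == -1);   // plain lexicographic ordering
    }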
+ private:
+  struct Rep {
+    // See cordz_info_t for forced alignment and size of `cordz_info` details.
+    struct AsTree {
+      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
+          : rep(tree) {}
+      cordz_info_t cordz_info = kNullCordzInfo;
+      absl::cord_internal::CordRep* rep;
+    };
+
+    explicit Rep(DefaultInitType) {}
+    constexpr Rep() : data{0} {}
+    constexpr Rep(const Rep&) = default;
+    constexpr Rep& operator=(const Rep&) = default;
+
+    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}
+
+    explicit constexpr Rep(absl::string_view chars)
+        : data{static_cast<char>((chars.size() << 1)),
+               GetOrNull(chars, 0),
+               GetOrNull(chars, 1),
+               GetOrNull(chars, 2),
+               GetOrNull(chars, 3),
+               GetOrNull(chars, 4),
+               GetOrNull(chars, 5),
+               GetOrNull(chars, 6),
+               GetOrNull(chars, 7),
+               GetOrNull(chars, 8),
+               GetOrNull(chars, 9),
+               GetOrNull(chars, 10),
+               GetOrNull(chars, 11),
+               GetOrNull(chars, 12),
+               GetOrNull(chars, 13),
+               GetOrNull(chars, 14)} {}
+
+    // Disable sanitizer as we must always be able to read `tag`.
+    ABSL_CORD_INTERNAL_NO_SANITIZE
+    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
+    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
+
+    char* as_chars() { return data + 1; }
+    const char* as_chars() const { return data + 1; }
+
+    bool is_tree() const { return (tag() & 1) != 0; }
+
+    size_t inline_size() const {
+      ABSL_ASSERT(!is_tree());
+      return static_cast<size_t>(tag()) >> 1;
+    }
+
+    void set_inline_size(size_t size) {
+      ABSL_ASSERT(size <= kMaxInline);
+      set_tag(static_cast<int8_t>(size << 1));
+    }
+
+    CordRep* tree() const { return as_tree.rep; }
+    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
+
+    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
+    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }
+
+    void make_tree(CordRep* tree) {
+      as_tree.rep = tree;
+      as_tree.cordz_info = kNullCordzInfo;
+    }
+
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+    constexpr Rep SanitizerSafeCopy() const {
+      if (!absl::is_constant_evaluated()) {
+        Rep res;
+        if (is_tree()) {
+          res = *this;
+        } else {
+          res.set_tag(tag());
+          memcpy(res.as_chars(), as_chars(), inline_size());
+        }
+        return res;
+      } else {
+        return *this;
+      }
+    }
+#else
+    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
+#endif
+
+    // If the data has length <= kMaxInline, we store it in `data`, and
+    // store the size, shifted left by one bit, in the first char of `data`.
+    // Else we store it in a tree and store a pointer to that tree in
+    // `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero.
+    union {
+      char data[kMaxInline + 1];
+      AsTree as_tree;
+    };
+  };
+
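The tag encoding works because an inline size of 0..15 shifted left by one bit always leaves bit 0 clear, while a tree is marked by a cordz_info whose low byte has bit 0 set; a standalone sketch of that invariant:

    #include <cstdint>

    constexpr uint8_t EncodeInlineSize(uint8_t size) {
      return static_cast<uint8_t>(size << 1);  // bit 0 stays clear
    }
    constexpr bool IsTreeTag(uint8_t tag) { return (tag & 1) != 0; }

    static_assert(!IsTreeTag(EncodeInlineSize(15)), "inline tags keep bit 0 clear");
    static_assert(IsTreeTag(1), "kNullCordzInfo's low byte marks a tree");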
+  // Private implementation of `Compare()`
+  static inline int Compare(const Rep& lhs, const Rep& rhs) {
+    uint64_t x, y;
+    memcpy(&x, lhs.as_chars(), sizeof(x));
+    memcpy(&y, rhs.as_chars(), sizeof(y));
+    if (x == y) {
+      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
+      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
+      if (x == y) {
+        if (lhs.inline_size() == rhs.inline_size()) return 0;
+        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
+      }
+    }
+    x = absl::big_endian::FromHost64(x);
+    y = absl::big_endian::FromHost64(y);
+    return x < y ? -1 : 1;
+  }
+
+  Rep rep_;
+};
+
+static_assert(sizeof(InlineData) == kMaxInline + 1, "");
+
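`Compare()` above works because byte-swapping both words to big endian makes an integer comparison agree with lexicographic (memcmp) byte order; a standalone demonstration of that property:

    #include <cstdint>
    #include <cstring>
    #include "absl/base/internal/endian.h"

    // Compares the first 8 bytes of two buffers lexicographically.
    inline int LexCompare8(const char* a, const char* b) {
      uint64_t x, y;
      memcpy(&x, a, 8);
      memcpy(&y, b, 8);
      x = absl::big_endian::FromHost64(x);  // first byte is most significant
      y = absl::big_endian::FromHost64(y);
      return x == y ? 0 : (x < y ? -1 : 1);
    }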
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData& rhs) noexcept
+    : rep_(rhs.rep_.SanitizerSafeCopy()) {
+  poison();
+}
+
+inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
+  unpoison();
+  rep_ = rhs.rep_.SanitizerSafeCopy();
+  poison();
+  return *this;
+}
+
+constexpr void InlineData::poison_this() {
+  if (!absl::is_constant_evaluated()) {
+    container_internal::SanitizerPoisonObject(this);
+  }
+}
+
+constexpr void InlineData::unpoison() {
+  if (!absl::is_constant_evaluated()) {
+    container_internal::SanitizerUnpoisonObject(this);
+  }
+}
+
+constexpr void InlineData::poison() {
+  if (!absl::is_constant_evaluated()) {
+    if (is_tree()) {
+      container_internal::SanitizerUnpoisonObject(this);
+    } else if (const size_t size = inline_size()) {
+      if (size < kMaxInline) {
+        const char* end = rep_.as_chars() + size;
+        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
+      }
+    } else {
+      container_internal::SanitizerPoisonObject(this);
+    }
+  }
+}
+
+#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData&) noexcept = default;
+inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;
+
+constexpr void InlineData::poison_this() {}
+constexpr void InlineData::unpoison() {}
+constexpr void InlineData::poison() {}
+
+#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+inline CordRepSubstring* CordRep::substring() {
+  assert(IsSubstring());
+  return static_cast<CordRepSubstring*>(this);
+}
+
+inline const CordRepSubstring* CordRep::substring() const {
+  assert(IsSubstring());
+  return static_cast<const CordRepSubstring*>(this);
+}
+
+inline CordRepExternal* CordRep::external() {
+  assert(IsExternal());
+  return static_cast<CordRepExternal*>(this);
+}
+
+inline const CordRepExternal* CordRep::external() const {
+  assert(IsExternal());
+  return static_cast<const CordRepExternal*>(this);
+}
+
+inline CordRep* CordRep::Ref(CordRep* rep) {
+  // ABSL_ASSUME is a workaround for
+  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
+  ABSL_ASSUME(rep != nullptr);
+  rep->refcount.Increment();
+  return rep;
+}
+
+inline void CordRep::Unref(CordRep* rep) {
+  assert(rep != nullptr);
+  // The refcount is expected to be greater than 1, so the unconditional
+  // atomic decrement in DecrementExpectHighRefcount() is typically cheaper
+  // than first branching on a refcount of 1 to avoid it.
+  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
+    Destroy(rep);
+  }
+}
+
 }  // namespace cord_internal
+
 ABSL_NAMESPACE_END
 }  // namespace absl
 
 #endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_