Diffstat (limited to 'abseil-cpp/absl/strings/internal/cord_internal.h')
-rw-r--r--  abseil-cpp/absl/strings/internal/cord_internal.h | 814
1 file changed, 779 insertions(+), 35 deletions(-)
diff --git a/abseil-cpp/absl/strings/internal/cord_internal.h b/abseil-cpp/absl/strings/internal/cord_internal.h
index d456eef..20dd008 100644
--- a/abseil-cpp/absl/strings/internal/cord_internal.h
+++ b/abseil-cpp/absl/strings/internal/cord_internal.h
@@ -1,4 +1,4 @@
-// Copyright 2020 The Abseil Authors.
+// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -21,46 +21,162 @@
#include <cstdint>
#include <type_traits>
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
+#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
+#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
+// We can only add poisoning if we can detect consteval executions.
+#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
+ (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
+ defined(ABSL_HAVE_MEMORY_SANITIZER))
+#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
+#endif
+
+#define ABSL_CORD_INTERNAL_NO_SANITIZE \
+ ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
+
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
-// Wraps std::atomic for reference counting.
-class Refcount {
+// The overhead of a vtable is too much for Cord, so we roll our own subclasses
+// using only a single byte to differentiate classes from each other - the "tag"
+// byte. Define the subclasses first so we can provide downcasting helper
+// functions in the base class.
+struct CordRep;
+struct CordRepConcat;
+struct CordRepExternal;
+struct CordRepFlat;
+struct CordRepSubstring;
+struct CordRepCrc;
+class CordRepRing;
+class CordRepBtree;
+
+class CordzInfo;
+
+// Default enable states for the cord ring buffer and shallow subcord features
+enum CordFeatureDefaults {
+ kCordEnableRingBufferDefault = false,
+ kCordShallowSubcordsDefault = false
+};
+
+extern std::atomic<bool> cord_ring_buffer_enabled;
+extern std::atomic<bool> shallow_subcords_enabled;
+
+inline void enable_cord_ring_buffer(bool enable) {
+ cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
+}
+
+inline void enable_shallow_subcords(bool enable) {
+ shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
+}
+
+enum Constants {
+ // The inlined size to use with absl::InlinedVector.
+ //
+ // Note: The InlinedVectors in this file (and in cord.h) do not need to use
+ // the same value for their inlined size. The fact that they do is historical.
+ // It may be desirable for each to use a different inlined size optimized for
+ // that InlinedVector's usage.
+ //
+ // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
+ // the inlined vector size (47 exists for backward compatibility).
+ kInlinedVectorSize = 47,
+
+ // Prefer copying blocks of at most this size, otherwise reference count.
+ kMaxBytesToCopy = 511
+};
+
+// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
+ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);
+
+// Fast implementation of memmove for up to 15 bytes. This implementation is
+// safe for overlapping regions. If nullify_tail is true, the destination is
+// padded with '\0' up to 15 bytes.
+template <bool nullify_tail = false>
+inline void SmallMemmove(char* dst, const char* src, size_t n) {
+ if (n >= 8) {
+ assert(n <= 15);
+ uint64_t buf1;
+ uint64_t buf2;
+ memcpy(&buf1, src, 8);
+ memcpy(&buf2, src + n - 8, 8);
+ if (nullify_tail) {
+ memset(dst + 7, 0, 8);
+ }
+ memcpy(dst, &buf1, 8);
+ memcpy(dst + n - 8, &buf2, 8);
+ } else if (n >= 4) {
+ uint32_t buf1;
+ uint32_t buf2;
+ memcpy(&buf1, src, 4);
+ memcpy(&buf2, src + n - 4, 4);
+ if (nullify_tail) {
+ memset(dst + 4, 0, 4);
+ memset(dst + 7, 0, 8);
+ }
+ memcpy(dst, &buf1, 4);
+ memcpy(dst + n - 4, &buf2, 4);
+ } else {
+ if (n != 0) {
+ dst[0] = src[0];
+ dst[n / 2] = src[n / 2];
+ dst[n - 1] = src[n - 1];
+ }
+ if (nullify_tail) {
+ memset(dst + 7, 0, 8);
+ memset(dst + n, 0, 8);
+ }
+ }
+}
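
A minimal usage sketch (not part of the diff) of SmallMemmove as defined above,
copying 11 bytes via two overlapping 8-byte words; it assumes the internal
header can be included directly from an abseil checkout.

    #include <cassert>
    #include <cstring>

    #include "absl/strings/internal/cord_internal.h"

    int main() {
      const char* src = "hello world";  // 11 characters
      char dst[15];  // with nullify_tail, all 15 bytes of dst are written
      absl::cord_internal::SmallMemmove</*nullify_tail=*/true>(dst, src, 11);
      assert(std::memcmp(dst, src, 11) == 0);                // data copied
      for (int i = 11; i < 15; ++i) assert(dst[i] == '\0');  // tail nullified
      return 0;
    }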
+
+// Compact class for tracking the reference count and state flags for CordRep
+// instances. Data is stored in an atomic int32_t for compactness and speed.
+class RefcountAndFlags {
public:
- Refcount() : count_{1} {}
- ~Refcount() {}
+ constexpr RefcountAndFlags() : count_{kRefIncrement} {}
+ struct Immortal {};
+ explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}
- // Increments the reference count by 1. Imposes no memory ordering.
- inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); }
+ // Increments the reference count. Imposes no memory ordering.
+ inline void Increment() {
+ count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
+ }
// Asserts that the current refcount is greater than 0. If the refcount is
- // greater than 1, decrements the reference count by 1.
+ // greater than 1, decrements the reference count.
//
// Returns false if there are no references outstanding; true otherwise.
// Inserts barriers to ensure that state written before this method returns
// false will be visible to a thread that just observed this method returning
- // false.
+  // false. Always returns true when the immortal bit is set.
inline bool Decrement() {
int32_t refcount = count_.load(std::memory_order_acquire);
- assert(refcount > 0);
- return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1;
+ assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
+ return refcount != kRefIncrement &&
+ (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
+ kHighRefcountMask) != 0;
}
// Same as Decrement but expect that refcount is greater than 1.
inline bool DecrementExpectHighRefcount() {
- int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel);
- assert(refcount > 0);
- return refcount != 1;
+ int32_t refcount =
+ count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
+ assert((refcount & kRefcountMask) > 0 || refcount & kImmortalFlag);
+ return (refcount & kHighRefcountMask) != 0;
}
// Returns the current reference count using acquire semantics.
- inline int32_t Get() const { return count_.load(std::memory_order_acquire); }
+ inline size_t Get() const {
+ return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
+ kNumFlags);
+ }
// Returns whether the atomic integer is 1.
// If the reference count is used in the conventional way, a
@@ -69,51 +185,174 @@ class Refcount {
// This call performs the test for a reference count of one, and
// performs the memory barrier needed for the owning thread
// to act on the object, knowing that it has exclusive access to the
- // object.
- inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; }
+ // object. Always returns false when the immortal bit is set.
+ inline bool IsOne() {
+ return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
+ kRefIncrement;
+ }
+
+ bool IsImmortal() const {
+ return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
+ }
private:
+ // We reserve the bottom bits for flags.
+  // kImmortalFlag indicates that this entity should never be collected; it is
+ // used for the StringConstant constructor to avoid collecting immutable
+ // constant cords.
+ // kReservedFlag is reserved for future use.
+ enum Flags {
+ kNumFlags = 2,
+
+ kImmortalFlag = 0x1,
+ kReservedFlag = 0x2,
+ kRefIncrement = (1 << kNumFlags),
+
+ // Bitmask to use when checking refcount by equality. This masks out
+ // all flags except kImmortalFlag, which is part of the refcount for
+ // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1
+ // if the immortal bit is set.)
+ kRefcountMask = ~kReservedFlag,
+
+ // Bitmask to use when checking if refcount is equal to 1 and not
+ // immortal when decrementing the refcount. This masks out kRefIncrement and
+ // all flags except kImmortalFlag. If the masked RefcountAndFlags is 0, we
+ // assume the refcount is equal to 1, since we know it's not immortal and
+ // not greater than 1. If the masked RefcountAndFlags is not 0, we can
+ // assume the refcount is not equal to 1 since either a higher bit in the
+    // refcount is set, or kImmortalFlag is set.
+ kHighRefcountMask = kRefcountMask & ~kRefIncrement,
+ };
+
std::atomic<int32_t> count_;
};
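
An illustrative sketch (not part of the diff) of the layout above: with
kNumFlags == 2 a logical count of N is stored as N << 2, bit 0 is the immortal
flag, and bit 1 is reserved. The driver below is hypothetical and exercises
only the public methods shown in this hunk.

    #include <cassert>

    #include "absl/strings/internal/cord_internal.h"

    int main() {
      using absl::cord_internal::RefcountAndFlags;

      RefcountAndFlags ref;      // stored value 4 (kRefIncrement): logical count 1
      assert(ref.IsOne() && ref.Get() == 1);

      ref.Increment();           // stored value 8: logical count 2
      assert(ref.Get() == 2);
      assert(ref.Decrement());   // 2 -> 1: references still outstanding
      assert(!ref.Decrement());  // 1 -> 0: last reference dropped

      // Immortal refcounts never report exclusive ownership.
      RefcountAndFlags immortal{RefcountAndFlags::Immortal{}};
      assert(immortal.IsImmortal() && !immortal.IsOne());
      return 0;
    }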
-// The overhead of a vtable is too much for Cord, so we roll our own subclasses
-// using only a single byte to differentiate classes from each other - the "tag"
-// byte. Define the subclasses first so we can provide downcasting helper
-// functions in the base class.
+// Various representations that we allow
+enum CordRepKind {
+ UNUSED_0 = 0,
+ SUBSTRING = 1,
+ CRC = 2,
+ BTREE = 3,
+ RING = 4,
+ EXTERNAL = 5,
-struct CordRepConcat;
-struct CordRepSubstring;
-struct CordRepExternal;
+  // We have different tags for different sized flat arrays,
+  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to
+  // an allocated range of 32 bytes to 256 KiB. The current granularity is:
+  // - 8 byte granularity for flat sizes in [32, 512]
+  // - 64 byte granularity for flat sizes in (512, 8 KiB]
+  // - 4 KiB granularity for flat sizes in (8 KiB, 256 KiB]
+  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG'
+  // should be adjusted, as well as the Tag <---> Size mapping logic, so that
+  // FLAT still represents the minimum flat allocation size (32 bytes as of
+  // now).
+ FLAT = 6,
+ MAX_FLAT_TAG = 248
+};
+
+// There are various locations where we want to check if some rep is a 'plain'
+// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
+// can perform this check in a single branch as 'tag >= EXTERNAL'
+// Likewise, we have some locations where we check for 'ring or
+// external/flat', so RING is placed directly adjacent to EXTERNAL as well.
+// Note that we can leave this optimization to the compiler. The compiler will
+// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
+static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
+static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
+static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
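
A sketch (not part of the diff) of the single-branch check this ordering
enables; IsDataEdgeSketch is a hypothetical helper, not an abseil API.

    #include "absl/strings/internal/cord_internal.h"

    // EXTERNAL == 5 and every flat tag is >= FLAT == 6, so a single
    // comparison answers "is this a plain data edge (external or flat)?".
    bool IsDataEdgeSketch(const absl::cord_internal::CordRep* rep) {
      return rep->tag >= absl::cord_internal::EXTERNAL;
    }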
struct CordRep {
+ // Result from an `extract edge` operation. Contains the (possibly changed)
+ // tree node as well as the extracted edge, or {tree, nullptr} if no edge
+ // could be extracted.
+ // On success, the returned `tree` value is null if `extracted` was the only
+ // data edge inside the tree, a data edge if there were only two data edges in
+ // the tree, or the (possibly new / smaller) remaining tree with the extracted
+ // data edge removed.
+ struct ExtractResult {
+ CordRep* tree;
+ CordRep* extracted;
+ };
+
+ CordRep() = default;
+ constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
+ : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
+
// The following three fields have to be less than 32 bytes since
- // that is the smallest supported flat node size.
+  // that is the smallest supported flat node size. Some code optimizations
+  // rely on the specific layout of these fields. Notably: the non-trivial
+  // field `refcount` must be preceded by `length` and followed only by POD
+  // data members.
+ // # LINT.IfChange
size_t length;
- Refcount refcount;
+ RefcountAndFlags refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
- char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep
- inline CordRepConcat* concat();
- inline const CordRepConcat* concat() const;
+  // `storage` serves two main purposes:
+ // - the starting point for FlatCordRep.Data() [flexible-array-member]
+ // - 3 bytes of additional storage for use by derived classes.
+  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
+ // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
+ // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
+ // allocate room for these in the derived class, as not all compilers reuse
+ // padding space from the base class (clang and gcc do, MSVC does not, etc)
+ uint8_t storage[3];
+ // # LINT.ThenChange(cord_rep_btree.h:copy_raw)
+
+ // Returns true if this instance's tag matches the requested type.
+ constexpr bool IsRing() const { return tag == RING; }
+ constexpr bool IsSubstring() const { return tag == SUBSTRING; }
+ constexpr bool IsCrc() const { return tag == CRC; }
+ constexpr bool IsExternal() const { return tag == EXTERNAL; }
+ constexpr bool IsFlat() const { return tag >= FLAT; }
+ constexpr bool IsBtree() const { return tag == BTREE; }
+
+ inline CordRepRing* ring();
+ inline const CordRepRing* ring() const;
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
+ inline CordRepCrc* crc();
+ inline const CordRepCrc* crc() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
-};
+ inline CordRepFlat* flat();
+ inline const CordRepFlat* flat() const;
+ inline CordRepBtree* btree();
+ inline const CordRepBtree* btree() const;
+
+ // --------------------------------------------------------------------
+ // Memory management
+
+ // Destroys the provided `rep`.
+ static void Destroy(CordRep* rep);
-struct CordRepConcat : public CordRep {
- CordRep* left;
- CordRep* right;
+ // Increments the reference count of `rep`.
+ // Requires `rep` to be a non-null pointer value.
+ static inline CordRep* Ref(CordRep* rep);
- uint8_t depth() const { return static_cast<uint8_t>(data[0]); }
- void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); }
+ // Decrements the reference count of `rep`. Destroys rep if count reaches
+ // zero. Requires `rep` to be a non-null pointer value.
+ static inline void Unref(CordRep* rep);
};
struct CordRepSubstring : public CordRep {
size_t start; // Starting offset of substring in child
CordRep* child;
+
+ // Creates a substring on `child`, adopting a reference on `child`.
+ // Requires `child` to be either a flat or external node, and `pos` and `n` to
+  // form a non-empty partial subrange of `child`, i.e.:
+ // `n > 0 && n < length && n + pos <= length`
+ static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);
+
+ // Creates a substring of `rep`. Does not adopt a reference on `rep`.
+ // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
+  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`.
+  // If `rep` is a substring of a flat or external node, then this method
+  // returns a new substring of that flat or external node with `pos` adjusted
+  // by the original `start` position.
+ static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};
// Type for function pointer that will invoke the releaser function and also
@@ -124,9 +363,19 @@ using ExternalReleaserInvoker = void (*)(CordRepExternal*);
// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
+ CordRepExternal() = default;
+ explicit constexpr CordRepExternal(absl::string_view str)
+ : CordRep(RefcountAndFlags::Immortal{}, str.size()),
+ base(str.data()),
+ releaser_invoker(nullptr) {}
+
const char* base;
// Pointer to function that knows how to call and destroy the releaser.
ExternalReleaserInvoker releaser_invoker;
+
+ // Deletes (releases) the external rep.
+ // Requires rep != nullptr and rep->IsExternal()
+ static void Delete(CordRep* rep);
};
struct Rank1 {};
@@ -167,7 +416,502 @@ struct CordRepExternalImpl
}
};
+inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
+ size_t n) {
+ assert(child != nullptr);
+ assert(n > 0);
+ assert(n < child->length);
+ assert(pos < child->length);
+ assert(n <= child->length - pos);
+
+ // TODO(b/217376272): Harden internal logic.
+  // Move to strategic places inside the Cord logic and make this an assert.
+ if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
+ LogFatalNodeType(child);
+ }
+
+ CordRepSubstring* rep = new CordRepSubstring();
+ rep->length = n;
+ rep->tag = SUBSTRING;
+ rep->start = pos;
+ rep->child = child;
+ return rep;
+}
+
+inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
+ size_t n) {
+ assert(rep != nullptr);
+ assert(n != 0);
+ assert(pos < rep->length);
+ assert(n <= rep->length - pos);
+ if (n == rep->length) return CordRep::Ref(rep);
+ if (rep->IsSubstring()) {
+ pos += rep->substring()->start;
+ rep = rep->substring()->child;
+ }
+ CordRepSubstring* substr = new CordRepSubstring();
+ substr->length = n;
+ substr->tag = SUBSTRING;
+ substr->start = pos;
+ substr->child = CordRep::Ref(rep);
+ return substr;
+}
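
A hypothetical usage sketch (not part of the diff) for Create(), assuming
CordRepFlat::New() and Data() as declared in cord_rep_flat.h.

    #include <cassert>
    #include <cstring>

    #include "absl/strings/internal/cord_internal.h"
    #include "absl/strings/internal/cord_rep_flat.h"

    void SubstringExample() {
      using namespace absl::cord_internal;
      CordRepFlat* flat = CordRepFlat::New(11);  // refcount starts at 1
      std::memcpy(flat->Data(), "hello world", 11);
      flat->length = 11;

      // Adopts the reference on `flat`; the substring refers to "world".
      CordRepSubstring* sub = CordRepSubstring::Create(flat, 6, 5);
      assert(sub->length == 5 && sub->start == 6);

      CordRep::Unref(sub);  // destroys `sub`, which in turn unrefs `flat`
    }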
+
+inline void CordRepExternal::Delete(CordRep* rep) {
+ assert(rep != nullptr && rep->IsExternal());
+ auto* rep_external = static_cast<CordRepExternal*>(rep);
+ assert(rep_external->releaser_invoker != nullptr);
+ rep_external->releaser_invoker(rep_external);
+}
+
+template <typename Str>
+struct ConstInitExternalStorage {
+ ABSL_CONST_INIT static CordRepExternal value;
+};
+
+template <typename Str>
+ABSL_CONST_INIT CordRepExternal
+ ConstInitExternalStorage<Str>::value(Str::value);
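
ConstInitExternalStorage expects a type exposing a
`static constexpr absl::string_view value`. A hypothetical sketch, assuming
C++17 inline-variable semantics for the constant:

    #include "absl/strings/internal/cord_internal.h"
    #include "absl/strings/string_view.h"

    struct HelloConstant {
      static constexpr absl::string_view value = "hello";
    };

    // A constant-initialized external rep wrapping the constant. Its
    // refcount carries the immortal flag, so it is never destroyed and its
    // null releaser_invoker is never called.
    absl::cord_internal::CordRepExternal& kHelloRep =
        absl::cord_internal::ConstInitExternalStorage<HelloConstant>::value;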
+
+enum {
+ kMaxInline = 15,
+};
+
+constexpr char GetOrNull(absl::string_view data, size_t pos) {
+ return pos < data.size() ? data[pos] : '\0';
+}
+
+// We store cordz_info as a 64-bit pointer value in little endian format. This
+// guarantees that the least significant byte of cordz_info matches the first
+// byte of the inline data representation in `data`, which holds the inlined
+// size or the 'is_tree' bit.
+using cordz_info_t = int64_t;
+
+// Assert that the `cordz_info` pointer value perfectly overlaps the first
+// half of `data` and can hold a pointer value.
+static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
+static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
+
+// LittleEndianByte() creates a little endian representation of `value`, i.e.
+// a value where the first byte in the host's representation holds `value`,
+// with all other bytes being 0.
+static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
+#if defined(ABSL_IS_BIG_ENDIAN)
+ return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
+#else
+ return value;
+#endif
+}
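
A worked sketch (not part of the diff) of the property documented above: on
both little and big endian hosts, the first byte of the object representation
holds the value.

    #include <cassert>
    #include <cstring>

    #include "absl/strings/internal/cord_internal.h"

    int main() {
      const absl::cord_internal::cordz_info_t v =
          absl::cord_internal::LittleEndianByte(1);
      unsigned char bytes[sizeof(v)];
      std::memcpy(bytes, &v, sizeof(v));
      assert(bytes[0] == 1);  // the byte that overlaps the InlineData tag
      for (size_t i = 1; i < sizeof(v); ++i) assert(bytes[i] == 0);
      return 0;
    }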
+
+class InlineData {
+ public:
+ // DefaultInitType forces the use of the default initialization constructor.
+ enum DefaultInitType { kDefaultInit };
+
+  // kNullCordzInfo holds the little endian representation of intptr_t(1).
+  // This is the 'null' / initial value of 'cordz_info'. The value is chosen
+  // such that its first byte, which overlaps the byte holding the tag, is 1,
+  // keeping the 'is_tree' bit of the tag set.
+ static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
+
+ // kTagOffset contains the offset of the control byte / tag. This constant is
+ // intended mostly for debugging purposes: do not remove this constant as it
+ // is actively inspected and used by gdb pretty printing code.
+ static constexpr size_t kTagOffset = 0;
+
+ // Implement `~InlineData()` conditionally: we only need this destructor to
+ // unpoison poisoned instances under *SAN, and it will only compile correctly
+ // if the current compiler supports `absl::is_constant_evaluated()`.
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+ ~InlineData() noexcept { unpoison(); }
+#endif
+
+ constexpr InlineData() noexcept { poison_this(); }
+
+ explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
+ poison_this();
+ }
+
+ explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
+ ABSL_ASSERT(rep != nullptr);
+ }
+
+ // Explicit constexpr constructor to create a constexpr InlineData
+ // value. Creates an inlined SSO value if `rep` is null, otherwise
+ // creates a tree instance value.
+ constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
+ : rep_(rep ? Rep(rep) : Rep(sv)) {
+ poison();
+ }
+
+ constexpr InlineData(const InlineData& rhs) noexcept;
+ InlineData& operator=(const InlineData& rhs) noexcept;
+
+ friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+ const Rep l = lhs.rep_.SanitizerSafeCopy();
+ const Rep r = rhs.rep_.SanitizerSafeCopy();
+ return memcmp(&l, &r, sizeof(l)) == 0;
+#else
+ return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+#endif
+ }
+ friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
+ return !operator==(lhs, rhs);
+ }
+
+ // Poisons the unused inlined SSO data if the current instance
+ // is inlined, else un-poisons the entire instance.
+ constexpr void poison();
+
+ // Un-poisons this instance.
+ constexpr void unpoison();
+
+ // Poisons the current instance. This is used on default initialization.
+ constexpr void poison_this();
+
+ // Returns true if the current instance is empty.
+ // The 'empty value' is an inlined data value of zero length.
+ bool is_empty() const { return rep_.tag() == 0; }
+
+ // Returns true if the current instance holds a tree value.
+ bool is_tree() const { return (rep_.tag() & 1) != 0; }
+
+ // Returns true if the current instance holds a cordz_info value.
+ // Requires the current instance to hold a tree value.
+ bool is_profiled() const {
+ assert(is_tree());
+ return rep_.cordz_info() != kNullCordzInfo;
+ }
+
+ // Returns true if either of the provided instances hold a cordz_info value.
+ // This method is more efficient than the equivalent `data1.is_profiled() ||
+ // data2.is_profiled()`. Requires both arguments to hold a tree.
+ static bool is_either_profiled(const InlineData& data1,
+ const InlineData& data2) {
+ assert(data1.is_tree() && data2.is_tree());
+ return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
+ kNullCordzInfo;
+ }
+
+ // Returns the cordz_info sampling instance for this instance, or nullptr
+ // if the current instance is not sampled and does not have CordzInfo data.
+ // Requires the current instance to hold a tree value.
+ CordzInfo* cordz_info() const {
+ assert(is_tree());
+ intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
+ static_cast<uint64_t>(rep_.cordz_info())));
+ assert(info & 1);
+ return reinterpret_cast<CordzInfo*>(info - 1);
+ }
+
+ // Sets the current cordz_info sampling instance for this instance, or nullptr
+ // if the current instance is not sampled and does not have CordzInfo data.
+ // Requires the current instance to hold a tree value.
+ void set_cordz_info(CordzInfo* cordz_info) {
+ assert(is_tree());
+ uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
+ rep_.set_cordz_info(
+ static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
+ }
+
+ // Resets the current cordz_info to null / empty.
+ void clear_cordz_info() {
+ assert(is_tree());
+ rep_.set_cordz_info(kNullCordzInfo);
+ }
+
+ // Returns a read only pointer to the character data inside this instance.
+ // Requires the current instance to hold inline data.
+ const char* as_chars() const {
+ assert(!is_tree());
+ return rep_.as_chars();
+ }
+
+ // Returns a mutable pointer to the character data inside this instance.
+ // Should be used for 'write only' operations setting an inlined value.
+ // Applications can set the value of inlined data either before or after
+ // setting the inlined size, i.e., both of the below are valid:
+ //
+ // // Set inlined data and inline size
+ // memcpy(data_.as_chars(), data, size);
+ // data_.set_inline_size(size);
+ //
+ // // Set inlined size and inline data
+ // data_.set_inline_size(size);
+ // memcpy(data_.as_chars(), data, size);
+ //
+ // It's an error to read from the returned pointer without a preceding write
+ // if the current instance does not hold inline data, i.e.: is_tree() == true.
+ char* as_chars() { return rep_.as_chars(); }
+
+  // Returns the tree value of this instance.
+ // Requires the current instance to hold a tree value.
+ CordRep* as_tree() const {
+ assert(is_tree());
+ return rep_.tree();
+ }
+
+ void set_inline_data(const char* data, size_t n) {
+ ABSL_ASSERT(n <= kMaxInline);
+ unpoison();
+ rep_.set_tag(static_cast<int8_t>(n << 1));
+ SmallMemmove<true>(rep_.as_chars(), data, n);
+ poison();
+ }
+
+ void copy_max_inline_to(char* dst) const {
+ assert(!is_tree());
+ memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
+ }
+
+ // Initialize this instance to holding the tree value `rep`,
+ // initializing the cordz_info to null, i.e.: 'not profiled'.
+ void make_tree(CordRep* rep) {
+ unpoison();
+ rep_.make_tree(rep);
+ }
+
+  // Set the tree value of this instance to `rep`.
+ // Requires the current instance to already hold a tree value.
+ // Does not affect the value of cordz_info.
+ void set_tree(CordRep* rep) {
+ assert(is_tree());
+ rep_.set_tree(rep);
+ }
+
+ // Returns the size of the inlined character data inside this instance.
+ // Requires the current instance to hold inline data.
+ size_t inline_size() const { return rep_.inline_size(); }
+
+ // Sets the size of the inlined character data inside this instance.
+ // Requires `size` to be <= kMaxInline.
+ // See the documentation on 'as_chars()' for more information and examples.
+ void set_inline_size(size_t size) {
+ unpoison();
+ rep_.set_inline_size(size);
+ poison();
+ }
+
+ // Compares 'this' inlined data with rhs. The comparison is a straightforward
+ // lexicographic comparison. `Compare()` returns values as follows:
+ //
+ // -1 'this' InlineData instance is smaller
+ // 0 the InlineData instances are equal
+  //   1  'this' InlineData instance is larger
+ int Compare(const InlineData& rhs) const {
+ return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
+ }
+
+ private:
+ struct Rep {
+ // See cordz_info_t for forced alignment and size of `cordz_info` details.
+ struct AsTree {
+ explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
+ : rep(tree) {}
+ cordz_info_t cordz_info = kNullCordzInfo;
+ absl::cord_internal::CordRep* rep;
+ };
+
+ explicit Rep(DefaultInitType) {}
+ constexpr Rep() : data{0} {}
+ constexpr Rep(const Rep&) = default;
+ constexpr Rep& operator=(const Rep&) = default;
+
+ explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}
+
+ explicit constexpr Rep(absl::string_view chars)
+ : data{static_cast<char>((chars.size() << 1)),
+ GetOrNull(chars, 0),
+ GetOrNull(chars, 1),
+ GetOrNull(chars, 2),
+ GetOrNull(chars, 3),
+ GetOrNull(chars, 4),
+ GetOrNull(chars, 5),
+ GetOrNull(chars, 6),
+ GetOrNull(chars, 7),
+ GetOrNull(chars, 8),
+ GetOrNull(chars, 9),
+ GetOrNull(chars, 10),
+ GetOrNull(chars, 11),
+ GetOrNull(chars, 12),
+ GetOrNull(chars, 13),
+ GetOrNull(chars, 14)} {}
+
+ // Disable sanitizer as we must always be able to read `tag`.
+ ABSL_CORD_INTERNAL_NO_SANITIZE
+ int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
+ void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
+
+ char* as_chars() { return data + 1; }
+ const char* as_chars() const { return data + 1; }
+
+ bool is_tree() const { return (tag() & 1) != 0; }
+
+ size_t inline_size() const {
+ ABSL_ASSERT(!is_tree());
+ return static_cast<size_t>(tag()) >> 1;
+ }
+
+ void set_inline_size(size_t size) {
+ ABSL_ASSERT(size <= kMaxInline);
+ set_tag(static_cast<int8_t>(size << 1));
+ }
+
+ CordRep* tree() const { return as_tree.rep; }
+ void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
+
+ cordz_info_t cordz_info() const { return as_tree.cordz_info; }
+ void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }
+
+ void make_tree(CordRep* tree) {
+ as_tree.rep = tree;
+ as_tree.cordz_info = kNullCordzInfo;
+ }
+
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+ constexpr Rep SanitizerSafeCopy() const {
+ if (!absl::is_constant_evaluated()) {
+ Rep res;
+ if (is_tree()) {
+ res = *this;
+ } else {
+ res.set_tag(tag());
+ memcpy(res.as_chars(), as_chars(), inline_size());
+ }
+ return res;
+ } else {
+ return *this;
+ }
+ }
+#else
+ constexpr const Rep& SanitizerSafeCopy() const { return *this; }
+#endif
+
+    // If the data has length <= kMaxInline, we store it in `data`, and
+    // store the size in the first char of `data`, shifted left by 1.
+    // Else we store it in a tree and store a pointer to that tree in
+    // `as_tree.rep`, with the pointer tagged to make `tag() & 1` non-zero.
+ union {
+ char data[kMaxInline + 1];
+ AsTree as_tree;
+ };
+ };
+
+ // Private implementation of `Compare()`
+ static inline int Compare(const Rep& lhs, const Rep& rhs) {
+ uint64_t x, y;
+ memcpy(&x, lhs.as_chars(), sizeof(x));
+ memcpy(&y, rhs.as_chars(), sizeof(y));
+ if (x == y) {
+ memcpy(&x, lhs.as_chars() + 7, sizeof(x));
+ memcpy(&y, rhs.as_chars() + 7, sizeof(y));
+ if (x == y) {
+ if (lhs.inline_size() == rhs.inline_size()) return 0;
+ return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
+ }
+ }
+ x = absl::big_endian::FromHost64(x);
+ y = absl::big_endian::FromHost64(y);
+ return x < y ? -1 : 1;
+ }
+
+ Rep rep_;
+};
+
+static_assert(sizeof(InlineData) == kMaxInline + 1, "");
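
A sketch (not part of the diff) of the inlined encoding: the tag byte stores
`size << 1`, leaving bit 0 clear for inlined values (it doubles as the is_tree
bit), and Compare() yields lexicographic order via big-endian word compares.

    #include <cassert>
    #include <cstring>

    #include "absl/strings/internal/cord_internal.h"

    int main() {
      absl::cord_internal::InlineData a, b;
      a.set_inline_data("abc", 3);  // tag byte now holds 3 << 1 == 6
      b.set_inline_data("abd", 3);

      assert(!a.is_tree());
      assert(a.inline_size() == 3);
      assert(std::memcmp(a.as_chars(), "abc", 3) == 0);
      assert(a.Compare(b) == -1);   // "abc" < "abd"
      return 0;
    }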
+
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData& rhs) noexcept
+ : rep_(rhs.rep_.SanitizerSafeCopy()) {
+ poison();
+}
+
+inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
+ unpoison();
+ rep_ = rhs.rep_.SanitizerSafeCopy();
+ poison();
+ return *this;
+}
+
+constexpr void InlineData::poison_this() {
+ if (!absl::is_constant_evaluated()) {
+ container_internal::SanitizerPoisonObject(this);
+ }
+}
+
+constexpr void InlineData::unpoison() {
+ if (!absl::is_constant_evaluated()) {
+ container_internal::SanitizerUnpoisonObject(this);
+ }
+}
+
+constexpr void InlineData::poison() {
+ if (!absl::is_constant_evaluated()) {
+ if (is_tree()) {
+ container_internal::SanitizerUnpoisonObject(this);
+ } else if (const size_t size = inline_size()) {
+ if (size < kMaxInline) {
+ const char* end = rep_.as_chars() + size;
+ container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
+ }
+ } else {
+ container_internal::SanitizerPoisonObject(this);
+ }
+ }
+}
+
+#else // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+constexpr InlineData::InlineData(const InlineData&) noexcept = default;
+inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;
+
+constexpr void InlineData::poison_this() {}
+constexpr void InlineData::unpoison() {}
+constexpr void InlineData::poison() {}
+
+#endif // ABSL_INTERNAL_CORD_HAVE_SANITIZER
+
+inline CordRepSubstring* CordRep::substring() {
+ assert(IsSubstring());
+ return static_cast<CordRepSubstring*>(this);
+}
+
+inline const CordRepSubstring* CordRep::substring() const {
+ assert(IsSubstring());
+ return static_cast<const CordRepSubstring*>(this);
+}
+
+inline CordRepExternal* CordRep::external() {
+ assert(IsExternal());
+ return static_cast<CordRepExternal*>(this);
+}
+
+inline const CordRepExternal* CordRep::external() const {
+ assert(IsExternal());
+ return static_cast<const CordRepExternal*>(this);
+}
+
+inline CordRep* CordRep::Ref(CordRep* rep) {
+ // ABSL_ASSUME is a workaround for
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
+ ABSL_ASSUME(rep != nullptr);
+ rep->refcount.Increment();
+ return rep;
+}
+
+inline void CordRep::Unref(CordRep* rep) {
+ assert(rep != nullptr);
+  // Expect the refcount to be greater than 1. Skipping an extra branch that
+  // checks for 'ref == 1' should typically outweigh the cost of the
+  // unconditional atomic decrement.
+ if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
+ Destroy(rep);
+ }
+}
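
A minimal sketch (not part of the diff) of the manual reference-counting
protocol these helpers implement, again assuming CordRepFlat::New() from
cord_rep_flat.h.

    #include "absl/strings/internal/cord_internal.h"
    #include "absl/strings/internal/cord_rep_flat.h"

    void RefUnrefExample() {
      using namespace absl::cord_internal;
      CordRep* rep = CordRepFlat::New(16);  // refcount starts at 1
      rep->length = 0;
      CordRep* alias = CordRep::Ref(rep);   // refcount: 2
      CordRep::Unref(alias);                // refcount: 1
      CordRep::Unref(rep);                  // refcount: 0 -> Destroy(rep)
    }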
+
} // namespace cord_internal
+
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_