1 files changed, 247 insertions, 112 deletions
diff --git a/pw_varint/public/pw_varint/varint.h b/pw_varint/public/pw_varint/varint.h
index d53705be0..9eac76749 100644
--- a/pw_varint/public/pw_varint/varint.h
+++ b/pw_varint/public/pw_varint/varint.h
@@ -1,4 +1,4 @@
-// Copyright 2020 The Pigweed Authors
+// Copyright 2023 The Pigweed Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy of
@@ -13,6 +13,25 @@
 // the License.
 #pragma once
 
+/// @file pw_varint/varint.h
+///
+/// The `pw_varint` module provides functions for encoding and decoding variable
+/// length integers or varints. For smaller values, varints require less memory
+/// than a fixed-size encoding. For example, a 32-bit (4-byte) integer requires
+/// 1–5 bytes when varint-encoded.
+///
+/// `pw_varint` supports custom variable-length encodings with different
+/// terminator bit values and positions (@cpp_enum{pw::varint::Format}).
+/// The basic encoding for unsigned integers is Little Endian Base 128 (LEB128).
+/// ZigZag encoding is also supported, which maps negative integers to positive
+/// integers to improve encoding density for LEB128.
+///
+/// <a
+/// href=https://developers.google.com/protocol-buffers/docs/encoding#varints>
+/// Protocol Buffers</a> and @rstref{HDLC <module-pw_hdlc>} use variable-length
+/// integer encodings for integers.
+
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -22,8 +41,112 @@
 extern "C" {
 #endif
 
-// Expose a subset of the varint API for use in C code.
+/// Maximum size of an LEB128-encoded `uint32_t`.
+#define PW_VARINT_MAX_INT32_SIZE_BYTES 5
+
+/// Maximum size of an LEB128-encoded `uint64_t`.
+#define PW_VARINT_MAX_INT64_SIZE_BYTES 10
+
+/// Encodes a 32-bit integer as LEB128.
+///
+/// @returns the number of bytes written
+size_t pw_varint_Encode32(uint32_t integer,
+                          void* output,
+                          size_t output_size_bytes);
+
+/// Encodes a 64-bit integer as LEB128.
+///
+/// @returns the number of bytes written
+size_t pw_varint_Encode64(uint64_t integer,
+                          void* output,
+                          size_t output_size_bytes);
+
+/// Zig-zag encodes an `int32_t`, returning it as a `uint32_t`.
+static inline uint32_t pw_varint_ZigZagEncode32(int32_t n) {
+  return (uint32_t)((uint32_t)n << 1) ^ (uint32_t)(n >> (sizeof(n) * 8 - 1));
+}
+
+/// Zig-zag encodes an `int64_t`, returning it as a `uint64_t`.
+static inline uint64_t pw_varint_ZigZagEncode64(int64_t n) {
+  return (uint64_t)((uint64_t)n << 1) ^ (uint64_t)(n >> (sizeof(n) * 8 - 1));
+}
+
+/// Extracts and encodes 7 bits from the integer. Sets the top bit to indicate
+/// more data is coming, which must be cleared if this was the last byte.
+static inline uint8_t pw_varint_EncodeOneByte32(uint32_t* integer) {
+  const uint8_t bits = (uint8_t)((*integer & 0x7Fu) | 0x80u);
+  *integer >>= 7;
+  return bits;
+}
+
+/// @copydoc pw_varint_EncodeOneByte32
+static inline uint8_t pw_varint_EncodeOneByte64(uint64_t* integer) {
+  const uint8_t bits = (uint8_t)((*integer & 0x7Fu) | 0x80u);
+  *integer >>= 7;
+  return bits;
+}
+
+/// Zig-zag decodes a `uint64_t`, returning it as an `int64_t`.
+static inline int32_t pw_varint_ZigZagDecode32(uint32_t n)
+    PW_NO_SANITIZE("unsigned-integer-overflow") {
+  return (int32_t)((n >> 1) ^ (~(n & 1) + 1));
+}
 
+/// Zig-zag decodes a `uint64_t`, returning it as an `int64_t`.
+static inline int64_t pw_varint_ZigZagDecode64(uint64_t n)
+    PW_NO_SANITIZE("unsigned-integer-overflow") {
+  return (int64_t)((n >> 1) ^ (~(n & 1) + 1));
+}
+
+/// Decodes an LEB128-encoded integer to a `uint32_t`.
+/// @returns the number of bytes read; 0 if decoding failed
+size_t pw_varint_Decode32(const void* input,
+                          size_t input_size_bytes,
+                          uint32_t* output);
+
+/// Decodes an LEB128-encoded integer to a `uint64_t`.
+/// @returns the number of bytes read; 0 if decoding failed
+size_t pw_varint_Decode64(const void* input,
+                          size_t input_size_bytes,
+                          uint64_t* output);
+
+/// Decodes one byte of an LEB128-encoded integer to a `uint32_t`.
+/// @returns true if there is more data to decode (top bit is set).
+static inline bool pw_varint_DecodeOneByte32(uint8_t byte,
+                                             size_t count,
+                                             uint32_t* value) {
+  *value |= (uint32_t)(byte & 0x7fu) << (count * 7);
+  return (byte & 0x80u) != 0u;
+}
+
+/// Decodes one byte of an LEB128-encoded integer to a `uint64_t`.
+/// @returns true if there is more data to decode (top bit is set).
+static inline bool pw_varint_DecodeOneByte64(uint8_t byte,
+                                             size_t count,
+                                             uint64_t* value) {
+  *value |= (uint64_t)(byte & 0x7fu) << (count * 7);
+  return (byte & 0x80u) != 0u;
+}
+
+/// Macro that returns the encoded size of up to a 64-bit integer. This is
+/// inefficient, but is a constant expression if the input is a constant. Use
+/// `pw_varint_EncodedSizeBytes` for runtime encoded size calculation.
+#define PW_VARINT_ENCODED_SIZE_BYTES(value)        \
+  ((unsigned long long)value < (1u << 7)      ? 1u \
+   : (unsigned long long)value < (1u << 14)   ? 2u \
+   : (unsigned long long)value < (1u << 21)   ? 3u \
+   : (unsigned long long)value < (1u << 28)   ? 4u \
+   : (unsigned long long)value < (1llu << 35) ? 5u \
+   : (unsigned long long)value < (1llu << 42) ? 6u \
+   : (unsigned long long)value < (1llu << 49) ? 7u \
+   : (unsigned long long)value < (1llu << 56) ? 8u \
+   : (unsigned long long)value < (1llu << 63) ? 9u \
+                                              : 10u)
+
+/// Returns the size of a `uint64_t` when encoded as a varint (LEB128).
+size_t pw_varint_EncodedSizeBytes(uint64_t integer);
+
+/// Describes a custom varint format.
 typedef enum {
   PW_VARINT_ZERO_TERMINATED_LEAST_SIGNIFICANT = 0,
   PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT = 1,
@@ -31,41 +154,18 @@ typedef enum {
   PW_VARINT_ONE_TERMINATED_MOST_SIGNIFICANT = 3,
 } pw_varint_Format;
 
+/// Encodes a `uint64_t` using a custom varint format.
 size_t pw_varint_EncodeCustom(uint64_t integer,
                               void* output,
                               size_t output_size,
                               pw_varint_Format format);
+
+/// Decodes a `uint64_t` using a custom varint format.
 size_t pw_varint_DecodeCustom(const void* input,
                               size_t input_size,
                               uint64_t* output,
                               pw_varint_Format format);
 
-static inline size_t pw_varint_Encode(uint64_t integer,
-                                      void* output,
-                                      size_t output_size) {
-  return pw_varint_EncodeCustom(
-      integer, output, output_size, PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT);
-}
-
-size_t pw_varint_ZigZagEncode(int64_t integer,
-                              void* output,
-                              size_t output_size);
-
-static inline size_t pw_varint_Decode(const void* input,
-                                      size_t input_size,
-                                      uint64_t* output) {
-  return pw_varint_DecodeCustom(
-      input, input_size, output, PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT);
-}
-
-size_t pw_varint_ZigZagDecode(const void* input,
-                              size_t input_size,
-                              int64_t* output);
-
-// Returns the size of an when encoded as a varint.
-size_t pw_varint_EncodedSize(uint64_t integer);
-size_t pw_varint_ZigZagEncodedSize(int64_t integer);
-
 #ifdef __cplusplus
 
 }  // extern "C"
@@ -73,36 +173,46 @@ size_t pw_varint_ZigZagEncodedSize(int64_t integer);
 #include <limits>
 #include <type_traits>
 
+#include "lib/stdcompat/bit.h"
 #include "pw_polyfill/language_feature_macros.h"
 #include "pw_span/span.h"
 
 namespace pw {
 namespace varint {
 
-// The maximum number of bytes occupied by an encoded varint.
-PW_INLINE_VARIABLE constexpr size_t kMaxVarint32SizeBytes = 5;
-PW_INLINE_VARIABLE constexpr size_t kMaxVarint64SizeBytes = 10;
+/// Maximum size of a varint (LEB128) encoded `uint32_t`.
+PW_INLINE_VARIABLE constexpr size_t kMaxVarint32SizeBytes =
+    PW_VARINT_MAX_INT32_SIZE_BYTES;
 
-// ZigZag encodes a signed integer. This maps small negative numbers to small,
-// unsigned positive numbers, which improves their density for LEB128 encoding.
-//
-// ZigZag encoding works by moving the sign bit from the most-significant bit to
-// the least-significant bit. For the signed k-bit integer n, the formula is
-//
-//   (n << 1) ^ (n >> (k - 1))
-//
-// See the following for a description of ZigZag encoding:
-//   https://developers.google.com/protocol-buffers/docs/encoding#types
+/// Maximum size of a varint (LEB128) encoded `uint64_t`.
+PW_INLINE_VARIABLE constexpr size_t kMaxVarint64SizeBytes =
+    PW_VARINT_MAX_INT64_SIZE_BYTES;
+
+/// ZigZag encodes a signed integer. This maps small negative numbers to small,
+/// unsigned positive numbers, which improves their density for LEB128 encoding.
+///
+/// ZigZag encoding works by moving the sign bit from the most-significant bit
+/// to the least-significant bit. For the signed `k`-bit integer `n`, the
+/// formula is:
+///
+/// @code{.cpp}
+///   (n << 1) ^ (n >> (k - 1))
+/// @endcode
+///
+/// See the following for a description of ZigZag encoding:
+///   https://developers.google.com/protocol-buffers/docs/encoding#types
 template <typename T>
 constexpr std::make_unsigned_t<T> ZigZagEncode(T n) {
   static_assert(std::is_signed<T>(), "Zig-zag encoding is for signed integers");
   using U = std::make_unsigned_t<T>;
-  return (static_cast<U>(n) << 1) ^ static_cast<U>(n >> (sizeof(T) * 8 - 1));
+  return static_cast<U>(static_cast<U>(n) << 1) ^
+         static_cast<U>(n >> (sizeof(T) * 8 - 1));
 }
 
-// ZigZag decodes a signed integer.
-// The calculation is done modulo std::numeric_limits<T>::max()+1, so the
-// unsigned integer overflows are intentional.
+/// ZigZag decodes a signed integer.
+///
+/// The calculation is done modulo `std::numeric_limits<T>::max()+1`, so the
+/// unsigned integer overflows are intentional.
 template <typename T>
 constexpr std::make_signed_t<T> ZigZagDecode(T n)
     PW_NO_SANITIZE("unsigned-integer-overflow") {
@@ -111,59 +221,90 @@ constexpr std::make_signed_t<T> ZigZagDecode(T n)
   return static_cast<std::make_signed_t<T>>((n >> 1) ^ (~(n & 1) + 1));
 }
 
-// Encodes a uint64_t with Little-Endian Base 128 (LEB128) encoding.
+/// @brief Computes the size of an integer when encoded as a varint.
+///
+/// @param integer The integer whose encoded size is to be computed. `integer`
+/// can be signed or unsigned.
+///
+/// @returns The size of `integer` when encoded as a varint.
+template <typename T,
+          typename = std::enable_if_t<std::is_integral<T>::value ||
+                                      std::is_convertible<T, uint64_t>::value>>
+constexpr size_t EncodedSize(T integer) {
+  if (integer == 0) {
+    return 1;
+  }
+  return static_cast<size_t>(
+      (64 - cpp20::countl_zero(static_cast<uint64_t>(integer)) + 6) / 7);
+}
+
+/// Encodes a `uint64_t` with Little-Endian Base 128 (LEB128) encoding.
+/// @returns the number of bytes written; 0 if the buffer is too small
 inline size_t EncodeLittleEndianBase128(uint64_t integer,
                                         const span<std::byte>& output) {
-  return pw_varint_Encode(integer, output.data(), output.size());
+  return pw_varint_Encode64(integer, output.data(), output.size());
 }
 
-// Encodes the provided integer using a variable-length encoding and returns the
-// number of bytes written.
-//
-// The encoding is the same as used in protocol buffers. Signed integers are
-// ZigZag encoded to remove leading 1s from small negative numbers, then the
-// resulting number is encoded as Little Endian Base 128 (LEB128). Unsigned
-// integers are encoded directly as LEB128.
-//
-// Returns the number of bytes written or 0 if the result didn't fit in the
-// encoding buffer.
+/// Encodes the provided integer using a variable-length encoding and returns
+/// the number of bytes written.
+///
+/// The encoding is the same as used in protocol buffers. Signed integers are
+/// ZigZag encoded to remove leading 1s from small negative numbers, then the
+/// resulting number is encoded as Little Endian Base 128 (LEB128). Unsigned
+/// integers are encoded directly as LEB128.
+///
+/// Returns the number of bytes written or 0 if the result didn't fit in the
+/// encoding buffer.
 template <typename T>
 size_t Encode(T integer, const span<std::byte>& output) {
   if (std::is_signed<T>()) {
-    return pw_varint_ZigZagEncode(integer, output.data(), output.size());
+    using Signed =
+        std::conditional_t<std::is_signed<T>::value, T, std::make_signed_t<T>>;
+    return EncodeLittleEndianBase128(ZigZagEncode(static_cast<Signed>(integer)),
+                                     output);
   } else {
-    return pw_varint_Encode(integer, output.data(), output.size());
+    using Unsigned = std::
+        conditional_t<std::is_signed<T>::value, std::make_unsigned_t<T>, T>;
+    return EncodeLittleEndianBase128(static_cast<Unsigned>(integer), output);
   }
 }
 
-// Decodes a varint-encoded value. If reading into a signed integer, the value
-// is ZigZag decoded.
-//
-// Returns the number of bytes read from the input if successful. Returns zero
-// if the result does not fit in a int64_t / uint64_t or if the input is
-// exhausted before the number terminates. Reads a maximum of 10 bytes.
-//
-// The following example decodes multiple varints from a buffer:
-//
-//   while (!data.empty()) {
-//     int64_t value;
-//     size_t bytes = Decode(data, &value);
-//
-//     if (bytes == 0u) {
-//       return Status::DataLoss();
-//     }
-//     results.push_back(value);
-//     data = data.subspan(bytes)
-//   }
-//
-inline size_t Decode(const span<const std::byte>& input, int64_t* value) {
-  return pw_varint_ZigZagDecode(input.data(), input.size(), value);
+/// Decodes a varint-encoded value. If reading into a signed integer, the value
+/// is ZigZag decoded.
+///
+/// Returns the number of bytes read from the input if successful. Returns zero
+/// if the result does not fit in a `int64_t`/ `uint64_t` or if the input is
+/// exhausted before the number terminates. Reads a maximum of 10 bytes.
+///
+/// The following example decodes multiple varints from a buffer:
+///
+/// @code{.cpp}
+///
+///   while (!data.empty()) {
+///     int64_t value;
+///     size_t bytes = Decode(data, &value);
+///
+///     if (bytes == 0u) {
+///       return Status::DataLoss();
+///     }
+///     results.push_back(value);
+///     data = data.subspan(bytes)
+///   }
+///
+/// @endcode
+inline size_t Decode(const span<const std::byte>& input, int64_t* output) {
+  uint64_t value = 0;
+  size_t bytes_read = pw_varint_Decode64(input.data(), input.size(), &value);
+  *output = pw_varint_ZigZagDecode64(value);
+  return bytes_read;
 }
 
+/// @overload
 inline size_t Decode(const span<const std::byte>& input, uint64_t* value) {
-  return pw_varint_Decode(input.data(), input.size(), value);
+  return pw_varint_Decode64(input.data(), input.size(), value);
 }
 
+/// Describes a custom varint format.
 enum class Format {
   kZeroTerminatedLeastSignificant = PW_VARINT_ZERO_TERMINATED_LEAST_SIGNIFICANT,
   kZeroTerminatedMostSignificant = PW_VARINT_ZERO_TERMINATED_MOST_SIGNIFICANT,
@@ -171,7 +312,7 @@ enum class Format {
   kOneTerminatedMostSignificant = PW_VARINT_ONE_TERMINATED_MOST_SIGNIFICANT,
 };
 
-// Encodes a varint in a custom format.
+/// Encodes a varint in a custom format.
 inline size_t Encode(uint64_t value, span<std::byte> output, Format format) {
   return pw_varint_EncodeCustom(value,
                                 output.data(),
@@ -179,7 +320,7 @@ inline size_t Encode(uint64_t value, span<std::byte> output, Format format) {
                                 static_cast<pw_varint_Format>(format));
 }
 
-// Decodes a varint from a custom format.
+/// Decodes a varint from a custom format.
 inline size_t Decode(span<const std::byte> input,
                      uint64_t* value,
                      Format format) {
@@ -187,34 +328,28 @@ inline size_t Decode(span<const std::byte> input,
       input.data(), input.size(), value, static_cast<pw_varint_Format>(format));
 }
 
-// Returns a size of an integer when encoded as a varint.
-constexpr size_t EncodedSize(uint64_t integer) {
-  return integer == 0 ? 1 : (64 - __builtin_clzll(integer) + 6) / 7;
-}
-
-// Returns a size of an signed integer when ZigZag encoded as a varint.
-constexpr size_t ZigZagEncodedSize(int64_t integer) {
-  return EncodedSize(ZigZagEncode(integer));
-}
-
-// Returns the maximum integer value that can be encoded in a varint of the
-// specified number of bytes.
-//
-// These values are also listed in the table below. Zigzag encoding cuts these
-// in half, as positive and negative integers are alternated.
-//
-//   Bytes          Max value
-//     1                          127
-//     2                       16,383
-//     3                    2,097,151
-//     4                  268,435,455
-//     5               34,359,738,367 -- needed for max uint32 value
-//     6            4,398,046,511,103
-//     7          562,949,953,421,311
-//     8       72,057,594,037,927,935
-//     9    9,223,372,036,854,775,807
-//     10            uint64 max value
-//
+/// @brief Returns the maximum (max) integer value that can be encoded as a
+/// varint into the specified number of bytes.
+///
+/// The following table lists the max value for each byte size:
+///
+/// | Bytes | Max value                 |
+/// | ----- | ------------------------- |
+/// | 1     |                       127 |
+/// | 2     |                    16,383 |
+/// | 3     |                 2,097,151 |
+/// | 4     |               268,435,455 |
+/// | 5     |            34,359,738,367 |
+/// | 6     |         4,398,046,511,103 |
+/// | 7     |       562,949,953,421,311 |
+/// | 8     |    72,057,594,037,927,935 |
+/// | 9     | 9,223,372,036,854,775,807 |
+/// | 10    |        (uint64 max value) |
+///
+/// @param bytes The size of the varint, in bytes. 5 bytes are needed for the
+/// max `uint32` value. 10 bytes are needed for the max `uint64` value.
+///
+/// @return The max integer value for a varint of size `bytes`.
 constexpr uint64_t MaxValueInBytes(size_t bytes) {
   return bytes >= kMaxVarint64SizeBytes ? std::numeric_limits<uint64_t>::max()
                                         : (uint64_t(1) << (7 * bytes)) - 1;