aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShawn Willden <swillden@google.com>2020-11-24 19:05:09 -0700
committerShawn Willden <swillden@google.com>2020-12-14 09:38:50 -0700
commit03990c2489864216132c319372ae209a1d6e6766 (patch)
tree29614bcd70bb06afbe966510ddb8049a165d4024
parent85e5286b597c890689e63ab7febc01db5da67906 (diff)
downloadlibcppbor-03990c2489864216132c319372ae209a1d6e6766.tar.gz
Improve Map canonicalization and add Map iterators.
This CL changes Map storage to use a vector of pairs, which removes the need to copy the contents twice to sort them and makes it easy to support Map iteration. Support for recursive canonicalization is added as well, and Map::get() uses a binary search when the map is canonicalized. Test: cppbor_test_external Change-Id: Ie7cee5d504e205e1768a26ec5df8436805a6eefe
-rw-r--r--Android.bp18
-rw-r--r--include/cppbor/cppbor.h118
-rw-r--r--src/cppbor.cpp99
-rw-r--r--src/cppbor_parse.cpp15
-rw-r--r--tests/cppbor_test.cpp170
5 files changed, 327 insertions, 93 deletions
diff --git a/Android.bp b/Android.bp
index e57f1d7..adefa63 100644
--- a/Android.bp
+++ b/Android.bp
@@ -12,8 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+cc_defaults {
+ name: "libcppbor_defaults",
+ cflags: [
+ "-Wall",
+ "-Wextra",
+ "-Werror",
+ ],
+}
+
cc_library {
name: "libcppbor_external",
+ defaults: [
+ "libcppbor_defaults",
+ ],
vendor_available: true,
host_supported: true,
srcs: [
@@ -31,6 +43,9 @@ cc_library {
cc_test {
name: "cppbor_test_external",
+ defaults: [
+ "libcppbor_defaults",
+ ],
srcs: [
"tests/cppbor_test.cpp"
],
@@ -46,6 +61,9 @@ cc_test {
cc_test_host {
name: "cppbor_host_test_external",
+ defaults: [
+ "libcppbor_defaults",
+ ],
srcs: [
"tests/cppbor_test.cpp"
],
diff --git a/include/cppbor/cppbor.h b/include/cppbor/cppbor.h
index 146974e..c33960b 100644
--- a/include/cppbor/cppbor.h
+++ b/include/cppbor/cppbor.h
@@ -508,6 +508,8 @@ class Map : public Item {
public:
static constexpr MajorType kMajorType = MAP;
+ using entry_type = std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>;
+
Map() = default;
Map(const Map& other) = delete;
Map(Map&&) = default;
@@ -536,60 +538,80 @@ class Map : public Item {
bool isCompound() const override { return true; }
- virtual size_t size() const {
- assertInvariant();
- return mEntries.size() / 2;
- }
+ virtual size_t size() const { return mEntries.size(); }
size_t encodedSize() const override {
- return std::accumulate(mEntries.begin(), mEntries.end(), headerSize(size()),
- [](size_t sum, auto& entry) { return sum + entry->encodedSize(); });
+ return std::accumulate(
+ mEntries.begin(), mEntries.end(), headerSize(size()), [](size_t sum, auto& entry) {
+ return sum + entry.first->encodedSize() + entry.second->encodedSize();
+ });
}
using Item::encode; // Make base versions visible.
uint8_t* encode(uint8_t* pos, const uint8_t* end) const override;
void encode(EncodeCallback encodeCallback) const override;
+ /**
+ * Find and return the value associated with `key`, if any.
+ *
+ * If the searched-for `key` is not present, returns `nullptr`.
+ *
+ * Note that if the map is canonicalized (sorted), Map::get() peforms a binary search. If your
+ * map is large and you're searching in it many times, it may be worthwhile to canonicalize it
+ * to make Map::get() faster. Any use of a method that might modify the map disables the
+ * speedup.
+ */
template <typename Key, typename Enable>
const std::unique_ptr<Item>& get(Key key) const;
- std::pair<const std::unique_ptr<Item>&, const std::unique_ptr<Item>&> operator[](
- size_t index) const {
- assertInvariant();
- return {mEntries[index * 2], mEntries[index * 2 + 1]};
- }
-
- std::pair<std::unique_ptr<Item>&, std::unique_ptr<Item>&> operator[](size_t index) {
- assertInvariant();
- return {mEntries[index * 2], mEntries[index * 2 + 1]};
+ // Note that use of non-const operator[] marks the map as not canonicalized.
+ auto& operator[](size_t index) {
+ mCanonicalized = false;
+ return mEntries[index];
}
+ const auto& operator[](size_t index) const { return mEntries[index]; }
MajorType type() const override { return kMajorType; }
Map* asMap() override { return this; }
- // Sorts the map in canonical order, as defined in RFC 7049. Use this before encoding if you
- // want canonicalization; cppbor does not canonicalize by default, though the integer encodings
- // are always canonical and cppbor does not support indefinite-length encodings, so map order
- // canonicalization is the only thing that needs to be done.
- //
- // Note that this canonicalization algorithm moves the map contents twice, so it isn't
- // particularly efficient. Avoid using it unnecessarily on large maps. It does nothing for empty
- // or single-entry maps, though, so it's recommended to always call it when you need a canonical
- // map, even if the map is known to have less than two entries. That way if a maintainer later
- // adds another item canonicalization will be preserved.
- Map& canonicalize() &;
- Map&& canonicalize() && {
- canonicalize();
+ /**
+ * Sorts the map in canonical order, as defined in RFC 7049. Use this before encoding if you
+ * want canonicalization; cppbor does not canonicalize by default, though the integer encodings
+ * are always canonical and cppbor does not support indefinite-length encodings, so map order
+ * canonicalization is the only thing that needs to be done.
+ *
+ * @param recurse If set to true, canonicalize() will also walk the contents of the map and
+ * canonicalize any contained maps as well.
+ */
+ Map& canonicalize(bool recurse = false) &;
+ Map&& canonicalize(bool recurse = false) && {
+ canonicalize(recurse);
return std::move(*this);
}
+ bool isCanonical() { return mCanonicalized; }
+
std::unique_ptr<Item> clone() const override;
+ auto begin() {
+ mCanonicalized = false;
+ return mEntries.begin();
+ }
+ auto begin() const { return mEntries.begin(); }
+ auto end() {
+ mCanonicalized = false;
+ return mEntries.end();
+ }
+ auto end() const { return mEntries.end(); }
+
+ // Returns true if a < b, per CBOR map key canonicalization rules.
+ static bool keyLess(const Item* a, const Item* b);
+
protected:
- std::vector<std::unique_ptr<Item>> mEntries;
+ std::vector<entry_type> mEntries;
private:
- void assertInvariant() const;
+ bool mCanonicalized = false;
};
class Semantic : public Item {
@@ -872,6 +894,14 @@ std::unique_ptr<Item> makeItem(T v) {
return std::unique_ptr<Item>(p);
}
+inline void map_helper(Map& /* map */) {}
+
+template <typename Key, typename Value, typename... Rest>
+inline void map_helper(Map& map, Key&& key, Value&& value, Rest&&... rest) {
+ map.add(std::forward<Key>(key), std::forward<Value>(value));
+ map_helper(map, std::forward<Rest>(rest)...);
+}
+
} // namespace details
template <typename... Args,
@@ -899,14 +929,15 @@ template <typename... Args,
/* Prevent use as copy ctor */ typename = std::enable_if_t<(sizeof...(Args)) != 1>>
Map::Map(Args&&... args) {
static_assert((sizeof...(Args)) % 2 == 0, "Map must have an even number of entries");
- mEntries.reserve(sizeof...(args));
- (mEntries.push_back(details::makeItem(std::forward<Args>(args))), ...);
+ mEntries.reserve(sizeof...(args) / 2);
+ details::map_helper(*this, std::forward<Args>(args)...);
}
template <typename Key, typename Value>
Map& Map::add(Key&& key, Value&& value) & {
- mEntries.push_back(details::makeItem(std::forward<Key>(key)));
- mEntries.push_back(details::makeItem(std::forward<Value>(value)));
+ mEntries.push_back({details::makeItem(std::forward<Key>(key)),
+ details::makeItem(std::forward<Value>(value))});
+ mCanonicalized = false;
return *this;
}
@@ -922,14 +953,21 @@ template <typename Key,
typename = std::enable_if_t<std::is_integral_v<Key> || std::is_enum_v<Key> ||
details::is_text_type_v<Key>::value>>
const std::unique_ptr<Item>& Map::get(Key key) const {
- assertInvariant();
auto keyItem = details::makeItem(key);
- for (size_t i = 0; i < mEntries.size(); i += 2) {
- if (*keyItem == *mEntries[i]) {
- return mEntries[i + 1];
- }
+
+ if (mCanonicalized) {
+ // It's sorted, so binary-search it.
+ auto found = std::lower_bound(begin(), end(), keyItem.get(),
+ [](const entry_type& entry, const Item* key) {
+ return keyLess(entry.first.get(), key);
+ });
+ return (found == end() || *found->first != *keyItem) ? kEmptyItemPtr : found->second;
+ } else {
+ // Unsorted, do a linear search.
+ auto found = std::find_if(
+ begin(), end(), [&](const entry_type& entry) { return *entry.first == *keyItem; });
+ return found == end() ? kEmptyItemPtr : found->second;
}
- return kEmptyItemPtr;
}
template <typename T>
diff --git a/src/cppbor.cpp b/src/cppbor.cpp
index 632dd0b..dc34985 100644
--- a/src/cppbor.cpp
+++ b/src/cppbor.cpp
@@ -67,8 +67,7 @@ bool cborAreAllElementsNonCompound(const Item* compoundItem) {
}
} else {
const Map* map = compoundItem->asMap();
- for (size_t n = 0; n < map->size(); n++) {
- auto [keyEntry, valueEntry] = (*map)[n];
+ for (auto& [keyEntry, valueEntry] : *map) {
switch (keyEntry->type()) {
case ARRAY:
case MAP:
@@ -182,11 +181,9 @@ bool prettyPrintInternal(const Item* item, string& out, size_t indent, size_t ma
out.append("{}");
} else {
out.append("{\n" + indentString);
- for (size_t n = 0; n < map->size(); n++) {
+ for (auto& [map_key, map_value] : *map) {
out.append(" ");
- auto [map_key, map_value] = (*map)[n];
-
if (!prettyPrintInternal(map_key.get(), out, indent + 2, maxBStrSize,
mapKeysToNotPrint)) {
return false;
@@ -400,17 +397,20 @@ std::unique_ptr<Item> Array::clone() const {
bool Map::operator==(const Map& other) const& {
return size() == other.size()
- // Can't use vector::operator== because the contents are pointers. std::equal lets us
- // provide a predicate that does the dereferencing.
- && std::equal(mEntries.begin(), mEntries.end(), other.mEntries.begin(),
- [](auto& a, auto& b) -> bool { return *a == *b; });
+ // Can't use vector::operator== because the contents are pairs of pointers. std::equal
+ // lets us provide a predicate that does the dereferencing.
+ && std::equal(begin(), end(), other.begin(), [](auto& a, auto& b) {
+ return *a.first == *b.first && *a.second == *b.second;
+ });
}
uint8_t* Map::encode(uint8_t* pos, const uint8_t* end) const {
pos = encodeHeader(size(), pos, end);
if (!pos) return nullptr;
for (auto& entry : mEntries) {
- pos = entry->encode(pos, end);
+ pos = entry.first->encode(pos, end);
+ if (!pos) return nullptr;
+ pos = entry.second->encode(pos, end);
if (!pos) return nullptr;
}
return pos;
@@ -419,71 +419,76 @@ uint8_t* Map::encode(uint8_t* pos, const uint8_t* end) const {
void Map::encode(EncodeCallback encodeCallback) const {
encodeHeader(size(), encodeCallback);
for (auto& entry : mEntries) {
- entry->encode(encodeCallback);
+ entry.first->encode(encodeCallback);
+ entry.second->encode(encodeCallback);
}
}
-void Map::assertInvariant() const {
- CHECK(mEntries.size() % 2 == 0);
-}
-
-bool mapKeyLess(const std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>& a,
- const std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>& b) {
- auto& keyA = a.first;
- auto& keyB = b.first;
-
+bool Map::keyLess(const Item* a, const Item* b) {
// CBOR map canonicalization rules are:
// 1. If two keys have different lengths, the shorter one sorts earlier.
- if (keyA->encodedSize() < keyB->encodedSize()) return true;
- if (keyA->encodedSize() > keyB->encodedSize()) return false;
+ if (a->encodedSize() < b->encodedSize()) return true;
+ if (a->encodedSize() > b->encodedSize()) return false;
// 2. If two keys have the same length, the one with the lower value in (byte-wise) lexical
// order sorts earlier. This requires encoding both items.
- auto encodedA = keyA->encode();
- auto encodedB = keyB->encode();
+ auto encodedA = a->encode();
+ auto encodedB = b->encode();
return std::lexicographical_compare(encodedA.begin(), encodedA.end(), //
encodedB.begin(), encodedB.end());
}
-Map& Map::canonicalize() & {
- assertInvariant();
+void recursivelyCanonicalize(std::unique_ptr<Item>& item) {
+ switch (item->type()) {
+ case UINT:
+ case NINT:
+ case BSTR:
+ case TSTR:
+ case SIMPLE:
+ return;
- if (size() < 2) {
- // Empty or single-entry map; no need to reorder.
- return *this;
- }
+ case ARRAY:
+ std::for_each(item->asArray()->begin(), item->asArray()->end(),
+ recursivelyCanonicalize);
+ return;
- // The entries of a Map are stored in a flat vector. We can't easily apply
- // std::sort on that, so instead we move all of the entries into a vector of
- // std::pair, sort that, then move all of the entries back into the original
- // flat vector.
- vector<std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>> temp;
- temp.reserve(size());
+ case MAP:
+ item->asMap()->canonicalize(true /* recurse */);
+ return;
- for (size_t i = 0; i < mEntries.size() - 1; i += 2) {
- temp.push_back({std::move(mEntries[i]), std::move(mEntries[i + 1])});
+ case SEMANTIC:
+ recursivelyCanonicalize(item->asSemantic()->child());
+ return;
}
+}
- std::sort(temp.begin(), temp.end(), mapKeyLess);
+Map& Map::canonicalize(bool recurse) & {
+ if (recurse) {
+ for (auto& entry : mEntries) {
+ recursivelyCanonicalize(entry.first);
+ recursivelyCanonicalize(entry.second);
+ }
+ }
- mEntries.resize(0);
- mEntries.reserve(temp.size() * 2); // Should be a NOP since capacity should be unchanged.
- for (auto& entry : temp) {
- mEntries.push_back(std::move(entry.first));
- mEntries.push_back(std::move(entry.second));
+ if (size() < 2 || mCanonicalized) {
+ // Trivially or already canonical; do nothing.
+ return *this;
}
+ std::sort(begin(), end(),
+ [](auto& a, auto& b) { return keyLess(a.first.get(), b.first.get()); });
+ mCanonicalized = true;
return *this;
}
std::unique_ptr<Item> Map::clone() const {
- assertInvariant();
auto res = std::make_unique<Map>();
- for (size_t i = 0; i < mEntries.size(); i += 2) {
- res->add(mEntries[i]->clone(), mEntries[i + 1]->clone());
+ for (auto& [key, value] : *this) {
+ res->add(key->clone(), value->clone());
}
+ res->mCanonicalized = mCanonicalized;
return res;
}
diff --git a/src/cppbor_parse.cpp b/src/cppbor_parse.cpp
index 488f8c7..2736b71 100644
--- a/src/cppbor_parse.cpp
+++ b/src/cppbor_parse.cpp
@@ -114,7 +114,7 @@ class IncompleteItem {
class IncompleteArray : public Array, public IncompleteItem {
public:
- IncompleteArray(size_t size) : mSize(size) {}
+ explicit IncompleteArray(size_t size) : mSize(size) {}
// We return the "complete" size, rather than the actual size.
size_t size() const override { return mSize; }
@@ -130,23 +130,28 @@ class IncompleteArray : public Array, public IncompleteItem {
class IncompleteMap : public Map, public IncompleteItem {
public:
- IncompleteMap(size_t size) : mSize(size) {}
+ explicit IncompleteMap(size_t size) : mSize(size) {}
// We return the "complete" size, rather than the actual size.
size_t size() const override { return mSize; }
void add(std::unique_ptr<Item> item) override {
- mEntries.reserve(mSize * 2);
- mEntries.push_back(std::move(item));
+ if (mKeyHeldForAdding) {
+ mEntries.reserve(mSize);
+ mEntries.push_back({std::move(mKeyHeldForAdding), std::move(item)});
+ } else {
+ mKeyHeldForAdding = std::move(item);
+ }
}
private:
+ std::unique_ptr<Item> mKeyHeldForAdding;
size_t mSize;
};
class IncompleteSemantic : public Semantic, public IncompleteItem {
public:
- IncompleteSemantic(uint64_t value) : Semantic(value) {}
+ explicit IncompleteSemantic(uint64_t value) : Semantic(value) {}
// We return the "complete" size, rather than the actual size.
size_t size() const override { return 1; }
diff --git a/tests/cppbor_test.cpp b/tests/cppbor_test.cpp
index 0a8000b..ed4b94e 100644
--- a/tests/cppbor_test.cpp
+++ b/tests/cppbor_test.cpp
@@ -829,7 +829,7 @@ TEST(CloningTest, Map) {
EXPECT_EQ(clone->type(), MAP);
EXPECT_NE(clone->asMap(), nullptr);
EXPECT_EQ(item, *clone->asMap());
- auto [key, value] = item[0];
+ auto& [key, value] = item[0];
std::move(key);
std::move(value);
std::move(item);
@@ -913,6 +913,122 @@ TEST(MapCanonicalizationTest, CanonicalizationTest) {
"}");
}
+TEST(MapCanonicalizationTest, DecanonicalizationTest) {
+ Map map;
+ map.add("hello", 1)
+ .add("h", 1)
+ .add(1, 1)
+ .add(-4, 1)
+ .add(-5, 1)
+ .add(2, 1)
+ .add("hellp", 1)
+ .add(254, 1)
+ .add(27, 1);
+
+ EXPECT_FALSE(map.isCanonical());
+ map.canonicalize();
+ EXPECT_TRUE(map.isCanonical());
+
+ /*
+ * Any operation that could potentially mutate the contents of the map should mark it as
+ * non-canonical. This includes getting non-const iterators or using the non-const [] operator.
+ */
+
+ map.begin();
+ EXPECT_FALSE(map.isCanonical());
+
+ map.canonicalize();
+ EXPECT_TRUE(map.isCanonical());
+
+ map.end(); // Non-const map.end() invalidates canonicalization.
+ EXPECT_FALSE(map.isCanonical());
+
+ map.canonicalize();
+ EXPECT_TRUE(map.isCanonical());
+
+ map[0]; // Non-const map.operator[]() invalidates canonicalization.
+ EXPECT_FALSE(map.isCanonical());
+}
+
+TEST(MapCanonicalizationTest, RecursiveTest) {
+ auto map = Map() //
+ .add("hello", 1)
+ .add("h", 1)
+ .add(1, 1)
+ .add(-4, Array( //
+ 2, 1,
+ Map() //
+ .add("b", 1)
+ .add(Map() //
+ .add("hello", "goodbye")
+ .add(1, 9)
+ .add(0, 3),
+ Map() //
+ .add("b", 1)
+ .add("a", 2))))
+ .add(-5, 1)
+ .add(2, 1)
+ .add("hellp", 1)
+ .add(254, 1)
+ .add(27, 1);
+
+ EXPECT_EQ(prettyPrint(&map),
+ "{\n"
+ " 'hello' : 1,\n"
+ " 'h' : 1,\n"
+ " 1 : 1,\n"
+ " -4 : [\n"
+ " 2,\n"
+ " 1,\n"
+ " {\n"
+ " 'b' : 1,\n"
+ " {\n"
+ " 'hello' : 'goodbye',\n"
+ " 1 : 9,\n"
+ " 0 : 3,\n"
+ " } : {\n"
+ " 'b' : 1,\n"
+ " 'a' : 2,\n"
+ " },\n"
+ " },\n"
+ " ],\n"
+ " -5 : 1,\n"
+ " 2 : 1,\n"
+ " 'hellp' : 1,\n"
+ " 254 : 1,\n"
+ " 27 : 1,\n"
+ "}");
+
+ map.canonicalize(true /* recurse */);
+
+ EXPECT_EQ(prettyPrint(&map),
+ "{\n"
+ " 1 : 1,\n"
+ " 2 : 1,\n"
+ " -4 : [\n"
+ " 2,\n"
+ " 1,\n"
+ " {\n"
+ " 'b' : 1,\n"
+ " {\n"
+ " 0 : 3,\n"
+ " 1 : 9,\n"
+ " 'hello' : 'goodbye',\n"
+ " } : {\n"
+ " 'a' : 2,\n"
+ " 'b' : 1,\n"
+ " },\n"
+ " },\n"
+ " ],\n"
+ " -5 : 1,\n"
+ " 27 : 1,\n"
+ " 254 : 1,\n"
+ " 'h' : 1,\n"
+ " 'hello' : 1,\n"
+ " 'hellp' : 1,\n"
+ "}");
+}
+
class MockParseClient : public ParseClient {
public:
MOCK_METHOD4(item, ParseClient*(std::unique_ptr<Item>& item, const uint8_t* hdrBegin,
@@ -1557,6 +1673,58 @@ TEST(ArrayIterationTest, BidirectionalTest) {
EXPECT_EQ(++iter, array.end());
}
+TEST(MapIterationTest, EmptyMap) {
+ Map map;
+
+ EXPECT_EQ(map.begin(), map.end());
+}
+
+TEST(MapIterationTest, ForwardTest) {
+ Map map(1, 2, 3, "hello", -4, 5);
+
+ auto iter = map.begin();
+ ASSERT_NE(iter, map.end());
+ EXPECT_EQ(*iter->first, Uint(1));
+ EXPECT_EQ(*iter->second, Uint(2));
+
+ ASSERT_NE(++iter, map.end());
+ EXPECT_EQ(*iter->first, Uint(3));
+ EXPECT_EQ(*(iter++)->second, Tstr("hello"));
+
+ ASSERT_NE(iter, map.end());
+ EXPECT_EQ(*iter->first, Nint(-4));
+ EXPECT_EQ(*(iter++)->second, Uint(5));
+
+ EXPECT_EQ(iter, map.end());
+}
+
+TEST(MapIterationTest, BidirectionalTest) {
+ Map map(1, 2, 3, "hello", -4, 5);
+
+ auto iter = map.begin();
+ ASSERT_NE(iter, map.end());
+ EXPECT_EQ(*iter->first, Uint(1));
+ EXPECT_EQ(*iter->second, Uint(2));
+
+ ASSERT_NE(++iter, map.end());
+ EXPECT_EQ(*iter->first, Uint(3));
+ EXPECT_EQ(*(iter--)->second, Tstr("hello"));
+
+ ASSERT_NE(iter, map.end());
+ EXPECT_EQ(*iter->first, Uint(1));
+ EXPECT_EQ(*(iter++)->second, Uint(2));
+
+ ASSERT_NE(iter, map.end());
+ EXPECT_EQ(*iter->first, Uint(3));
+ EXPECT_EQ(*iter->second, Tstr("hello"));
+
+ ASSERT_NE(++iter, map.end());
+ EXPECT_EQ(*iter->first, Nint(-4));
+ EXPECT_EQ(*(iter++)->second, Uint(5));
+
+ EXPECT_EQ(iter, map.end());
+}
+
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();