aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoon Young Seo <jyseo@google.com>2022-10-25 20:23:00 +0000
committerPresubmit Automerger Backend <android-build-presubmit-automerger-backend@system.gserviceaccount.com>2022-10-25 20:23:00 +0000
commit63229a55bb48c52bee821ea42e6cfda84304832f (patch)
tree68c5be46c8c16659306b8d762017005ec2caac32
parentf7965238c3128a35ee9a0d11f128f8b8e278d70a (diff)
parent6f1d6e6dc94c6d7714e58e435e0d9ed1d264f395 (diff)
downloadsetfilters-63229a55bb48c52bee821ea42e6cfda84304832f.tar.gz
Original change: https://googleplex-android-review.googlesource.com/c/platform/external/setfilters/+/20281941 Bug: 249533805 Change-Id: I8f9a8762d62ab4f7978768bf3b95103778aad6b3 Merged-In: I3176564c836f69c02796cfe1f653f7f0b591d9bb
-rw-r--r--LICENSE202
-rw-r--r--METADATA14
-rw-r--r--MODULE_LICENSE_APACHE20
-rw-r--r--OWNERS2
-rw-r--r--README.md7
-rw-r--r--WORKSPACE47
-rw-r--r--docs/code-of-conduct.md93
-rw-r--r--docs/contributing.md28
-rw-r--r--java/com/google/setfilters/cuckoofilter/BUILD28
-rw-r--r--java/com/google/setfilters/cuckoofilter/CuckooFilter.java278
-rw-r--r--java/com/google/setfilters/cuckoofilter/CuckooFilterArray.java182
-rw-r--r--java/com/google/setfilters/cuckoofilter/CuckooFilterConfig.java364
-rw-r--r--java/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctions.java35
-rw-r--r--java/com/google/setfilters/cuckoofilter/CuckooFilterStrategies.java62
-rw-r--r--java/com/google/setfilters/cuckoofilter/CuckooFilterTable.java106
-rw-r--r--java/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTable.java266
-rw-r--r--java/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTable.java38
-rw-r--r--java/com/google/setfilters/cuckoofilter/UncompressedCuckooFilterTable.java136
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/BUILD130
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/CuckooFilterArrayTest.java199
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/CuckooFilterConfigTest.java272
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctionsTest.java33
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/CuckooFilterStrategiesTest.java103
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/CuckooFilterTableTest.java202
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/CuckooFilterTest.java323
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTableTest.java65
-rw-r--r--javatests/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTableTest.java51
-rw-r--r--third_party/java/errorprone/BUILD23
-rw-r--r--third_party/java/guava/BUILD24
-rw-r--r--third_party/java/junit/BUILD24
-rw-r--r--third_party/java/mockito/BUILD23
-rw-r--r--third_party/java/truth/BUILD26
32 files changed, 3386 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..43dd621
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,14 @@
+name: "setfilters"
+description:
+ "A library which contains a collection of space efficient set filters data "
+ "structures such as cuckoo filter."
+
+third_party {
+ url {
+ type: GIT
+ value: "https://github.com/google/setfilters"
+ }
+ version: "1.0.0"
+ license_type: NOTICE
+ last_upgrade_date { year: 2022 month: 9 day: 1 }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..305ffc8
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,2 @@
+kwlyeo@google.com
+jyseo@google.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..27fe2dc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+# Set filters library
+
+This repository contains implementations of a collection set filter data structures.
+
+## Note
+
+This is not an officially supported Google product.
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..3d92a12
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,47 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+RULES_JVM_EXTERNAL_TAG = "4.2"
+
+RULES_JVM_EXTERNAL_SHA = "cd1a77b7b02e8e008439ca76fd34f5b07aecb8c752961f9640dea15e9e5ba1ca"
+
+http_archive(
+ name = "rules_jvm_external",
+ sha256 = RULES_JVM_EXTERNAL_SHA,
+ strip_prefix = "rules_jvm_external-%s" % RULES_JVM_EXTERNAL_TAG,
+ url = "https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG,
+)
+
+load("@rules_jvm_external//:defs.bzl", "maven_install")
+
+GUAVA_VERSION = "27.1"
+
+ERROR_PRONE_VERSION = "2.14.0"
+
+maven_install(
+ artifacts = [
+ "com.google.errorprone:error_prone_annotation:%s" % ERROR_PRONE_VERSION,
+ "com.google.guava:guava:%s-jre" % GUAVA_VERSION,
+ "com.google.truth:truth:1.1",
+ "com.google.truth.extensions:truth-java8-extension:1.1.3",
+ "junit:junit:4.13",
+ "org.mockito:mockito-core:2.28.2",
+ ],
+ repositories = [
+ "https://repo1.maven.org/maven2",
+ "https://maven.google.com",
+ ],
+)
diff --git a/docs/code-of-conduct.md b/docs/code-of-conduct.md
new file mode 100644
index 0000000..dc079b4
--- /dev/null
+++ b/docs/code-of-conduct.md
@@ -0,0 +1,93 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of
+experience, education, socio-economic status, nationality, personal appearance,
+race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+ advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+ address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, or to ban temporarily or permanently any
+contributor for other behaviors that they deem inappropriate, threatening,
+offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+This Code of Conduct also applies outside the project spaces when the Project
+Steward has a reasonable belief that an individual's behavior may have a
+negative impact on the project or its community.
+
+## Conflict Resolution
+
+We do not believe that all conflict is bad; healthy debate and disagreement
+often yield positive results. However, it is never okay to be disrespectful or
+to engage in behavior that violates the project’s code of conduct.
+
+If you see someone violating the code of conduct, you are encouraged to address
+the behavior directly with those involved. Many issues can be resolved quickly
+and easily, and this gives people more control over the outcome of their
+dispute. If you are unable to resolve the matter for any reason, or if the
+behavior is threatening or harassing, report it. We are dedicated to providing
+an environment where participants feel welcome and safe.
+
+Reports should be directed to *[PROJECT STEWARD NAME(s) AND EMAIL(s)]*, the
+Project Steward(s) for *[PROJECT NAME]*. It is the Project Steward’s duty to
+receive and address reported violations of the code of conduct. They will then
+work with a committee consisting of representatives from the Open Source
+Programs Office and the Google Open Source Strategy team. If for any reason you
+are uncomfortable reaching out to the Project Steward, please email
+opensource@google.com.
+
+We will investigate every complaint, but you may not receive a direct response.
+We will use our discretion in determining when and how to follow up on reported
+incidents, which may range from not taking action to permanent expulsion from
+the project and project-sponsored spaces. We will notify the accused of the
+report and provide them an opportunity to discuss it before any action is taken.
+The identity of the reporter will be omitted from the details of the report
+supplied to the accused. In potentially harmful situations, such as ongoing
+harassment or threats to anyone's safety, we may take action without notice.
+
+## Attribution
+
+This Code of Conduct is adapted from the Contributor Covenant, version 1.4,
+available at
+https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 0000000..6272489
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,28 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+## Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google/conduct/).
diff --git a/java/com/google/setfilters/cuckoofilter/BUILD b/java/com/google/setfilters/cuckoofilter/BUILD
new file mode 100644
index 0000000..3fa3fc6
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/BUILD
@@ -0,0 +1,28 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+java_library(
+ name = "cuckoofilter",
+ srcs = glob(
+ ["*.java"],
+ ),
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/errorprone:annotations",
+ ],
+)
diff --git a/java/com/google/setfilters/cuckoofilter/CuckooFilter.java b/java/com/google/setfilters/cuckoofilter/CuckooFilter.java
new file mode 100644
index 0000000..b37840b
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/CuckooFilter.java
@@ -0,0 +1,278 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.HashCode;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.Random;
+
+/**
+ * A space efficient, probabilistic multiset data structure that supports membership check,
+ * insertion, and deletion of the elements.
+ *
+ * <p>Cuckoo filter enables tradeoffs between its space efficiency and the false positive
+ * probability of the membership check.
+ *
+ * <p>See the original paper https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf for more
+ * details.
+ *
+ * <p>This class is not thread-safe.
+ */
+public final class CuckooFilter<T> {
+ private final CuckooFilterConfig config;
+ private final CuckooFilterTable table;
+ private final Funnel<? super T> funnel;
+ private final Random random;
+
+ /** Counts the total number of elements in the cuckoo filter. */
+ private long count;
+
+ /** Instantiates a new cuckoo filter. */
+ public static <T> CuckooFilter<T> createNew(CuckooFilterConfig config, Funnel<? super T> funnel) {
+ Random random = new Random();
+ CuckooFilterTable table =
+ CuckooFilterTable.create(config.size(), config.useSpaceOptimization(), random);
+ return new CuckooFilter<T>(config, table, funnel, random);
+ }
+
+ /**
+ * Instantiates a cuckoo filter from serialized cuckoo filter table.
+ *
+ * <p>Note that {@link SerializedCuckooFilterTable} does not contain any data on {@link
+ * CuckooFilterConfig.HashFunction}, {@link CuckooFilterConfig.Strategy}, or {@link Funnel} used,
+ * so it is up to the user to supply appropriate hash function, strategy, and funnel that were
+ * used to generate the {@link SerializedCuckooFilterTable}.
+ */
+ public static <T> CuckooFilter<T> createFromSerializedTable(
+ SerializedCuckooFilterTable serializedTable,
+ CuckooFilterConfig.HashFunction hashFunction,
+ CuckooFilterConfig.Strategy strategy,
+ Funnel<? super T> funnel) {
+ Random random = new Random();
+ CuckooFilterTable table = CuckooFilterTable.createFromSerialization(serializedTable, random);
+ return new CuckooFilter<T>(
+ CuckooFilterConfig.newBuilder()
+ .setSize(table.size())
+ .setHashFunction(hashFunction)
+ .setStrategy(strategy)
+ .build(),
+ table,
+ funnel,
+ random);
+ }
+
+ private CuckooFilter(
+ CuckooFilterConfig config, CuckooFilterTable table, Funnel<? super T> funnel, Random random) {
+ this.config = config;
+ this.table = table;
+ this.funnel = funnel;
+ this.random = random;
+ count = 0;
+ }
+
+ /**
+ * Returns true if {@code element} is in the cuckoo filter.
+ *
+ * <p>By the probabilistic nature of the cuckoo filter data structure, this method may return a
+ * false positive result. In other words, this method may incorrectly return true for an element
+ * that was actually never inserted. This probability can depend on various factors, including the
+ * size of the cuckoo filter and the hash function used.
+ *
+ * <p>However, it is guaranteed that this method never returns a false negative result, as long as
+ * {@code delete} method is called on an element that exists in the filter. Please see {@code
+ * delete} method for more details.
+ */
+ public boolean contains(T element) {
+ HashCode hash = config.hashFunction().hash(element, funnel);
+ long fingerprint =
+ config.strategy().computeFingerprint(hash, config.size().fingerprintLength());
+ int bucketIndex = config.strategy().computeBucketIndex(hash, config.size().bucketCount());
+ int otherBucketIndex =
+ config
+ .strategy()
+ .computeOtherBucketIndex(
+ fingerprint, bucketIndex, config.size().bucketCount(), config.hashFunction());
+ return table.contains(bucketIndex, fingerprint)
+ || table.contains(otherBucketIndex, fingerprint);
+ }
+
+ /**
+ * Inserts {@code element} to the cuckoo filter, returning true if the element was inserted
+ * successfully.
+ *
+ * <p>Insertion of {@code element} will fail if there is no room for {@code element}. Note that
+ * even when the insertion of {@code element} fails, it is possible for another element to be
+ * inserted successfully. Even then, the insertion failure should be a good indicator that the
+ * filter is getting close to its maximum capacity.
+ */
+ public boolean insert(T element) {
+ HashCode hash = config.hashFunction().hash(element, funnel);
+ long fingerprint =
+ config.strategy().computeFingerprint(hash, config.size().fingerprintLength());
+ int bucketIndex = config.strategy().computeBucketIndex(hash, config.size().bucketCount());
+ int otherBucketIndex =
+ config
+ .strategy()
+ .computeOtherBucketIndex(
+ fingerprint, bucketIndex, config.size().bucketCount(), config.hashFunction());
+
+ // First attempt to insert the fingerprint to one of the two assigned buckets.
+ if (attemptInsertion(fingerprint, bucketIndex, otherBucketIndex)) {
+ count++;
+ return true;
+ }
+
+ // If both buckets are full, execute insertion with repeated replacements algorithm.
+ int startBucketIndex = (random.nextInt(2) == 0) ? bucketIndex : otherBucketIndex;
+ boolean inserted = insertWithRepeatedReplacements(fingerprint, startBucketIndex);
+ if (inserted) {
+ count++;
+ }
+ return inserted;
+ }
+
+ /**
+ * Deletes {@code element} from the cuckoo filter, returning true if the element was deleted
+ * successfully.
+ *
+ * <p>It is critical for {@code delete} to be called on an already existing element. Otherwise,
+ * the filter may incorrectly delete a wrong element. When this happens, it is possible for {@code
+ * contains} method to return a false negative result.
+ */
+ public boolean delete(T element) {
+ HashCode hash = config.hashFunction().hash(element, funnel);
+ long fingerprint =
+ config.strategy().computeFingerprint(hash, config.size().fingerprintLength());
+ int bucketIndex = config.strategy().computeBucketIndex(hash, config.size().bucketCount());
+ int otherBucketIndex =
+ config
+ .strategy()
+ .computeOtherBucketIndex(
+ fingerprint, bucketIndex, config.size().bucketCount(), config.hashFunction());
+ boolean deleted =
+ table.delete(bucketIndex, fingerprint) || table.delete(otherBucketIndex, fingerprint);
+ if (deleted) {
+ count--;
+ }
+ return deleted;
+ }
+
+ /** Returns the size of the cuckoo filter. */
+ public CuckooFilterConfig.Size size() {
+ return config.size();
+ }
+
+ /** Returns the count of the elements in the cuckoo filter. */
+ public long count() {
+ return count;
+ }
+
+ /**
+ * Returns the ratio of the total number of elements in the cuckoo filter and the theoretical max
+ * capacity.
+ *
+ * <p>The returned value is in range [0, 1].
+ */
+ public double load() {
+ return count / ((double) config.size().bucketCount() * config.size().bucketCapacity());
+ }
+
+ /**
+ * Serializes the state of the cuckoo filter table.
+ *
+ * <p>Note that this method does not serialize hash function, strategy, and funnel. When
+ * instantiating a cuckoo filter from the returned {@link SerializedCuckooFilterTable}, it is up
+ * to the user to supply appropriate hash function, strategy, and funnel that were used.
+ */
+ public SerializedCuckooFilterTable serializeTable() {
+ return table.serialize();
+ }
+
+ /**
+ * Attempts to insert {@code fingerprint} to one of the buckets with indices {@code bucketIndex}
+ * and {@code otherBucketIndex}, returning true when successful. Returns false if both buckets are
+ * full and the insertion failed.
+ */
+ private boolean attemptInsertion(long fingerprint, int bucketIndex, int otherBucketIndex) {
+ if (!table.isFull(bucketIndex)) {
+ table.insertWithReplacement(bucketIndex, fingerprint);
+ return true;
+ }
+ if (!table.isFull(otherBucketIndex)) {
+ table.insertWithReplacement(otherBucketIndex, fingerprint);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Randomly traverses the cuckoo graph to find an available bucket for insertion.
+ *
+ * <p>At a high level, this algorithm starts at vertex {@code bucketIndex} and performs a random
+ * walk of length at most {@link CuckooFilterConfig.Strategy#maxReplacementCount}. If an available
+ * bucket is found, the algorithm "pushes" all the fingerprints (edges) that are visited (note
+ * that in the cuckoo graph, the edges are the fingerprints) to their alternate buckets, and make
+ * room for {@code fingerprint} to be inserted.
+ *
+ * <p>If during the random walk an available bucket is not found, the insertion fails and the
+ * method returns false.
+ *
+ * <p>Note that it is possible to deterministically find an available bucket by performing breadth
+ * first search in the cuckoo graph, but this is usually slower and the extra chance of successful
+ * insertion is negligibly small in practice.
+ */
+ private boolean insertWithRepeatedReplacements(long fingerprint, int bucketIndex) {
+ List<Integer> visitedBucketIndices = new ArrayList<>();
+ List<Long> replacedFingerprints = new ArrayList<>();
+
+ long currFingerprint = fingerprint;
+ int currBucketIndex = bucketIndex;
+ visitedBucketIndices.add(-1); // Just for index alignment purpose.
+ replacedFingerprints.add(currFingerprint);
+ for (int i = 0; i < config.strategy().maxReplacementCount(); i++) {
+ Optional<Long> replacedFingerprint =
+ table.insertWithReplacement(currBucketIndex, currFingerprint);
+ // Found an available bucket, and the insertion is successful.
+ if (replacedFingerprint.isEmpty()) {
+ return true;
+ }
+
+ visitedBucketIndices.add(currBucketIndex);
+ replacedFingerprints.add(replacedFingerprint.get());
+
+ currFingerprint = replacedFingerprint.get();
+ currBucketIndex =
+ config
+ .strategy()
+ .computeOtherBucketIndex(
+ currFingerprint,
+ currBucketIndex,
+ config.size().bucketCount(),
+ config.hashFunction());
+ }
+
+ // Failed to find a bucket to insert. Reverse the replacements and declare that the insertion
+ // failed.
+ for (int i = visitedBucketIndices.size() - 1; i > 0; i--) {
+ int previousBucketIndex = visitedBucketIndices.get(i);
+ table.delete(previousBucketIndex, replacedFingerprints.get(i - 1));
+ table.insertWithReplacement(previousBucketIndex, replacedFingerprints.get(i));
+ }
+ return false;
+ }
+}
diff --git a/java/com/google/setfilters/cuckoofilter/CuckooFilterArray.java b/java/com/google/setfilters/cuckoofilter/CuckooFilterArray.java
new file mode 100644
index 0000000..3c4b9bc
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/CuckooFilterArray.java
@@ -0,0 +1,182 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * Static array where each element is an integer of size {@code bitsPerElement} bits.
+ *
+ * <p>Supports up to 64 bits per element. This will be used internally by cuckoo filter.
+ */
+final class CuckooFilterArray {
+ private final long length;
+ private final int bitsPerElement;
+ private final long[] bitArray;
+
+ /**
+ * Constructs a new cuckoo filter array with length {@code length}, with each element of length
+ * {@code bitsPerElement} bits.
+ *
+ * @throws IllegalArgumentException if {@code length} <= 0 or {@code bitsPerElement} <= 0 or
+ * {@code bitsPerElement} > 64.
+ */
+ public CuckooFilterArray(long length, int bitsPerElement) {
+ checkLengthIsValid(length);
+ checkBitsPerElementIsValid(bitsPerElement);
+
+ this.length = length;
+ this.bitsPerElement = bitsPerElement;
+ long totalBits = length * bitsPerElement;
+ // ceil(totalBits / 64) number of elements.
+ long longArrayLength = (totalBits + Long.SIZE - 1) / Long.SIZE;
+ checkArgument(
+ longArrayLength < Integer.MAX_VALUE,
+ "Too large: could not create CuckooFilterArray with length %s and bitsPerElement %s.",
+ length,
+ bitsPerElement);
+ bitArray = new long[(int) longArrayLength];
+ }
+
+ /**
+ * Constructs a cuckoo filter array with length {@code length}, with each element of length {@code
+ * bitsPerElement}, from {@code byteArray}.
+ */
+ public CuckooFilterArray(long length, int bitsPerElement, byte[] byteArray) {
+ this(length, bitsPerElement);
+ ByteBuffer buffer = ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN);
+ for (int i = 0; i < bitArray.length; i++) {
+ bitArray[i] = buffer.getLong();
+ }
+ }
+
+ /** Returns the length of the array. */
+ public long length() {
+ return length;
+ }
+
+ /** Returns the number of bits per element. */
+ public int bitsPerElement() {
+ return bitsPerElement;
+ }
+
+ /**
+ * Returns the element at the {@code index}th position as a long.
+ *
+ * <p>The lowest {@code bitsPerElement} bits will correspond to the value of the element.
+ *
+ * @throws IllegalArgumentException if {@code index} is out of bounds.
+ */
+ public long getAsLong(long index) {
+ checkIndexOutOfBounds(index);
+ long bitStart = index * bitsPerElement;
+ long bitEnd = bitStart + bitsPerElement;
+ int arrayIndex1 = (int) (bitStart / Long.SIZE);
+ int arrayIndex2 = (int) ((bitEnd - 1) / Long.SIZE);
+
+ int a = (int) (bitStart % Long.SIZE);
+ // The element intersects the two array indices.
+ if (arrayIndex1 < arrayIndex2) {
+ int b = a + bitsPerElement - Long.SIZE;
+ long value1 = bitArray[arrayIndex1] >>> a;
+ long value2 = bitArray[arrayIndex2] & mask(b);
+ return (value1 | (value2 << (Long.SIZE - a)));
+ }
+ // Element is contained in one array index.
+ return (bitArray[arrayIndex1] >>> a) & mask(bitsPerElement);
+ }
+
+ /**
+ * Sets the element at {@code index}th position as {@code value}, using the lowest {@code
+ * bitsPerElement} bits as the value of the element.
+ *
+ * @throws IllegalArgumentException if {@code index} is out of bounds.
+ */
+ public void set(long index, long value) {
+ checkIndexOutOfBounds(index);
+ long bitStart = index * bitsPerElement;
+ long bitEnd = bitStart + bitsPerElement;
+ int arrayIndex1 = (int) (bitStart / Long.SIZE);
+ int arrayIndex2 = (int) ((bitEnd - 1) / Long.SIZE);
+
+ // Use the lowest bitsPerElement bits and clear all other bits.
+ value &= mask(bitsPerElement);
+
+ int a = (int) (bitStart % Long.SIZE);
+ // The element intersects the two array indices.
+ if (arrayIndex1 < arrayIndex2) {
+ int b = a + bitsPerElement - Long.SIZE;
+ bitArray[arrayIndex1] &= clearMask(Long.SIZE, a, Long.SIZE);
+ bitArray[arrayIndex1] |= (value << a);
+ bitArray[arrayIndex2] &= clearMask(Long.SIZE, 0, b);
+ bitArray[arrayIndex2] |= (value >>> (Long.SIZE - a));
+ } else {
+ // Element is contained in one array index.
+ int b = a + bitsPerElement;
+ bitArray[arrayIndex1] &= clearMask(Long.SIZE, a, b);
+ bitArray[arrayIndex1] |= (value << a);
+ }
+ }
+
+ /** Returns byte array representation of the {@link CuckooFilterArray}. */
+ public byte[] toByteArray() {
+ byte[] byteArray = new byte[bitArray.length * Long.BYTES];
+ for (int i = 0; i < bitArray.length; i++) {
+ long value = bitArray[i];
+ for (int j = 0; j < Long.BYTES; j++) {
+ // Explicit conversion from long to byte will truncate to lowest 8 bits.
+ byteArray[i * Long.BYTES + j] = (byte) value;
+ value >>>= Byte.SIZE;
+ }
+ }
+ return byteArray;
+ }
+
+ // Theoretical max size of a long array is Integer.MAX_VALUE. Assuming each element is 1 bit,
+ // we can support up to Integer.MAX_VALUE * 64 number of elements.
+ private void checkLengthIsValid(long length) {
+ checkArgument(
+ 0 < length && length < (long) Integer.MAX_VALUE * Long.SIZE,
+ "length must be in range (0, %s).",
+ (long) Integer.MAX_VALUE * Long.SIZE);
+ }
+
+ private void checkBitsPerElementIsValid(int bitsPerElement) {
+ checkArgument(
+ 0 < bitsPerElement && bitsPerElement <= 64, "bitsPerElement must be in range [1, 64].");
+ }
+
+ private void checkIndexOutOfBounds(long index) {
+ checkArgument(0 <= index && index < length, "Index is out of bounds: %s.", index);
+ }
+
+ private static long mask(int length) {
+ if (length == Long.SIZE) {
+ // -1 in 2s complement is 0xFFFFFFFFFFFFFFFF.
+ return -1;
+ }
+ return (1L << length) - 1;
+ }
+
+ // Mask for clearing bits in range [a, b).
+ private static long clearMask(int length, int a, int b) {
+ long mask1 = mask(length);
+ long mask2 = mask(b - a);
+ return mask1 ^ (mask2 << a);
+ }
+}
diff --git a/java/com/google/setfilters/cuckoofilter/CuckooFilterConfig.java b/java/com/google/setfilters/cuckoofilter/CuckooFilterConfig.java
new file mode 100644
index 0000000..e8e2849
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/CuckooFilterConfig.java
@@ -0,0 +1,364 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.hash.Funnel;
+import com.google.common.hash.HashCode;
+import com.google.errorprone.annotations.CanIgnoreReturnValue;
+import java.util.Map;
+
+/**
+ * Specification for the cuckoo filter.
+ *
+ * <p>This class is immutable.
+ */
+// TODO: Handle serialization.
+public final class CuckooFilterConfig {
+ private final Size size;
+ private final HashFunction hashFunction;
+ private final Strategy strategy;
+ private final boolean useSpaceOptimization;
+
+ private CuckooFilterConfig(
+ Size size, HashFunction hashFunction, Strategy strategy, boolean useSpaceOptimization) {
+ this.size = size;
+ this.hashFunction = hashFunction;
+ this.strategy = strategy;
+ this.useSpaceOptimization = useSpaceOptimization;
+ }
+
+ public Size size() {
+ return size;
+ }
+
+ public HashFunction hashFunction() {
+ return hashFunction;
+ }
+
+ public Strategy strategy() {
+ return strategy;
+ }
+
+ public boolean useSpaceOptimization() {
+ return useSpaceOptimization;
+ }
+
+ public static Builder newBuilder() {
+ return new Builder();
+ }
+
+ /** Builder for the {@link CuckooFilterConfig}. */
+ public static class Builder {
+ private Size size;
+ private HashFunction hashFunction;
+ private Strategy strategy;
+ private boolean useSpaceOptimization;
+
+ private Builder() {}
+
+ @CanIgnoreReturnValue
+ public Builder setSize(Size size) {
+ this.size = size;
+ return this;
+ }
+
+ @CanIgnoreReturnValue
+ public Builder setHashFunction(HashFunction hashFunction) {
+ this.hashFunction = hashFunction;
+ return this;
+ }
+
+ @CanIgnoreReturnValue
+ public Builder setStrategy(Strategy strategy) {
+ this.strategy = strategy;
+ return this;
+ }
+
+ /**
+ * Whether to use space optimized filter representation (if possible).
+ *
+ * <p>Setting this field to {@code true} does not guarantee the optimization algorithm to always
+ * apply - it is best effort.
+ *
+ * <p>In general, using this may result in slower filter operations, and incurs an additional
+ * fixed space overhead. Thus, it is possible for the "optimized" version of the filter to
+ * actually take more space than the non optimized one.
+ */
+ @CanIgnoreReturnValue
+ public Builder setUseSpaceOptimization(boolean useSpaceOptimization) {
+ this.useSpaceOptimization = useSpaceOptimization;
+ return this;
+ }
+
+ /**
+ * Builds {@link CuckooFilterConfig}.
+ *
+ * @throws IllegalArgumentException if the required parameters are not set.
+ */
+ public CuckooFilterConfig build() {
+ checkArgument(size != null, "Size must be set.");
+ checkArgument(hashFunction != null, "Hash function must be set.");
+ checkArgument(strategy != null, "Strategy must be set.");
+
+ return new CuckooFilterConfig(size, hashFunction, strategy, useSpaceOptimization);
+ }
+ }
+
+ /**
+ * Specification of the cuckoo filter size.
+ *
+ * <p>A cuckoo filter's size can be defined as a tuple (bucketCount, bucketCapacity,
+ * fingeprintLength); this means that there are bucketCount number of buckets, where each bucket
+ * can store up to bucketCapacity fingerprints, and each fingerprint is of length
+ * fingerprintLength bits.
+ *
+ * <p>All fields are required and must be set explicitly.
+ *
+ * <p>This class is immutable.
+ */
+ public static class Size {
+ private static final int MAX_BUCKET_CAPACITY = 128;
+ private static final int MAX_FINGERPRINT_LENGTH = 64;
+ /** Empirical load by the bucket capacity. */
+ private static final ImmutableMap<Integer, Double> APPROX_LOAD_BY_BUCKET_CAPACITY =
+ ImmutableMap.<Integer, Double>builder()
+ .put(2, 0.85)
+ .put(3, 0.91)
+ .put(4, 0.95)
+ .put(5, 0.96)
+ .put(6, 0.97)
+ .put(7, 0.98)
+ .put(8, 0.98)
+ .buildOrThrow();
+
+ private final int bucketCount;
+ private final int bucketCapacity;
+ private final int fingerprintLength;
+
+ private Size(int bucketCount, int bucketCapacity, int fingerprintLength) {
+ this.bucketCount = bucketCount;
+ this.bucketCapacity = bucketCapacity;
+ this.fingerprintLength = fingerprintLength;
+ }
+
+ /**
+ * Automatically computes a reasonably efficient cuckoo filter {@link Size} that ensures (with
+ * high probability) storing up to {@code elementsCountUpperBound} elements (with high
+ * probability) with the given {@code targetFalsePositiveRate}.
+ *
+ * @throws IllegalArgumentException if {@code targetFalsePositiveRate} is not in range [0, 1] or
+ * {@code elementsCountUpperBound} is <= 0, or a suitable cuckoo filter size could not be
+ * computed based on the given input.
+ */
+ public static Size computeEfficientSize(
+ double targetFalsePositiveRate, long elementsCountUpperBound) {
+ checkArgument(
+ 0 < targetFalsePositiveRate && targetFalsePositiveRate < 1,
+ "targetFalsePositiveRate must be in range (0, 1): %s given.",
+ targetFalsePositiveRate);
+ checkArgument(
+ elementsCountUpperBound > 0,
+ "elementsCountUpperBound must be > 0: %s given.",
+ elementsCountUpperBound);
+
+ long bestCuckooFilterSizeInBits = -1;
+ int bestBucketCount = 0;
+ int bestBucketCapacity = 0;
+ int bestFingerprintLength = 0;
+ for (Map.Entry<Integer, Double> entry : APPROX_LOAD_BY_BUCKET_CAPACITY.entrySet()) {
+ int bucketCapacity = entry.getKey();
+ double load = entry.getValue();
+
+ int fingerprintLength =
+ (int) Math.ceil(-log2(targetFalsePositiveRate) + log2(bucketCapacity) + 1);
+ long bucketCount = (long) Math.ceil(elementsCountUpperBound / (bucketCapacity * load));
+
+ // The computed size is invalid if fingerprint length is larger than max length or the
+ // bucket count that is larger than max integer.
+ if (fingerprintLength > MAX_FINGERPRINT_LENGTH || bucketCount >= Integer.MAX_VALUE) {
+ continue;
+ }
+
+ long totalBits = bucketCount * bucketCapacity * fingerprintLength;
+ if (bestCuckooFilterSizeInBits == -1 || bestCuckooFilterSizeInBits > totalBits) {
+ bestCuckooFilterSizeInBits = totalBits;
+ bestBucketCount = (int) bucketCount;
+ bestBucketCapacity = bucketCapacity;
+ bestFingerprintLength = fingerprintLength;
+ }
+ }
+
+ checkArgument(
+ bestCuckooFilterSizeInBits != -1,
+ "Could not compute suitable cuckoo filter size based on the given input. Either the"
+ + " target false positive rate is too low, or the computed size is too big.");
+
+ return Size.newBuilder()
+ .setBucketCount(bestBucketCount)
+ .setBucketCapacity(bestBucketCapacity)
+ .setFingerprintLength(bestFingerprintLength)
+ .build();
+ }
+
+ public static Builder newBuilder() {
+ return new Builder();
+ }
+
+ /** Returns the total number of buckets in the cuckoo filter. */
+ public int bucketCount() {
+ return bucketCount;
+ }
+
+ /** Returns the maximum number of fingerprints each bucket can hold. */
+ public int bucketCapacity() {
+ return bucketCapacity;
+ }
+
+ /** Returns the length of the fingerprint in bits. */
+ public int fingerprintLength() {
+ return fingerprintLength;
+ }
+
+ /** Builder for the {@link Size}. */
+ public static class Builder {
+ private int bucketCount;
+ private int bucketCapacity;
+ private int fingerprintLength;
+
+ private Builder() {}
+
+ /**
+ * Sets the number of buckets in the cuckoo filter.
+ *
+ * <p>{@code bucketCount} must be > 0.
+ */
+ @CanIgnoreReturnValue
+ public Builder setBucketCount(int bucketCount) {
+ this.bucketCount = bucketCount;
+ return this;
+ }
+
+ /**
+ * Sets the maximum number of fingerprints each bucket can hold.
+ *
+ * <p>{@code bucketCapacity} must be in range (0, {@value #MAX_BUCKET_CAPACITY}].
+ */
+ @CanIgnoreReturnValue
+ public Builder setBucketCapacity(int bucketCapacity) {
+ this.bucketCapacity = bucketCapacity;
+ return this;
+ }
+
+ /**
+ * Sets the length of each fingerprint in bits.
+ *
+ * <p>{@code fingerprintLength} must be in range (0, {@value #MAX_FINGERPRINT_LENGTH}].
+ */
+ @CanIgnoreReturnValue
+ public Builder setFingerprintLength(int fingerprintLength) {
+ this.fingerprintLength = fingerprintLength;
+ return this;
+ }
+
+ /**
+ * Builds {@link Size}.
+ *
+ * @throws IllegalArgumentException if the configured parameters are invalid.
+ */
+ public Size build() {
+ checkArgument(bucketCount > 0, "bucketCount must be > 0: %s given instead.", bucketCount);
+ checkArgument(
+ 0 < bucketCapacity && bucketCapacity <= MAX_BUCKET_CAPACITY,
+ "bucketCapacity must be in range (0, %s]: %s given instead.",
+ MAX_BUCKET_CAPACITY,
+ bucketCapacity);
+ checkArgument(
+ 0 < fingerprintLength && fingerprintLength <= MAX_FINGERPRINT_LENGTH,
+ "fingerprintLength must be in range (0, %s]: %s given instead.",
+ MAX_FINGERPRINT_LENGTH,
+ fingerprintLength);
+
+ return new Size(bucketCount, bucketCapacity, fingerprintLength);
+ }
+ }
+
+ private static double log2(double x) {
+ return Math.log(x) / Math.log(2);
+ }
+ }
+
+ /** Hash function for transforming an arbitrary type element to a {@link HashCode}. */
+ public interface HashFunction {
+ /** Hashes given {@code element} to a {@link HashCode}, using the given {@code funnel}. */
+ <T> HashCode hash(T element, Funnel<? super T> funnel);
+ }
+
+ /**
+ * Strategy for computing fingerprints and where these fingerprints belong in the cuckoo filter
+ * table.
+ */
+ public interface Strategy {
+
+ /**
+ * Computes the fingerprint value given the element's {@code hash} output from {@link
+ * HashFunction}.
+ *
+ * <p>The returned value should be in range (0, 2^{@code fingerprintLength}). Otherwise, the
+ * behavior of the cuckoo filter is undefined. Note that the interval is an open interval, so 0
+ * and 2^{@code fingerprintLength} are not included.
+ */
+ long computeFingerprint(HashCode hash, int fingerprintLength);
+
+ /**
+ * Computes one of the bucket indices given the element's {@code hash} output from {@link
+ * HashFunction} and {@code bucketCount} of the cuckoo filter.
+ *
+ * <p>The returned value should be in range [0, {@code bucketCount}). Otherwise, the behavior of
+ * the cuckoo filter is undefined.
+ */
+ int computeBucketIndex(HashCode hash, int bucketCount);
+
+ /**
+ * Computes the element's other bucket index given the element's {@code fingerprint} value and
+ * its initial {@code bucketIndex}.
+ *
+ * <p>{@code hashFunction} corresponds to the {@link HashFunction} that was supplied when the
+ * config was constructed. Depending on the implementation, {@code hashFunction} may or may not
+ * be used.
+ *
+ * <p>The returned value should be in range [0, {@code bucketCount}), and the method needs to be
+ * an involution with respect to {@code bucketIndex}. That is, with other parameters fixed, the
+ * method needs to satisfy <b>bucketIndex =
+ * computeOtherBucketIndex(computeOtherBucketIndex(bucketIndex))</b> for all valid
+ * <b>bucketIndex</b>. Note that other parameters are omitted for brevity. If these properties
+ * don't hold, the behavior of the cuckoo filter is undefined.
+ */
+ int computeOtherBucketIndex(
+ long fingerprint, int bucketIndex, int bucketCount, HashFunction hashFunction);
+
+ /**
+ * Maximum number of replacements to be made during insertion, before declaring that the
+ * insertion has failed.
+ *
+ * <p>If not overridden, set to 500 as a default.
+ */
+ default int maxReplacementCount() {
+ return 500;
+ }
+ }
+}
diff --git a/java/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctions.java b/java/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctions.java
new file mode 100644
index 0000000..ddb6519
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctions.java
@@ -0,0 +1,35 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.HashCode;
+import com.google.common.hash.Hashing;
+
+/** A set of predefined {@link CuckooFilterConfig.HashFunction}s. */
+public enum CuckooFilterHashFunctions implements CuckooFilterConfig.HashFunction {
+
+ /**
+ * MurmurHash3 that yields 128 bit hash value.
+ *
+ * <p>Behavior of MurmurHash3 is fixed and should not change in the future.
+ */
+ MURMUR3_128() {
+ @Override
+ public <T> HashCode hash(T element, Funnel<? super T> funnel) {
+ return Hashing.murmur3_128().hashObject(element, funnel);
+ }
+ }
+}
diff --git a/java/com/google/setfilters/cuckoofilter/CuckooFilterStrategies.java b/java/com/google/setfilters/cuckoofilter/CuckooFilterStrategies.java
new file mode 100644
index 0000000..aeabb1d
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/CuckooFilterStrategies.java
@@ -0,0 +1,62 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import com.google.common.hash.Funnels;
+import com.google.common.hash.HashCode;
+
+/** A set of predefined {@link CuckooFilterConfig.Strategy}s. */
+public enum CuckooFilterStrategies implements CuckooFilterConfig.Strategy {
+
+ /**
+ * A strategy that uses a mod operator to produce the desired outputs.
+ *
+ * <p>The {@link HashCode} generated with the hash function should be at least 64 bits. This will
+ * achieve good false positive rate when fingerprintLength <= 32.
+ */
+ SIMPLE_MOD() {
+ @Override
+ public long computeFingerprint(HashCode hash, int fingerprintLength) {
+ // Use the most significant fingerprintLength bits. This is needed to get rid of the
+ // correlation with the bucket index.
+ long fingerprint = hash.asLong() >>> (Long.SIZE - fingerprintLength);
+ // Value 0 is reserved, so instead map to 1. This means that the generated fingerprint value
+ // is skewed (1 is twice as more likely to be generated than any other value). Note that, we
+ // could have taken mod (2^fingerprintLength - 1) and added 1, which would produce a more
+ // uniform distribution. However, for performance reason, we choose to take this approach
+ // instead.
+ if (fingerprint == 0) {
+ return 1L;
+ }
+ return fingerprint;
+ }
+
+ @Override
+ public int computeBucketIndex(HashCode hash, int bucketCount) {
+ return Math.floorMod(hash.asLong(), bucketCount);
+ }
+
+ @Override
+ public int computeOtherBucketIndex(
+ long fingerprint,
+ int bucketIndex,
+ int bucketCount,
+ CuckooFilterConfig.HashFunction hashFunction) {
+ long fingerprintHash = hashFunction.hash(fingerprint, Funnels.longFunnel()).asLong();
+ // Use (hash(fingerprint) - bucketIndex) mod bucketCount as the involution.
+ return Math.floorMod(fingerprintHash - bucketIndex, bucketCount);
+ }
+ }
+}
diff --git a/java/com/google/setfilters/cuckoofilter/CuckooFilterTable.java b/java/com/google/setfilters/cuckoofilter/CuckooFilterTable.java
new file mode 100644
index 0000000..b1eb9aa
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/CuckooFilterTable.java
@@ -0,0 +1,106 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import java.nio.ByteBuffer;
+import java.util.Optional;
+import java.util.Random;
+
+/** An array of buckets where each bucket can store a fixed number of fingerprints. */
+interface CuckooFilterTable {
+ /** Value of the empty "slot", which is reserved as 0. */
+ public static long EMPTY_SLOT = 0L;
+
+ /**
+ * Creates an implementation of an empty cuckoo filter based on whether space optimization should
+ * be used.
+ *
+ * <p>Space optimization is best effort, and is not guaranteed.
+ */
+ public static CuckooFilterTable create(
+ CuckooFilterConfig.Size size, boolean useSpaceOptimization, Random random) {
+ if (useSpaceOptimization && size.bucketCapacity() == 4 && size.fingerprintLength() >= 4) {
+ return new SemiSortedCuckooFilterTable(size, random);
+ }
+ return new UncompressedCuckooFilterTable(size, random);
+ }
+
+ /** Creates an implementation of the cuckoo filter based on the serialization. */
+ public static CuckooFilterTable createFromSerialization(
+ SerializedCuckooFilterTable serializedTable, Random random) {
+ ByteBuffer buffer = ByteBuffer.wrap(serializedTable.asByteArray());
+
+ if (buffer.remaining() <= 16) {
+ throw new IllegalArgumentException("Unable to parse the SerializedCuckooFilterTable.");
+ }
+
+ int tableType = buffer.getInt();
+ int bucketCount = buffer.getInt();
+ int bucketCapacity = buffer.getInt();
+ int fingerprintLength = buffer.getInt();
+ CuckooFilterConfig.Size size =
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(bucketCount)
+ .setBucketCapacity(bucketCapacity)
+ .setFingerprintLength(fingerprintLength)
+ .build();
+
+ byte[] bitArray = new byte[buffer.remaining()];
+ buffer.get(bitArray);
+
+ if (tableType == UncompressedCuckooFilterTable.TABLE_TYPE) {
+ return new UncompressedCuckooFilterTable(size, bitArray, random);
+ } else if (tableType == SemiSortedCuckooFilterTable.TABLE_TYPE) {
+ return new SemiSortedCuckooFilterTable(size, bitArray, random);
+ } else {
+ throw new IllegalArgumentException("Unable to parse the SerializedCuckooFilterTable.");
+ }
+ }
+
+ /**
+ * Inserts given {@code fingerprint} to the {@code bucketIndex}th bucket, replacing an arbitrary
+ * fingerprint if the bucket is full.
+ *
+ * <p>How this arbitrary fingerprint is chosen depends on the implementation.
+ *
+ * @return the value of the replaced fingerprint if the bucket is full, and an empty {@link
+ * Optional} otherwise.
+ */
+ Optional<Long> insertWithReplacement(int bucketIndex, long fingerprint);
+
+ /** Returns whether {@code bucketIndex}th bucket contains {@code fingerprint}. */
+ boolean contains(int bucketIndex, long fingerprint);
+
+ /**
+ * Deletes a {@code fingerprint} from {@code bucketIndex}th bucket.
+ *
+ * <p>If a bucket contains multiple {@code fingerprint} values, this method only deletes one.
+ *
+ * @return {@code true} if {@code fingerprint} is in {@code bucketIndex}th bucket and is deleted,
+ * and {@code false} otherwise.
+ */
+ boolean delete(int bucketIndex, long fingerprint);
+
+ /** Returns whether {@code bucketIndex}th bucket is full. */
+ boolean isFull(int bucketIndex);
+
+ /** Returns the size of {@link CuckooFilterTable}. */
+ CuckooFilterConfig.Size size();
+
+ /** Returns serialization of {@link CuckooFilterTable}. */
+ SerializedCuckooFilterTable serialize();
+
+ // TODO: Add more methods as needed.
+}
diff --git a/java/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTable.java b/java/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTable.java
new file mode 100644
index 0000000..f08b47c
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTable.java
@@ -0,0 +1,266 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.util.Comparator.comparingInt;
+
+import com.google.common.collect.ImmutableMap;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Optional;
+import java.util.Random;
+
+/**
+ * Implementation of the {@link CuckooFilterTable} using the semi-sorting bucket compression scheme
+ * in the original paper by Fan et al (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) -
+ * see section 5.2.
+ *
+ * <p>The main idea behind the compression algorithm is that the order of the fingerprints in each
+ * bucket is irrelevant - that is, the fingerprints in each bucket forms a multiset. For fingerprint
+ * length f and bucket capacity b, the possible number of multisets of b fingerprints of f bits each
+ * is given by C(2^f + b - 1, b), where C denotes binomial coefficient. In particular, we can encode
+ * each bucket with ceil(log2(C(2^f + b - 1, b))) bits. On the other hand, naively encoding the
+ * fingerprints will take b * f bits. Thus, it is theoretically possible to save b * f -
+ * ceil(log2(C(2^f + b - 1, b))) bits per bucket (note that this is not information theoretically
+ * tight because the distribution of the multisets is not uniform).
+ *
+ * <p>For performance reason, this only supports a table with bucket capacity of size 4 and
+ * fingerprint length >= 4 - in many cases this is not a limitation because, for many practical
+ * applications, bucket capacity of size 4 yields the optimal cuckoo filter size and fingerprint
+ * length < 4 will never achieve good enough false positive rate.
+ *
+ * <p>Compared to the {@link UncompressedCuckooFilterTable}, this implementation can save 1 bit per
+ * element, at the cost of slower filter operations by a constant factor (asymptotically, it is the
+ * same as the uncompressed one). Note that for bucket capacity of size 4, saving 1 bit per element
+ * is "optimal" up to rounding down, as the function 4 * f - ceil(log2(C(2^f + 3, 4))) < 5 for
+ * reasonable values of f. However, this also incurs an additional fixed space overhead, so for
+ * smaller filter the extra saving of 1 bit per element may not be worth it.
+ */
+final class SemiSortedCuckooFilterTable implements CuckooFilterTable {
+ // Implementation type of the table, to be encoded in the serialization.
+ public static final int TABLE_TYPE = 1;
+
+ // Table containing all sorted 4 bit partial fingerprints of length 4 (16 bits) by its index.
+ private static final short[] SORTED_PARTIAL_FINGERPRINTS = computeSortedPartialFingerprints();
+ // Inverse map of SORTED_PARTIAL_FINGERPRINTS.
+ private static final ImmutableMap<Short, Short> SORTED_PARTIAL_FINGERPRINTS_INDEX =
+ computeSortedPartialFingerprintsIndex(SORTED_PARTIAL_FINGERPRINTS);
+
+ private final CuckooFilterConfig.Size size;
+ private final Random random;
+ private final CuckooFilterArray cuckooFilterArray;
+
+ /**
+ * Creates a new uncompressed cuckoo filter table of the given size.
+ *
+ * <p>Uses the given source of {@code random} to choose the replaced fingerprint in {@code
+ * insertWithReplacement} method.
+ */
+ public SemiSortedCuckooFilterTable(CuckooFilterConfig.Size size, Random random) {
+ this.size = size;
+ checkArgument(
+ size.bucketCapacity() == 4,
+ "SemiSortedCuckooFilterTable only supports bucket capacity of 4.");
+ checkArgument(
+ size.fingerprintLength() >= 4,
+ "SemiSortedCuckooFilterTable only supports fingerprint length >= 4.");
+ this.random = random;
+ // bucketCapacity == 4 and fingerprintLength <= 64, so we can assume that it will always fit
+ // into a long.
+ cuckooFilterArray =
+ new CuckooFilterArray(
+ (long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength() - 1);
+ }
+
+ /** Creates {@link SemiSortedCuckooFilterTable} from {@link SerializedCuckooFilterTable}. */
+ public SemiSortedCuckooFilterTable(CuckooFilterConfig.Size size, byte[] bitArray, Random random) {
+ this.size = size;
+ this.random = random;
+ cuckooFilterArray =
+ new CuckooFilterArray(
+ (long) size.bucketCount() * size.bucketCapacity(),
+ size.fingerprintLength() - 1,
+ bitArray);
+ }
+
+ @Override
+ public Optional<Long> insertWithReplacement(int bucketIndex, long fingerprint) {
+ long[] fingerprints = decodeBucket(bucketIndex);
+ for (int i = 0; i < size.bucketCapacity(); i++) {
+ if (fingerprints[i] == EMPTY_SLOT) {
+ fingerprints[i] = fingerprint;
+ encodeAndPut(bucketIndex, fingerprints);
+ return Optional.empty();
+ }
+ }
+
+ int replacedSlotIndex = random.nextInt(size.bucketCapacity());
+ long replacedFingerprint = fingerprints[replacedSlotIndex];
+ fingerprints[replacedSlotIndex] = fingerprint;
+ encodeAndPut(bucketIndex, fingerprints);
+ return Optional.of(replacedFingerprint);
+ }
+
+ @Override
+ public boolean contains(int bucketIndex, long fingerprint) {
+ long[] fingerprints = decodeBucket(bucketIndex);
+ for (long fingerprintInBucket : fingerprints) {
+ if (fingerprintInBucket == fingerprint) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean delete(int bucketIndex, long fingerprint) {
+ long[] fingerprints = decodeBucket(bucketIndex);
+ for (int i = 0; i < fingerprints.length; i++) {
+ if (fingerprints[i] == fingerprint) {
+ fingerprints[i] = EMPTY_SLOT;
+ encodeAndPut(bucketIndex, fingerprints);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isFull(int bucketIndex) {
+ return !contains(bucketIndex, CuckooFilterTable.EMPTY_SLOT);
+ }
+
+ @Override
+ public CuckooFilterConfig.Size size() {
+ return size;
+ }
+
+ @Override
+ public SerializedCuckooFilterTable serialize() {
+ byte[] serializedArray = cuckooFilterArray.toByteArray();
+
+ // The first 16 bytes specifies the implementation type and the size of the table (defined by
+ // tuple (type, bucketCount,
+ // bucketCapacity, fingerprintLength)).
+ // Rest is the bit array.
+ ByteBuffer encoded = ByteBuffer.allocate(16 + serializedArray.length);
+ return SerializedCuckooFilterTable.createFromByteArray(
+ encoded
+ .putInt(TABLE_TYPE)
+ .putInt(size.bucketCount())
+ .putInt(size.bucketCapacity())
+ .putInt(size.fingerprintLength())
+ .put(serializedArray)
+ .array());
+ }
+
+ private long toArrayIndex(int bucketIndex, int slotIndex) {
+ return (long) bucketIndex * size.bucketCapacity() + slotIndex;
+ }
+
+ // TODO: Check if encoding/decoding needs to be optimized.
+
+ // Decodes fingerprints at bucketIndex.
+ private long[] decodeBucket(int bucketIndex) {
+ int encodedSortedPartialFingerintsIndex = 0;
+ long[] fingerprintPrefixes = new long[size.bucketCapacity()];
+ for (int i = 0; i < size.bucketCapacity(); i++) {
+ long arrayIndex = toArrayIndex(bucketIndex, i);
+ long n = cuckooFilterArray.getAsLong(arrayIndex);
+ encodedSortedPartialFingerintsIndex <<= 3;
+ encodedSortedPartialFingerintsIndex |= (int) (n & 0x7);
+ fingerprintPrefixes[i] = n >>> 3;
+ }
+
+ int encodedSortedPartialFingerprints =
+ SORTED_PARTIAL_FINGERPRINTS[encodedSortedPartialFingerintsIndex];
+ long[] fingerprints = new long[size.bucketCapacity()];
+ for (int i = size.bucketCapacity() - 1; i >= 0; i--) {
+ fingerprints[i] = (fingerprintPrefixes[i] << 4) | (encodedSortedPartialFingerprints & 0xF);
+ encodedSortedPartialFingerprints >>>= 4;
+ }
+ return fingerprints;
+ }
+
+ /**
+ * Encode fingerprints and put them to bucketIndex.
+ *
+ * <p>Encoding works as follows.
+ *
+ * <p>Suppose each fingerprint is logically f bits. First, sort the fingerprints by the least
+ * significant 4 bits. Let's call the most significant f - 4 bits of the fingerprints as the
+ * fingerprint prefixes. The least significant 4 bits of the fingerprints will be the partial
+ * fingerprints, which will be encoded according to the SORTED_PARTIAL_FINGEPRRINTS_INDEX map as a
+ * 12 bit value. Partition the encoded 12 bit value into four 3 bit chunks. Group each of the f -
+ * 4 bit prefixes with each 3 bit chunk (f - 1 bits total) and insert it as a cuckoo filter array
+ * element.
+ */
+ private void encodeAndPut(int bucketIndex, long[] fingerprints) {
+ long[] fingerprintPrefixes = new long[size.bucketCapacity()];
+ int[] partialFingerprints = new int[size.bucketCapacity()];
+ for (int i = 0; i < size.bucketCapacity(); i++) {
+ fingerprintPrefixes[i] = fingerprints[i] >>> 4;
+ partialFingerprints[i] = (int) (fingerprints[i] & 0xF);
+ }
+ Integer[] indices = {0, 1, 2, 3};
+ Arrays.sort(indices, comparingInt((Integer i) -> partialFingerprints[i]));
+ short encodedSortedPartialFingerprints =
+ (short)
+ ((partialFingerprints[indices[0]] << 12)
+ | (partialFingerprints[indices[1]] << 8)
+ | (partialFingerprints[indices[2]] << 4)
+ | partialFingerprints[indices[3]]);
+ int encodedSortedPartialFingerprintsIndex =
+ SORTED_PARTIAL_FINGERPRINTS_INDEX.get(encodedSortedPartialFingerprints);
+ for (int i = size.bucketCapacity() - 1; i >= 0; i--) {
+ long arrayIndex = toArrayIndex(bucketIndex, i);
+ cuckooFilterArray.set(
+ arrayIndex,
+ (fingerprintPrefixes[indices[i]] << 3) | (encodedSortedPartialFingerprintsIndex & 0x7));
+ encodedSortedPartialFingerprintsIndex >>>= 3;
+ }
+ }
+
+ private static short[] computeSortedPartialFingerprints() {
+ // (2^4 + 3 choose 4) = 3876 counts the number of multisets of size 4, with each element in
+ // [0, 16).
+ short[] sortedPartialFingerprints = new short[3876];
+
+ final short fingerprintUpperBound = 16;
+
+ int i = 0;
+ for (short a = 0; a < fingerprintUpperBound; a++) {
+ for (short b = a; b < fingerprintUpperBound; b++) {
+ for (short c = b; c < fingerprintUpperBound; c++) {
+ for (short d = c; d < fingerprintUpperBound; d++) {
+ sortedPartialFingerprints[i] = (short) ((a << 12) | (b << 8) | (c << 4) | d);
+ i++;
+ }
+ }
+ }
+ }
+ return sortedPartialFingerprints;
+ }
+
+ private static ImmutableMap<Short, Short> computeSortedPartialFingerprintsIndex(
+ short[] sortedPartialFingerprints) {
+ ImmutableMap.Builder<Short, Short> map = ImmutableMap.builder();
+ for (short i = 0; i < sortedPartialFingerprints.length; i++) {
+ map.put(sortedPartialFingerprints[i], i);
+ }
+ return map.buildOrThrow();
+ }
+}
diff --git a/java/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTable.java b/java/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTable.java
new file mode 100644
index 0000000..4d5b48e
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTable.java
@@ -0,0 +1,38 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import java.util.Arrays;
+
+/** Serialization of {@link CuckooFilterTable}. */
+final class SerializedCuckooFilterTable {
+ private final byte[] rawSerialization;
+
+ /** Creates serialization from raw byte array. */
+ public static SerializedCuckooFilterTable createFromByteArray(byte[] byteArray) {
+ return new SerializedCuckooFilterTable(Arrays.copyOf(byteArray, byteArray.length));
+ }
+
+ private SerializedCuckooFilterTable(byte[] rawSerialization) {
+ this.rawSerialization = rawSerialization;
+ }
+
+ /** Returns the serialization as a byte array. */
+ public byte[] asByteArray() {
+ return Arrays.copyOf(rawSerialization, rawSerialization.length);
+ }
+
+ // TODO: Add other methods like asJSON();
+}
diff --git a/java/com/google/setfilters/cuckoofilter/UncompressedCuckooFilterTable.java b/java/com/google/setfilters/cuckoofilter/UncompressedCuckooFilterTable.java
new file mode 100644
index 0000000..3f96815
--- /dev/null
+++ b/java/com/google/setfilters/cuckoofilter/UncompressedCuckooFilterTable.java
@@ -0,0 +1,136 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import java.nio.ByteBuffer;
+import java.util.Optional;
+import java.util.Random;
+
+/**
+ * Implementation of the {@link CuckooFilterTable} that doesn't use the semi-sorting bucket
+ * compression scheme in the original paper by Fan et al
+ * (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) - see section 5.2 for what
+ * semi-sorting bucket compression scheme is.
+ *
+ * <p>Thus, if a bucket can hold up to bucketCapacity number of fingerprints and each fingerprint is
+ * of length fingerprintLength bits, it takes bucketCapacity * fingerprintLength bits to represent
+ * each bucket.
+ */
+final class UncompressedCuckooFilterTable implements CuckooFilterTable {
+ // Implementation type of the table, to be encoded in the serialization.
+ public static final int TABLE_TYPE = 0;
+
+ private final CuckooFilterConfig.Size size;
+ private final Random random;
+ private final CuckooFilterArray cuckooFilterArray;
+
+ /**
+ * Creates a new uncompressed cuckoo filter table of the given size.
+ *
+ * <p>Uses the given source of {@code random} to choose the replaced fingerprint in {@code
+ * insertWithReplacement} method.
+ */
+ public UncompressedCuckooFilterTable(CuckooFilterConfig.Size size, Random random) {
+ this.size = size;
+ this.random = random;
+ // bucketCapacity <= 128 and fingerprintLength <= 64, so we can assume that it will always fit
+ // into a long.
+ cuckooFilterArray =
+ new CuckooFilterArray(
+ (long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength());
+ }
+
+ /** Creates {@link UncompressedCuckooFilterTable} from {@link SerializedCuckooFilterTable}. */
+ public UncompressedCuckooFilterTable(
+ CuckooFilterConfig.Size size, byte[] bitArray, Random random) {
+ this.size = size;
+ this.random = random;
+ cuckooFilterArray =
+ new CuckooFilterArray(
+ (long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength(), bitArray);
+ }
+
+ @Override
+ public Optional<Long> insertWithReplacement(int bucketIndex, long fingerprint) {
+ for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) {
+ long arrayIndex = toArrayIndex(bucketIndex, slotIndex);
+ if (cuckooFilterArray.getAsLong(arrayIndex) == CuckooFilterTable.EMPTY_SLOT) {
+ cuckooFilterArray.set(arrayIndex, fingerprint);
+ return Optional.empty();
+ }
+ }
+ int replacedSlotIndex = random.nextInt(size.bucketCapacity());
+ long replacedArrayIndex = toArrayIndex(bucketIndex, replacedSlotIndex);
+ long replacedFingerprint = cuckooFilterArray.getAsLong(replacedArrayIndex);
+ cuckooFilterArray.set(replacedArrayIndex, fingerprint);
+ return Optional.of(replacedFingerprint);
+ }
+
+ @Override
+ public boolean contains(int bucketIndex, long fingerprint) {
+ for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) {
+ long arrayIndex = toArrayIndex(bucketIndex, slotIndex);
+ if (cuckooFilterArray.getAsLong(arrayIndex) == fingerprint) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean delete(int bucketIndex, long fingerprint) {
+ for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) {
+ long arrayIndex = toArrayIndex(bucketIndex, slotIndex);
+ if (cuckooFilterArray.getAsLong(arrayIndex) == fingerprint) {
+ cuckooFilterArray.set(arrayIndex, CuckooFilterTable.EMPTY_SLOT);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isFull(int bucketIndex) {
+ return !contains(bucketIndex, CuckooFilterTable.EMPTY_SLOT);
+ }
+
+ @Override
+ public CuckooFilterConfig.Size size() {
+ return size;
+ }
+
+ @Override
+ public SerializedCuckooFilterTable serialize() {
+ byte[] serializedArray = cuckooFilterArray.toByteArray();
+
+ // The first 16 bytes specifies the implementation type and the size of the table (defined by
+ // tuple (type, bucketCount,
+ // bucketCapacity, fingerprintLength)).
+ // Rest is the bit array.
+ ByteBuffer encoded = ByteBuffer.allocate(16 + serializedArray.length);
+ return SerializedCuckooFilterTable.createFromByteArray(
+ encoded
+ .putInt(TABLE_TYPE)
+ .putInt(size.bucketCount())
+ .putInt(size.bucketCapacity())
+ .putInt(size.fingerprintLength())
+ .put(serializedArray)
+ .array());
+ }
+
+ private long toArrayIndex(int bucketIndex, int slotIndex) {
+ return (long) bucketIndex * size.bucketCapacity() + slotIndex;
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/BUILD b/javatests/com/google/setfilters/cuckoofilter/BUILD
new file mode 100644
index 0000000..da4e5aa
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/BUILD
@@ -0,0 +1,130 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_test")
+
+package(default_visibility = ["//visibility:public"])
+
+java_test(
+ name = "CuckooFilterArrayTest",
+ srcs = [
+ "CuckooFilterArrayTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "CuckooFilterTest",
+ srcs = [
+ "CuckooFilterTest.java",
+ ],
+ size = "medium",
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "CuckooFilterConfigTest",
+ srcs = [
+ "CuckooFilterConfigTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "CuckooFilterHashFunctionsTest",
+ srcs = [
+ "CuckooFilterHashFunctionsTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "CuckooFilterStrategiesTest",
+ srcs = [
+ "CuckooFilterStrategiesTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "CuckooFilterTableTest",
+ srcs = [
+ "CuckooFilterTableTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "SemiSortedCuckooFilterTableTest",
+ srcs = [
+ "SemiSortedCuckooFilterTableTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
+
+java_test(
+ name = "SerializedCuckooFilterTableTest",
+ srcs = [
+ "SerializedCuckooFilterTableTest.java",
+ ],
+ deps = [
+ "//third_party/java/guava",
+ "//third_party/java/junit",
+ "//third_party/java/truth",
+ "//third_party/java/mockito",
+ "//java/com/google/setfilters/cuckoofilter",
+ ]
+)
diff --git a/javatests/com/google/setfilters/cuckoofilter/CuckooFilterArrayTest.java b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterArrayTest.java
new file mode 100644
index 0000000..6ee18c3
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterArrayTest.java
@@ -0,0 +1,199 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertThrows;
+
+import java.util.Random;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class CuckooFilterArrayTest {
+
+ @Test
+ public void createsNewArray_invalidLength() {
+ String message =
+ assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(0, 20))
+ .getMessage();
+ assertThat(message)
+ .isEqualTo(
+ String.format(
+ "length must be in range (0, %s).", (long) Integer.MAX_VALUE * Long.SIZE));
+ }
+
+ @Test
+ public void createsNewArray_invalidBitsPerElement() {
+ String message =
+ assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(5, 0))
+ .getMessage();
+ assertThat(message).isEqualTo("bitsPerElement must be in range [1, 64].");
+
+ message =
+ assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(5, 65))
+ .getMessage();
+ assertThat(message).isEqualTo("bitsPerElement must be in range [1, 64].");
+ }
+
+ @Test
+ public void createsNewArray_tooLarge() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> new CuckooFilterArray((long) Integer.MAX_VALUE * 63, 20))
+ .getMessage();
+ assertThat(message)
+ .isEqualTo(
+ String.format(
+ "Too large: could not create CuckooFilterArray with length %s and bitsPerElement"
+ + " 20.",
+ (long) Integer.MAX_VALUE * 63));
+ }
+
+ @Test
+ public void createsExistingArray_invalidLength() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class, () -> new CuckooFilterArray(0, 20, new byte[1]))
+ .getMessage();
+ assertThat(message)
+ .isEqualTo(
+ String.format(
+ "length must be in range (0, %s).", (long) Integer.MAX_VALUE * Long.SIZE));
+ }
+
+ @Test
+ public void createsExistingArray_invalidBitsPerElement() {
+ String message =
+ assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(5, 0, new byte[1]))
+ .getMessage();
+ assertThat(message).isEqualTo("bitsPerElement must be in range [1, 64].");
+
+ message =
+ assertThrows(
+ IllegalArgumentException.class, () -> new CuckooFilterArray(5, 65, new byte[1]))
+ .getMessage();
+ assertThat(message).isEqualTo("bitsPerElement must be in range [1, 64].");
+ }
+
+ @Test
+ public void creatExistingArray() {
+ CuckooFilterArray array = new CuckooFilterArray(100, 20);
+ array.set(0, 1);
+ array.set(1, 2);
+
+ byte[] byteArray = array.toByteArray();
+
+ CuckooFilterArray existing = new CuckooFilterArray(100, 20, byteArray);
+
+ assertThat(existing.getAsLong(0)).isEqualTo(1);
+ assertThat(existing.getAsLong(1)).isEqualTo(2);
+ for (int i = 2; i < existing.length(); i++) {
+ assertThat(existing.getAsLong(i)).isEqualTo(0);
+ }
+ }
+
+ @Test
+ public void length() {
+ CuckooFilterArray array = new CuckooFilterArray(100, 20);
+
+ assertThat(array.length()).isEqualTo(100);
+ }
+
+ @Test
+ public void bitsPerElement() {
+ CuckooFilterArray array = new CuckooFilterArray(100, 20);
+
+ assertThat(array.bitsPerElement()).isEqualTo(20);
+ }
+
+ @Test
+ public void getAsLong_indexOutOfBounds() {
+ CuckooFilterArray array = new CuckooFilterArray(100, 20);
+
+ String message =
+ assertThrows(IllegalArgumentException.class, () -> array.getAsLong(-1)).getMessage();
+ assertThat(message).isEqualTo("Index is out of bounds: -1.");
+
+ message = assertThrows(IllegalArgumentException.class, () -> array.getAsLong(100)).getMessage();
+ assertThat(message).isEqualTo("Index is out of bounds: 100.");
+ }
+
+ @Test
+ public void set_indexOutOfBounds() {
+ CuckooFilterArray array = new CuckooFilterArray(100, 20);
+
+ String message =
+ assertThrows(IllegalArgumentException.class, () -> array.set(-1, 20)).getMessage();
+ assertThat(message).isEqualTo("Index is out of bounds: -1.");
+
+ message = assertThrows(IllegalArgumentException.class, () -> array.set(100, 20)).getMessage();
+ assertThat(message).isEqualTo("Index is out of bounds: 100.");
+ }
+
+ @Test
+ public void setAndGet() {
+ for (int bitsPerElement = 1; bitsPerElement <= 64; bitsPerElement++) {
+ CuckooFilterArray array = new CuckooFilterArray(100, bitsPerElement);
+
+ for (int i = 0; i < array.length(); i++) {
+ array.set(i, -1L - i);
+ }
+
+ for (int i = 0; i < array.length(); i++) {
+ assertThat(array.getAsLong(i)).isEqualTo((-1L - i) & mask(bitsPerElement));
+ }
+ }
+ }
+
+ @Test
+ public void setAndGet2() {
+ for (int bitsPerElement = 1; bitsPerElement <= 64; bitsPerElement++) {
+ CuckooFilterArray array = new CuckooFilterArray(10000, bitsPerElement);
+
+ Random rand = new Random();
+ long[] inserted = new long[(int) array.length()];
+ for (int i = 0; i < array.length(); i++) {
+ long v = rand.nextLong() & mask(bitsPerElement);
+ inserted[i] = v;
+ array.set(i, v);
+ }
+
+ for (int i = 0; i < array.length(); i++) {
+ long v = rand.nextLong() & mask(bitsPerElement);
+ inserted[i] = v;
+ array.set(i, v);
+ }
+
+ for (int i = 0; i < array.length(); i += 2) {
+ inserted[i] = 0;
+ array.set(i, 0);
+ }
+
+ for (int i = 0; i < array.length(); i++) {
+ assertThat(array.getAsLong(i)).isEqualTo(inserted[i]);
+ }
+ }
+ }
+
+ private static long mask(int length) {
+ if (length == 64) {
+ return -1;
+ }
+ return (1L << length) - 1;
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/CuckooFilterConfigTest.java b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterConfigTest.java
new file mode 100644
index 0000000..56c3798
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterConfigTest.java
@@ -0,0 +1,272 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertThrows;
+
+import com.google.common.hash.Funnel;
+import com.google.common.hash.Funnels;
+import com.google.common.hash.HashCode;
+import com.google.common.hash.Hashing;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class CuckooFilterConfigTest {
+
+ public static final class TestHashFunction implements CuckooFilterConfig.HashFunction {
+ @Override
+ public <T> HashCode hash(T element, Funnel<? super T> funnel) {
+ return Hashing.murmur3_128().hashObject(element, funnel);
+ }
+ }
+
+ public static final class TestStrategy implements CuckooFilterConfig.Strategy {
+ @Override
+ public long computeFingerprint(HashCode hash, int fingerprintLength) {
+ return 20;
+ }
+
+ @Override
+ public int computeBucketIndex(HashCode hash, int bucketCount) {
+ return 0;
+ }
+
+ @Override
+ public int computeOtherBucketIndex(
+ long fingerprint,
+ int bucketIndex,
+ int bucketCount,
+ CuckooFilterConfig.HashFunction hashFunction) {
+ return 1;
+ }
+ }
+
+ @Test
+ public void build_buildsCuckooFilterConfig() {
+ CuckooFilterConfig config =
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(4)
+ .setFingerprintLength(16)
+ .build())
+ .setHashFunction(new TestHashFunction())
+ .setStrategy(new TestStrategy())
+ .setUseSpaceOptimization(true)
+ .build();
+
+ CuckooFilterConfig.Size size = config.size();
+ assertThat(size.bucketCount()).isEqualTo(100);
+ assertThat(size.bucketCapacity()).isEqualTo(4);
+ assertThat(size.fingerprintLength()).isEqualTo(16);
+
+ Funnel<Long> funnel = Funnels.longFunnel();
+ CuckooFilterConfig.HashFunction hashFunction = config.hashFunction();
+ assertThat(hashFunction.hash(100L, funnel))
+ .isEqualTo(Hashing.murmur3_128().hashObject(100L, funnel));
+
+ CuckooFilterConfig.Strategy strategy = config.strategy();
+ HashCode randomHash = HashCode.fromLong(100L);
+ assertThat(strategy.computeFingerprint(randomHash, 16)).isEqualTo(20);
+ assertThat(strategy.computeBucketIndex(randomHash, 100)).isEqualTo(0);
+ assertThat(strategy.computeOtherBucketIndex(0, 5, 100, config.hashFunction())).isEqualTo(1);
+ assertThat(strategy.maxReplacementCount()).isEqualTo(500);
+
+ assertThat(config.useSpaceOptimization()).isTrue();
+ }
+
+ @Test
+ public void build_failsWithUnsetSize() {
+ String message =
+ assertThrows(IllegalArgumentException.class, () -> CuckooFilterConfig.newBuilder().build())
+ .getMessage();
+
+ assertThat(message).isEqualTo("Size must be set.");
+ }
+
+ @Test
+ public void build_failsWithUnsetHashFunction() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(4)
+ .setFingerprintLength(16)
+ .build())
+ .build())
+ .getMessage();
+
+ assertThat(message).isEqualTo("Hash function must be set.");
+ }
+
+ @Test
+ public void build_failsWithUnsetStrategy() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(4)
+ .setFingerprintLength(16)
+ .build())
+ .setHashFunction(new TestHashFunction())
+ .build())
+ .getMessage();
+
+ assertThat(message).isEqualTo("Strategy must be set.");
+ }
+
+ @Test
+ public void buildSize_failsWithInvalidBucketCount() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> CuckooFilterConfig.Size.newBuilder().setBucketCount(0).build())
+ .getMessage();
+
+ assertThat(message).isEqualTo("bucketCount must be > 0: 0 given instead.");
+ }
+
+ @Test
+ public void buildSize_failsWithInvalidBucketCapacity() {
+ String messageLower =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(1)
+ .setBucketCapacity(0)
+ .build())
+ .getMessage();
+
+ assertThat(messageLower)
+ .isEqualTo("bucketCapacity must be in range (0, 128]: 0 given instead.");
+
+ String messageHigher =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(1)
+ .setBucketCapacity(129)
+ .build())
+ .getMessage();
+
+ assertThat(messageHigher)
+ .isEqualTo("bucketCapacity must be in range (0, 128]: 129 given instead.");
+ }
+
+ @Test
+ public void buildSize_failsWithInvalidFingerprintLength() {
+ String messageLower =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(1)
+ .setBucketCapacity(1)
+ .setFingerprintLength(0)
+ .build())
+ .getMessage();
+
+ assertThat(messageLower)
+ .isEqualTo("fingerprintLength must be in range (0, 64]: 0 given instead.");
+
+ String messageHigher =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(1)
+ .setBucketCapacity(1)
+ .setFingerprintLength(65)
+ .build())
+ .getMessage();
+
+ assertThat(messageHigher)
+ .isEqualTo("fingerprintLength must be in range (0, 64]: 65 given instead.");
+ }
+
+ @Test
+ public void computeEfficientSize_failsWithInvalidFalsePositiveRate() {
+ String messageLower =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> CuckooFilterConfig.Size.computeEfficientSize(0, 5))
+ .getMessage();
+
+ assertThat(messageLower)
+ .isEqualTo("targetFalsePositiveRate must be in range (0, 1): 0.0 given.");
+
+ String messageHigher =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> CuckooFilterConfig.Size.computeEfficientSize(1, 5))
+ .getMessage();
+
+ assertThat(messageHigher)
+ .isEqualTo("targetFalsePositiveRate must be in range (0, 1): 1.0 given.");
+ }
+
+ @Test
+ public void computeEfficientSize_failsWithInvalidElementsCountUpperBound() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> CuckooFilterConfig.Size.computeEfficientSize(0.5, 0))
+ .getMessage();
+
+ assertThat(message).isEqualTo("elementsCountUpperBound must be > 0: 0 given.");
+ }
+
+ @Test
+ public void computeEfficientSize_failsIfElementsCountUpperBoundTooBig() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> CuckooFilterConfig.Size.computeEfficientSize(0.5, 5000L * Integer.MAX_VALUE))
+ .getMessage();
+
+ assertThat(message)
+ .isEqualTo(
+ "Could not compute suitable cuckoo filter size based on the given input. Either the"
+ + " target false positive rate is too low, or the computed size is too big.");
+ }
+
+ @Test
+ public void computeEfficientSize_failsIfFalsePositiveRateTooLow() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> CuckooFilterConfig.Size.computeEfficientSize(Double.MIN_NORMAL, 100))
+ .getMessage();
+
+ assertThat(message)
+ .isEqualTo(
+ "Could not compute suitable cuckoo filter size based on the given input. Either the"
+ + " target false positive rate is too low, or the computed size is too big.");
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctionsTest.java b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctionsTest.java
new file mode 100644
index 0000000..5ac4cea
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctionsTest.java
@@ -0,0 +1,33 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.common.hash.Funnels;
+import com.google.common.hash.Hashing;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class CuckooFilterHashFunctionsTest {
+
+ @Test
+ public void murmur3_128() {
+ assertThat(CuckooFilterHashFunctions.MURMUR3_128.hash(100L, Funnels.longFunnel()))
+ .isEqualTo(Hashing.murmur3_128().hashObject(100L, Funnels.longFunnel()));
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/CuckooFilterStrategiesTest.java b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterStrategiesTest.java
new file mode 100644
index 0000000..512696e
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterStrategiesTest.java
@@ -0,0 +1,103 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.common.hash.HashCode;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class CuckooFilterStrategiesTest {
+
+ private static final int FINGERPRINT_LENGTH = 16;
+ private static final int MAX_FINGERPRINT_LENGTH = 64;
+ private static final int BUCKET_COUNT = 100;
+
+ @Test
+ public void simpleModStrategy_computeFingerprint_zeroMapsToOne() {
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint(
+ HashCode.fromLong(0L), FINGERPRINT_LENGTH))
+ .isEqualTo(1L);
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint(
+ HashCode.fromLong(1L << (FINGERPRINT_LENGTH + 1)), FINGERPRINT_LENGTH))
+ .isEqualTo(1L);
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint(
+ HashCode.fromLong(0L), MAX_FINGERPRINT_LENGTH))
+ .isEqualTo(1L);
+ }
+
+ @Test
+ public void simpleModStrategy_computeFingerprint_mostSignificantBits() {
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint(
+ HashCode.fromLong(-1L), FINGERPRINT_LENGTH))
+ .isEqualTo((1L << 16) - 1);
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint(
+ HashCode.fromLong(-1L), MAX_FINGERPRINT_LENGTH))
+ .isEqualTo(-1L);
+ }
+
+ @Test
+ public void simpleModStrategy_computeBucketIndex_smallerThanDivisorStaysUnchanged() {
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex(
+ HashCode.fromLong(0L), BUCKET_COUNT))
+ .isEqualTo(0);
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex(
+ HashCode.fromLong(99L), BUCKET_COUNT))
+ .isEqualTo(99);
+ }
+
+ @Test
+ public void simpleModStrategy_computeBucketIndex_largerThanDivisorUsesRemainder() {
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex(
+ HashCode.fromLong(100), BUCKET_COUNT))
+ .isEqualTo(0);
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex(
+ HashCode.fromLong(199L), BUCKET_COUNT))
+ .isEqualTo(99);
+ }
+
+ @Test
+ public void simpleModStrategy_computeOtherBucketIndex_involution() {
+ for (long fingerprint = 1; fingerprint < 1000; fingerprint += 10) {
+ for (int bucketIndex = 0; bucketIndex < BUCKET_COUNT; bucketIndex++) {
+ int otherBucketIndex =
+ CuckooFilterStrategies.SIMPLE_MOD.computeOtherBucketIndex(
+ fingerprint, bucketIndex, BUCKET_COUNT, CuckooFilterHashFunctions.MURMUR3_128);
+
+ assertThat(otherBucketIndex).isAtLeast(0);
+ assertThat(otherBucketIndex).isLessThan(BUCKET_COUNT);
+ assertThat(
+ CuckooFilterStrategies.SIMPLE_MOD.computeOtherBucketIndex(
+ fingerprint,
+ otherBucketIndex,
+ BUCKET_COUNT,
+ CuckooFilterHashFunctions.MURMUR3_128))
+ .isEqualTo(bucketIndex);
+ }
+ }
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/CuckooFilterTableTest.java b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterTableTest.java
new file mode 100644
index 0000000..0e91adb
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterTableTest.java
@@ -0,0 +1,202 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.common.truth.Truth8.assertThat;
+import static org.junit.Assert.assertThrows;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.Random;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public final class CuckooFilterTableTest {
+ private static final int BUCKET_COUNT = 10000;
+ private static final int BUCKET_CAPACITY = 4;
+ private static final int FINGERPRINT_LENGTH = 16;
+
+ private Random random;
+ private CuckooFilterTable table;
+
+ private interface CuckooFilterTableFactory {
+ public CuckooFilterTable create(CuckooFilterConfig.Size size, Random random);
+
+ public default CuckooFilterTable createExisting(
+ SerializedCuckooFilterTable serializedTable, Random random) {
+ return CuckooFilterTable.createFromSerialization(serializedTable, random);
+ }
+ }
+
+ private static class SemiSortedCuckooFilterTableFactory implements CuckooFilterTableFactory {
+ @Override
+ public CuckooFilterTable create(CuckooFilterConfig.Size size, Random random) {
+ return new SemiSortedCuckooFilterTable(size, random);
+ }
+ }
+
+ private static class UncompressedCuckooFilterTableFactory implements CuckooFilterTableFactory {
+ @Override
+ public CuckooFilterTable create(CuckooFilterConfig.Size size, Random random) {
+ return new UncompressedCuckooFilterTable(size, random);
+ }
+ }
+
+ @Parameters
+ public static List<? extends Object> data() {
+ return Arrays.asList(
+ new SemiSortedCuckooFilterTableFactory(), new UncompressedCuckooFilterTableFactory());
+ }
+
+ @Parameter public CuckooFilterTableFactory tableFactory;
+
+ @Before
+ public void setUp() {
+ random = mock(Random.class);
+ table =
+ tableFactory.create(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(BUCKET_COUNT)
+ .setBucketCapacity(BUCKET_CAPACITY)
+ .setFingerprintLength(FINGERPRINT_LENGTH)
+ .build(),
+ random);
+ }
+
+ @Test
+ public void insertWithReplacement() {
+ for (int i = 0; i < BUCKET_COUNT; i++) {
+ long offset = (long) i * BUCKET_CAPACITY;
+ for (int j = 0; j < BUCKET_CAPACITY; j++) {
+ assertThat(table.insertWithReplacement(i, offset + j + 1)).isEmpty();
+ }
+ when(random.nextInt(BUCKET_CAPACITY)).thenReturn(0);
+
+ Optional<Long> replaced = table.insertWithReplacement(i, offset + BUCKET_CAPACITY + 1);
+
+ boolean anyOf = false;
+ for (int j = 0; j < BUCKET_CAPACITY; j++) {
+ anyOf = anyOf || (replaced.get() == offset + j + 1);
+ }
+ assertThat(anyOf).isTrue();
+ assertThat(table.contains(i, replaced.get())).isFalse();
+ for (long fingerprint = offset + 1;
+ fingerprint < offset + BUCKET_CAPACITY + 2;
+ fingerprint++) {
+ if (fingerprint != replaced.get()) {
+ assertThat(table.contains(i, fingerprint)).isTrue();
+ }
+ }
+ }
+ }
+
+ @Test
+ public void contains_containsFingerprint() {
+ assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
+
+ assertThat(table.contains(0, 1L)).isTrue();
+ }
+
+ @Test
+ public void contains_doesNotContainFingerprint() {
+ assertThat(table.contains(0, 1L)).isFalse();
+ }
+
+ @Test
+ public void delete_deletesExistingFingerprint() {
+ assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
+ assertThat(table.contains(0, 1L)).isTrue();
+
+ assertThat(table.delete(0, 1L)).isTrue();
+ assertThat(table.contains(0, 1L)).isFalse();
+ }
+
+ @Test
+ public void delete_deletesOneFingerprintAtATime() {
+ assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
+ assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
+ assertThat(table.contains(0, 1L)).isTrue();
+
+ assertThat(table.delete(0, 1L)).isTrue();
+ assertThat(table.contains(0, 1L)).isTrue();
+ assertThat(table.delete(0, 1L)).isTrue();
+ assertThat(table.contains(0, 1L)).isFalse();
+ }
+
+ @Test
+ public void delete_deletesNonExistingFingerprint() {
+ assertThat(table.delete(0, 1L)).isFalse();
+ }
+
+ @Test
+ public void isFull() {
+ for (int j = 0; j < BUCKET_CAPACITY; j++) {
+ assertThat(table.isFull(0)).isFalse();
+ assertThat(table.insertWithReplacement(0, j + 1)).isEmpty();
+ }
+ assertThat(table.isFull(0)).isTrue();
+ }
+
+ @Test
+ public void size() {
+ CuckooFilterConfig.Size size = table.size();
+
+ assertThat(size.bucketCount()).isEqualTo(BUCKET_COUNT);
+ assertThat(size.bucketCapacity()).isEqualTo(BUCKET_CAPACITY);
+ assertThat(size.fingerprintLength()).isEqualTo(FINGERPRINT_LENGTH);
+ }
+
+ @Test
+ public void serializeAndDeserialize() {
+ for (int i = 0; i < BUCKET_CAPACITY; i++) {
+ long offset = (long) i * BUCKET_CAPACITY;
+ for (int j = 0; j < BUCKET_CAPACITY; j++) {
+ assertThat(table.insertWithReplacement(i, offset + j + 1)).isEmpty();
+ }
+ }
+
+ SerializedCuckooFilterTable serializedTable = table.serialize();
+ CuckooFilterTable existingTable = tableFactory.createExisting(serializedTable, new Random());
+
+ for (int i = 0; i < BUCKET_CAPACITY; i++) {
+ long offset = (long) i * BUCKET_CAPACITY;
+ for (int j = 0; j < BUCKET_CAPACITY; j++) {
+ assertThat(existingTable.contains(i, offset + j + 1)).isTrue();
+ }
+ }
+ }
+
+ @Test
+ public void deserialize_failsWithInvalidSerialization() {
+ SerializedCuckooFilterTable serializedTable =
+ SerializedCuckooFilterTable.createFromByteArray(new byte[12]);
+
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> tableFactory.createExisting(serializedTable, new Random()))
+ .getMessage();
+ assertThat(message).isEqualTo("Unable to parse the SerializedCuckooFilterTable.");
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/CuckooFilterTest.java b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterTest.java
new file mode 100644
index 0000000..94d4e8d
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/CuckooFilterTest.java
@@ -0,0 +1,323 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.common.hash.Funnels;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public final class CuckooFilterTest {
+
+ @Parameters
+ public static List<? extends Object> data() {
+ return Arrays.asList(true, false);
+ }
+
+ @Parameter public boolean useSpaceOptimization;
+
+ private CuckooFilterConfig config =
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(4)
+ .setFingerprintLength(16)
+ .build())
+ .setHashFunction(CuckooFilterHashFunctions.MURMUR3_128)
+ .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
+ .build();
+
+ private CuckooFilter<Integer> cuckooFilter;
+
+ @Before
+ public void setUp() {
+ config =
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(4)
+ .setFingerprintLength(16)
+ .build())
+ .setHashFunction(CuckooFilterHashFunctions.MURMUR3_128)
+ .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
+ .setUseSpaceOptimization(useSpaceOptimization)
+ .build();
+ cuckooFilter = CuckooFilter.createNew(config, Funnels.integerFunnel());
+ }
+
+ @Test
+ public void insertAndContains() {
+ final int insertedElementsCount = 380;
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.insert(i)).isTrue();
+ }
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.contains(i)).isTrue();
+ }
+
+ final int testCountNonExistentElements = 300;
+
+ for (int i = 0; i < testCountNonExistentElements; i++) {
+ assertThat(cuckooFilter.contains(i + insertedElementsCount)).isFalse();
+ }
+ }
+
+ @Test
+ public void insert_failsWhenFull_insertSameElements() {
+ // Exhaust two buckets that element 0 can belong to.
+ for (int i = 0; i < 2 * config.size().bucketCapacity(); i++) {
+ assertThat(cuckooFilter.insert(0)).isTrue();
+ }
+
+ assertThat(cuckooFilter.insert(0)).isFalse();
+ }
+
+ @Test
+ public void insert_insertFailureReversesTheReplacements() {
+ int insertedCount = 0;
+ while (true) {
+ if (!cuckooFilter.insert(insertedCount)) {
+ break;
+ }
+ insertedCount++;
+ }
+
+ for (int i = 0; i < insertedCount; i++) {
+ assertThat(cuckooFilter.contains(i)).isTrue();
+ }
+ assertThat(cuckooFilter.contains(insertedCount)).isFalse();
+ }
+
+ @Test
+ public void delete_deletesExistingElements() {
+ final int insertedElementsCount = 150;
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.insert(i)).isTrue();
+ assertThat(cuckooFilter.insert(i)).isTrue();
+ }
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.delete(i)).isTrue();
+ assertThat(cuckooFilter.delete(i)).isTrue();
+ }
+ }
+
+ @Test
+ public void delete_deletingNonExistingElementsFails() {
+ final int insertedElementsCount = 150;
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.delete(i)).isFalse();
+ }
+ }
+
+ @Test
+ public void size() {
+ assertThat(cuckooFilter.size()).isEqualTo(config.size());
+ }
+
+ @Test
+ public void count() {
+ final int insertedElementsCount = 300;
+ final int deletedElementCount = 150;
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.insert(i)).isTrue();
+ }
+ assertThat(cuckooFilter.count()).isEqualTo(insertedElementsCount);
+
+ for (int i = 0; i < deletedElementCount; i++) {
+ assertThat(cuckooFilter.delete(i)).isTrue();
+ }
+ assertThat(cuckooFilter.count()).isEqualTo(insertedElementsCount - deletedElementCount);
+
+ // Attempt to delete non existing elements.
+ for (int i = 0; i < deletedElementCount; i++) {
+ assertThat(cuckooFilter.delete(insertedElementsCount + i)).isFalse();
+ }
+ assertThat(cuckooFilter.count()).isEqualTo(insertedElementsCount - deletedElementCount);
+ }
+
+ @Test
+ public void serializeAndDeserialize() {
+ final int insertedElementsCount = 300;
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.insert(i)).isTrue();
+ }
+
+ SerializedCuckooFilterTable serializedTable = cuckooFilter.serializeTable();
+
+ CuckooFilter<Integer> anotherCuckooFilter =
+ CuckooFilter.createFromSerializedTable(
+ serializedTable, config.hashFunction(), config.strategy(), Funnels.integerFunnel());
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(anotherCuckooFilter.contains(i)).isTrue();
+ }
+ assertThat(anotherCuckooFilter.contains(insertedElementsCount)).isFalse();
+ }
+
+ @Test
+ public void load() {
+ final int insertedElementsCount = 300;
+
+ for (int i = 0; i < insertedElementsCount; i++) {
+ assertThat(cuckooFilter.insert(i)).isTrue();
+ }
+
+ assertThat(cuckooFilter.load())
+ .isWithin(0.00000001)
+ .of(
+ (double) insertedElementsCount
+ / (config.size().bucketCount() * config.size().bucketCapacity()));
+ }
+
+ @Test
+ public void loadIsHigh() {
+ final int[] bucketCounts = {1000, 10000, 100000, 1000000};
+ final int[] bucketCapacities = {4, 5, 6, 7, 8};
+ final int fingerprintLength = 16;
+
+ for (int bucketCount : bucketCounts) {
+ for (int bucketCapacity : bucketCapacities) {
+ CuckooFilter<Integer> cuckooFilter =
+ CuckooFilter.createNew(
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(bucketCount)
+ .setBucketCapacity(bucketCapacity)
+ .setFingerprintLength(fingerprintLength)
+ .build())
+ .setHashFunction(CuckooFilterHashFunctions.MURMUR3_128)
+ .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
+ .setUseSpaceOptimization(useSpaceOptimization)
+ .build(),
+ Funnels.integerFunnel());
+
+ int element = 0;
+ while (cuckooFilter.insert(element)) {
+ element++;
+ }
+
+ assertThat(cuckooFilter.load()).isAtLeast(0.95);
+ }
+ }
+ }
+
+ @Test
+ public void computeEfficientSize_achievesTargetFalsePositiveRateAndCapacity() {
+ final double[] targetFalsePositiveRates = {0.05, 0.01, 0.001};
+ final long[] elementsCountUpperBounds = {100, 1000, 10000};
+
+ for (double targetFalsePositiveRate : targetFalsePositiveRates) {
+ for (long elementsCountUpperBound : elementsCountUpperBounds) {
+ CuckooFilter<Integer> cuckooFilter =
+ CuckooFilter.createNew(
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.computeEfficientSize(
+ targetFalsePositiveRate, elementsCountUpperBound))
+ .setHashFunction(CuckooFilterHashFunctions.MURMUR3_128)
+ .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
+ .build(),
+ Funnels.integerFunnel());
+
+ int element = 0;
+ while (cuckooFilter.insert(element)) {
+ element++;
+ }
+
+ assertThat(computeFalsePositiveRate(cuckooFilter, 1000000))
+ .isAtMost(targetFalsePositiveRate);
+ assertThat(cuckooFilter.count()).isAtLeast(elementsCountUpperBound);
+ }
+ }
+ }
+
+ @Test
+ public void closeToTheoreticalFalsePositiveRate() {
+ final int bucketCount = 1000;
+ final int[] bucketCapacities = {2, 3, 4, 5, 6, 7, 8};
+ for (int bucketCapacity : bucketCapacities) {
+ // Due to time out issue, we only go up to 12 bits (otherwise we have to sample too many times
+ // to get a reliable measurement).
+ // TODO: Add a separate benchmark to test for longer fingerprint length.
+ for (int fingerprintLength = 8; fingerprintLength <= 12; fingerprintLength++) {
+ CuckooFilter<Integer> cuckooFilter =
+ CuckooFilter.createNew(
+ CuckooFilterConfig.newBuilder()
+ .setSize(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(bucketCount)
+ .setBucketCapacity(bucketCapacity)
+ .setFingerprintLength(fingerprintLength)
+ .build())
+ .setHashFunction(CuckooFilterHashFunctions.MURMUR3_128)
+ .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
+ .build(),
+ Funnels.integerFunnel());
+
+ int element = 0;
+ while (cuckooFilter.insert(element)) {
+ element++;
+ }
+
+ // Let f = fingerprintLength. A random element not in the cuckoo filter has 1 / (2^f - 1)
+ // probability of matching a random fingerprint, and the probability it matches at least one
+ // of the x fingerprints is 1 - (1 - 1 / (2^f - 1))^x which is approximately x / (2^f - 1)
+ // when x << 2^f - 1.
+ //
+ // If X is a random variable denoting number of fingerprints in a randomly chosen two
+ // buckets, false positive probability is roughly E[X / (2^f - 1)] = E[X] / (2^f - 1).
+ // Let a be the cuckoo filter's load and b be the bucketCapacity. Then E[X] = a * 2b.
+ // Thus, theoretical false positive rate is ~ a * 2b / (2^f - 1).
+ double load = cuckooFilter.load();
+ double theoreticalFalsePositiveRate =
+ load * 2 * bucketCapacity / ((1 << fingerprintLength) - 1);
+
+ double relativeDiff =
+ Math.abs(computeFalsePositiveRate(cuckooFilter, 2000000) - theoreticalFalsePositiveRate)
+ / theoreticalFalsePositiveRate;
+ assertThat(relativeDiff).isAtMost(0.03);
+ }
+ }
+ }
+
+ private static double computeFalsePositiveRate(
+ CuckooFilter<Integer> cuckooFilter, int sampleCount) {
+ int falsePositiveCount = 0;
+ for (int i = 0; i < sampleCount; i++) {
+ if (cuckooFilter.contains(-i - 1)) {
+ falsePositiveCount++;
+ }
+ }
+ return (double) falsePositiveCount / sampleCount;
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTableTest.java b/javatests/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTableTest.java
new file mode 100644
index 0000000..91eba4f
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTableTest.java
@@ -0,0 +1,65 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertThrows;
+
+import java.util.Random;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class SemiSortedCuckooFilterTableTest {
+
+ @Test
+ public void creation_failsWithInvalidBucketCapacity() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ new SemiSortedCuckooFilterTable(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(5)
+ .setFingerprintLength(4)
+ .build(),
+ new Random()))
+ .getMessage();
+
+ assertThat(message)
+ .isEqualTo("SemiSortedCuckooFilterTable only supports bucket capacity of 4.");
+ }
+
+ @Test
+ public void creation_failsWithInvalidFingerprintLength() {
+ String message =
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ new SemiSortedCuckooFilterTable(
+ CuckooFilterConfig.Size.newBuilder()
+ .setBucketCount(100)
+ .setBucketCapacity(4)
+ .setFingerprintLength(3)
+ .build(),
+ new Random()))
+ .getMessage();
+
+ assertThat(message)
+ .isEqualTo("SemiSortedCuckooFilterTable only supports fingerprint length >= 4.");
+ }
+}
diff --git a/javatests/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTableTest.java b/javatests/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTableTest.java
new file mode 100644
index 0000000..c11de88
--- /dev/null
+++ b/javatests/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTableTest.java
@@ -0,0 +1,51 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.setfilters.cuckoofilter;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.util.Arrays;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class SerializedCuckooFilterTableTest {
+
+ @Test
+ public void construct_byteArrayCopied() {
+ byte[] array = new byte[] {0, 1, 2, 3, 4};
+ byte[] copied = Arrays.copyOf(array, array.length);
+
+ SerializedCuckooFilterTable serializedTable =
+ SerializedCuckooFilterTable.createFromByteArray(array);
+ array[0] = 2;
+
+ byte[] asByteArray = serializedTable.asByteArray();
+ assertThat(asByteArray).isEqualTo(copied);
+ }
+
+ @Test
+ public void asByteArray_byteArrayCopied() {
+ byte[] array = new byte[] {0, 1, 2, 3, 4};
+
+ SerializedCuckooFilterTable serializedTable =
+ SerializedCuckooFilterTable.createFromByteArray(array);
+
+ byte[] asByteArray = serializedTable.asByteArray();
+ asByteArray[0] = 1;
+ assertThat(serializedTable.asByteArray()).isEqualTo(array);
+ }
+}
diff --git a/third_party/java/errorprone/BUILD b/third_party/java/errorprone/BUILD
new file mode 100644
index 0000000..c37a3f3
--- /dev/null
+++ b/third_party/java/errorprone/BUILD
@@ -0,0 +1,23 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+java_library(
+ name = "annotations",
+ tags = ["maven:compile_only"],
+ exports = ["@maven//:com_google_errorprone_error_prone_annotations"],
+)
diff --git a/third_party/java/guava/BUILD b/third_party/java/guava/BUILD
new file mode 100644
index 0000000..93f6146
--- /dev/null
+++ b/third_party/java/guava/BUILD
@@ -0,0 +1,24 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+java_library(
+ name = "guava",
+ exports = [
+ "@maven//:com_google_guava_guava",
+ ],
+)
diff --git a/third_party/java/junit/BUILD b/third_party/java/junit/BUILD
new file mode 100644
index 0000000..bb5ef43
--- /dev/null
+++ b/third_party/java/junit/BUILD
@@ -0,0 +1,24 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+java_library(
+ name = "junit",
+ testonly = 1,
+ exports = ["@maven//:junit_junit"],
+ #runtime_deps = ["//third_party/java/hamcrest"],
+)
diff --git a/third_party/java/mockito/BUILD b/third_party/java/mockito/BUILD
new file mode 100644
index 0000000..fc03a5f
--- /dev/null
+++ b/third_party/java/mockito/BUILD
@@ -0,0 +1,23 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+java_library(
+ name = "mockito",
+ testonly = 1,
+ exports = ["@maven//:org_mockito_mockito_core"],
+)
diff --git a/third_party/java/truth/BUILD b/third_party/java/truth/BUILD
new file mode 100644
index 0000000..cbe2452
--- /dev/null
+++ b/third_party/java/truth/BUILD
@@ -0,0 +1,26 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+java_library(
+ name = "truth",
+ testonly = 1,
+ exports = [
+ "@maven//:com_google_truth_extensions_truth_java8_extension",
+ "@maven//:com_google_truth_truth",
+ ],
+)