aboutsummaryrefslogtreecommitdiff
path: root/src/vcdiffengine.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vcdiffengine.h')
-rw-r--r--src/vcdiffengine.h127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/vcdiffengine.h b/src/vcdiffengine.h
new file mode 100644
index 0000000..c69fc28
--- /dev/null
+++ b/src/vcdiffengine.h
@@ -0,0 +1,127 @@
+// Copyright 2006 Google Inc.
+// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_VCDIFF_VCDIFFENGINE_H_
+#define OPEN_VCDIFF_VCDIFFENGINE_H_
+
+#include <config.h>
+#include <stddef.h> // size_t
+#include <stdint.h> // uint32_t
+
+namespace open_vcdiff {
+
+class BlockHash;
+class OutputStringInterface;
+class CodeTableWriterInterface;
+
+// The VCDiffEngine class is used to find the optimal encoding (in terms of COPY
+// and ADD instructions) for a given dictionary and target window. To write the
+// instructions for this encoding, it calls the Copy() and Add() methods of the
+// code table writer object which is passed as an argument to Encode().
+class VCDiffEngine {
+ public:
+ // The minimum size of a string match that is worth putting into a COPY
+ // instruction. Since this value is more than twice the block size, the
+ // encoder will always discover a match of this size, no matter whether it is
+ // aligned on block boundaries in the dictionary text.
+ static const size_t kMinimumMatchSize = 32;
+
+ VCDiffEngine(const char* dictionary, size_t dictionary_size);
+
+ ~VCDiffEngine();
+
+ // Initializes the object before use.
+ // This method must be called after constructing a VCDiffEngine object,
+ // and before any other method may be called. It should not be called
+ // twice on the same object.
+ // Returns true if initialization succeeded, or false if an error occurred,
+ // in which case no other method except the destructor may then be used
+ // on the object.
+ // The Init() method is the only one allowed to treat hashed_dictionary_
+ // as non-const.
+ bool Init();
+
+ size_t dictionary_size() const { return dictionary_size_; }
+
+ // Main worker function. Finds the best matches between the dictionary
+ // (source) and target data, and uses the coder to write a
+ // delta file window into *diff.
+ // Because it is a const function, many threads
+ // can call Encode() at once for the same VCDiffEngine object.
+ // All thread-specific data will be stored in the coder and diff arguments.
+ // The coder object must have been fully initialized (by calling its Init()
+ // method, if any) before calling this function.
+ //
+ // look_for_target_matches determines whether to look for matches
+ // within the previously encoded target data, or just within the source
+ // (dictionary) data. Please see vcencoder.h for a full explanation
+ // of this parameter.
+ void Encode(const char* target_data,
+ size_t target_size,
+ bool look_for_target_matches,
+ OutputStringInterface* diff,
+ CodeTableWriterInterface* coder) const;
+
+ private:
+ static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) {
+ return size >= kMinimumMatchSize;
+ }
+
+ // The following two functions use templates to produce two different
+ // versions of the code depending on the value of the option
+ // look_for_target_matches. This approach saves a test-and-branch instruction
+ // within the inner loop of EncodeCopyForBestMatch.
+ template<bool look_for_target_matches>
+ void EncodeInternal(const char* target_data,
+ size_t target_size,
+ OutputStringInterface* diff,
+ CodeTableWriterInterface* coder) const;
+
+ // If look_for_target_matches is true, then target_hash must point to a valid
+ // BlockHash object, and cannot be NULL. If look_for_target_matches is
+ // false, then the value of target_hash is ignored.
+ template<bool look_for_target_matches>
+ size_t EncodeCopyForBestMatch(uint32_t hash_value,
+ const char* target_candidate_start,
+ const char* unencoded_target_start,
+ size_t unencoded_target_size,
+ const BlockHash* target_hash,
+ CodeTableWriterInterface* coder) const;
+
+ void AddUnmatchedRemainder(const char* unencoded_target_start,
+ size_t unencoded_target_size,
+ CodeTableWriterInterface* coder) const;
+
+ void FinishEncoding(size_t target_size,
+ OutputStringInterface* diff,
+ CodeTableWriterInterface* coder) const;
+
+ const char* dictionary_; // A copy of the dictionary contents
+
+ const size_t dictionary_size_;
+
+ // A hash that contains one element for every kBlockSize bytes of dictionary_.
+ // This can be reused to encode many different target strings using the
+ // same dictionary, without the need to compute the hash values each time.
+ const BlockHash* hashed_dictionary_;
+
+ // Making these private avoids implicit copy constructor & assignment operator
+ VCDiffEngine(const VCDiffEngine&);
+ void operator=(const VCDiffEngine&);
+};
+
+} // namespace open_vcdiff
+
+#endif // OPEN_VCDIFF_VCDIFFENGINE_H_