aboutsummaryrefslogtreecommitdiff
path: root/src/vcdecoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vcdecoder.cc')
-rw-r--r--src/vcdecoder.cc1416
1 files changed, 1416 insertions, 0 deletions
diff --git a/src/vcdecoder.cc b/src/vcdecoder.cc
new file mode 100644
index 0000000..e6cabd4
--- /dev/null
+++ b/src/vcdecoder.cc
@@ -0,0 +1,1416 @@
+// Copyright 2008 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Implements a Decoder for the format described in
+// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
+// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
+//
+// The RFC describes the possibility of using a secondary compressor
+// to further reduce the size of each section of the VCDIFF output.
+// That feature is not supported in this implementation of the encoder
+// and decoder.
+// No secondary compressor types have been publicly registered with
+// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
+// in the more than five years since the registry was created, so there
+// is no standard set of compressor IDs which would be generated by other
+// encoders or accepted by other decoders.
+
+#include <config.h>
+#include "google/vcdecoder.h"
+#include <stddef.h> // size_t, ptrdiff_t
+#include <stdint.h> // int32_t
+#include <string.h> // memcpy, memset
+#include <memory> // auto_ptr
+#include <string>
+#include "addrcache.h"
+#include "checksum.h"
+#include "codetable.h"
+#include "decodetable.h"
+#include "headerparser.h"
+#include "logging.h"
+#include "google/output_string.h"
+#include "varint_bigendian.h"
+#include "vcdiff_defs.h"
+
+namespace open_vcdiff {
+
+// This class is used to parse delta file windows as described
+// in RFC sections 4.2 and 4.3. Its methods are not thread-safe.
+//
+// Here is the window format copied from the RFC:
+//
+// Window1
+// Win_Indicator - byte
+// [Source segment size] - integer
+// [Source segment position] - integer
+// The delta encoding of the target window
+// Length of the delta encoding - integer
+// The delta encoding
+// Size of the target window - integer
+// Delta_Indicator - byte
+// Length of data for ADDs and RUNs - integer
+// Length of instructions and sizes - integer
+// Length of addresses for COPYs - integer
+// Data section for ADDs and RUNs - array of bytes
+// Instructions and sizes section - array of bytes
+// Addresses section for COPYs - array of bytes
+// Window2
+// ...
+//
+// Sample usage:
+//
+// VCDiffDeltaFileWindow delta_window_;
+// delta_window_.Init(parent);
+// ParseableChunk parseable_chunk(input_buffer,
+// input_size,
+// leftover_unencoded_bytes);
+// while (!parseable_chunk.Empty()) {
+// switch (delta_window_.DecodeWindow(&parseable_chunk)) {
+// case RESULT_END_OF_DATA:
+// <Read more input and retry DecodeWindow later.>
+// case RESULT_ERROR:
+// <Handle error case. An error log message has already been generated.>
+// }
+// }
+//
+// DecodeWindow consumes only a single window, and needs to be placed within
+// a loop if multiple windows are to be processed.
+//
+class VCDiffDeltaFileWindow {
+ public:
+ VCDiffDeltaFileWindow();
+ ~VCDiffDeltaFileWindow();
+
+ // Init() should be called immediately after constructing the
+ // VCDiffDeltaFileWindow(). It must be called before DecodeWindow() can be
+ // invoked, or an error will occur.
+ void Init(VCDiffStreamingDecoderImpl* parent);
+
+ // Resets the pointers to the data sections in the current window.
+ void Reset();
+
+ bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
+ unsigned char max_mode) {
+ return reader_.UseCodeTable(code_table_data, max_mode);
+ }
+
+ // Decodes a single delta window using the input data from *parseable_chunk.
+ // Appends the decoded target window to parent_->decoded_target(). Returns
+ // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if
+ // the end of input was reached before the entire window could be decoded and
+ // more input is expected (only possible if IsInterleaved() is true), or
+ // RESULT_ERROR if an error occurred during decoding. In the RESULT_ERROR
+ // case, the value of parseable_chunk->pointer_ is undefined; otherwise,
+ // parseable_chunk->Advance() is called to point to the input data position
+ // just after the data that has been decoded.
+ //
+ VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk);
+
+ bool FoundWindowHeader() const {
+ return found_header_;
+ }
+
+ bool MoreDataExpected() const {
+ // When parsing an interleaved-format delta file,
+ // every time DecodeBody() exits, interleaved_bytes_expected_
+ // will be decremented by the number of bytes parsed. If it
+ // reaches zero, then there is no more data expected because
+ // the size of the interleaved section (given in the window
+ // header) has been reached.
+ return IsInterleaved() && (interleaved_bytes_expected_ > 0);
+ }
+
+ size_t target_window_start_pos() const { return target_window_start_pos_; }
+
+ void set_target_window_start_pos(size_t new_start_pos) {
+ target_window_start_pos_ = new_start_pos;
+ }
+
+ // Returns the number of bytes remaining to be decoded in the target window.
+ // If not in the process of decoding a window, returns 0.
+ size_t TargetBytesRemaining();
+
+ private:
+ // Reads the header of the window section as described in RFC sections 4.2 and
+ // 4.3, up to and including the value "Length of addresses for COPYs". If the
+ // entire header is found, this function sets up the DeltaWindowSections
+ // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
+ // that the decoder can begin decoding the opcodes in these sections. Returns
+ // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
+ // available data was reached before the entire header could be read. (The
+ // latter may be an error condition if there is no more data available.)
+ // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
+ // parsed header.
+ //
+ VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
+
+ // After the window header has been parsed as far as the Delta_Indicator,
+ // this function is called to parse the following delta window header fields:
+ //
+ // Length of data for ADDs and RUNs - integer (VarintBE format)
+ // Length of instructions and sizes - integer (VarintBE format)
+ // Length of addresses for COPYs - integer (VarintBE format)
+ //
+ // If has_checksum_ is true, it also looks for the following element:
+ //
+ // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
+ //
+ // It sets up the DeltaWindowSections instructions_and_sizes_,
+ // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
+ // is being used, all three sections will include the entire window body; if
+ // the standard format is used, three non-overlapping window sections will be
+ // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
+ // if standard format is being used and there is not enough input data to read
+ // the entire window body. Otherwise, returns RESULT_SUCCESS.
+ VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
+
+ // Decodes the body of the window section as described in RFC sections 4.3,
+ // including the sections "Data section for ADDs and RUNs", "Instructions
+ // and sizes section", and "Addresses section for COPYs". These sections
+ // must already have been set up by ReadWindowHeader(). Returns a
+ // non-negative value on success, or RESULT_END_OF_DATA if the end of input
+ // was reached before the entire window could be decoded (only possible if
+ // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
+ // decoding. Appends as much of the decoded target window as possible to
+ // parent->decoded_target().
+ //
+ int DecodeBody(ParseableChunk* parseable_chunk);
+
+ // Returns the number of bytes already decoded into the target window.
+ size_t TargetBytesDecoded();
+
+ // Decodes a single ADD instruction, updating parent_->decoded_target_.
+ VCDiffResult DecodeAdd(size_t size);
+
+ // Decodes a single RUN instruction, updating parent_->decoded_target_.
+ VCDiffResult DecodeRun(size_t size);
+
+ // Decodes a single COPY instruction, updating parent_->decoded_target_.
+ VCDiffResult DecodeCopy(size_t size, unsigned char mode);
+
+ // When using the interleaved format, this function is called both on parsing
+ // the header and on resuming after a RESULT_END_OF_DATA was returned from a
+ // previous call to DecodeBody(). It sets up all three section pointers to
+ // reference the same interleaved stream of instructions, sizes, addresses,
+ // and data. These pointers must be reset every time that work resumes on a
+ // delta window, because the input data string may have been changed or
+ // resized since DecodeBody() last returned.
+ void UpdateInterleavedSectionPointers(const char* data_pos,
+ const char* data_end) {
+ const ptrdiff_t available_data = data_end - data_pos;
+ // Don't read past the end of currently-available data
+ if (available_data > interleaved_bytes_expected_) {
+ instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
+ } else {
+ instructions_and_sizes_.Init(data_pos, available_data);
+ }
+ data_for_add_and_run_.Init(&instructions_and_sizes_);
+ addresses_for_copy_.Init(&instructions_and_sizes_);
+ }
+
+ // If true, the interleaved format described in AllowInterleaved() is used
+ // for the current delta file. Only valid after ReadWindowHeader() has been
+ // called and returned a positive number (i.e., the whole header was parsed),
+ // but before the window has finished decoding.
+ //
+ bool IsInterleaved() const {
+ // If the sections are interleaved, both addresses_for_copy_ and
+ // data_for_add_and_run_ should point at instructions_and_sizes_.
+ return !addresses_for_copy_.IsOwned();
+ }
+
+ // Executes a single COPY or ADD instruction, appending data to
+ // parent_->decoded_target().
+ void CopyBytes(const char* data, size_t size);
+
+ // Executes a single RUN instruction, appending data to
+ // parent_->decoded_target().
+ void RunByte(unsigned char byte, size_t size);
+
+ // Advance *parseable_chunk to point to the current position in the
+ // instructions/sizes section. If interleaved format is used, then
+ // decrement the number of expected bytes in the instructions/sizes section
+ // by the number of instruction/size bytes parsed.
+ void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
+
+ // The parent object which was passed to Init().
+ VCDiffStreamingDecoderImpl* parent_;
+
+ // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
+ // has been called and succeeded in parsing the delta window header, but the
+ // entire window has not yet been decoded.
+ bool found_header_;
+
+ // Contents and length of the current source window. source_segment_ptr_
+ // will be non-NULL if (a) the window section header for the current window
+ // has been read, but the window has not yet finished decoding; or
+ // (b) the window did not specify a source segment.
+ const char* source_segment_ptr_;
+ size_t source_segment_length_;
+
+ // The delta encoding window sections as defined in RFC section 4.3.
+ // The pointer for each section will be incremented as data is consumed and
+ // decoded from that section. If the interleaved format is used,
+ // data_for_add_and_run_ and addresses_for_copy_ will both point to
+ // instructions_and_sizes_; otherwise, they will be separate data sections.
+ //
+ DeltaWindowSection instructions_and_sizes_;
+ DeltaWindowSection data_for_add_and_run_;
+ DeltaWindowSection addresses_for_copy_;
+
+ // The expected bytes left to decode in instructions_and_sizes_. Only used
+ // for the interleaved format.
+ int interleaved_bytes_expected_;
+
+ // The expected length of the target window once it has been decoded.
+ size_t target_window_length_;
+
+ // The index in decoded_target at which the first byte of the current
+ // target window was/will be written.
+ size_t target_window_start_pos_;
+
+ // If has_checksum_ is true, then expected_checksum_ contains an Adler32
+ // checksum of the target window data. This is an extension included in the
+ // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
+ bool has_checksum_;
+ VCDChecksum expected_checksum_;
+
+ VCDiffCodeTableReader reader_;
+
+ // Making these private avoids implicit copy constructor & assignment operator
+ VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT
+ void operator=(const VCDiffDeltaFileWindow&);
+};
+
+// *** Inline methods for VCDiffDeltaFileWindow
+
+inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
+ Reset();
+}
+
+inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
+
+inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
+ parent_ = parent;
+}
+
+class VCDiffStreamingDecoderImpl {
+ public:
+ typedef std::string string;
+
+ // The default maximum target file size (and target window size) if
+ // SetMaximumTargetFileSize() is not called.
+ static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB
+
+ // The largest value that can be passed to SetMaximumTargetWindowSize().
+ // Using a larger value will result in an error.
+ static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX
+
+ // A constant that is the default value for planned_target_file_size_,
+ // indicating that the decoder does not have an expected length
+ // for the target data.
+ static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
+
+ VCDiffStreamingDecoderImpl();
+ ~VCDiffStreamingDecoderImpl();
+
+ // Resets all member variables to their initial states.
+ void Reset();
+
+ // These functions are identical to their counterparts
+ // in VCDiffStreamingDecoder.
+ //
+ void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
+
+ bool DecodeChunk(const char* data,
+ size_t len,
+ OutputStringInterface* output_string);
+
+ bool FinishDecoding();
+
+ // If true, the version of VCDIFF used in the current delta file allows
+ // for the interleaved format, in which instructions, addresses and data
+ // are all sent interleaved in the instructions section of each window
+ // rather than being sent in separate sections. This is not part of
+ // the VCDIFF draft standard, so we've defined a special version code
+ // 'S' which implies that this feature is available. Even if interleaving
+ // is supported, it is not mandatory; interleaved format will be implied
+ // if the address and data sections are both zero-length.
+ //
+ bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
+
+ // If true, the version of VCDIFF used in the current delta file allows
+ // each delta window to contain an Adler32 checksum of the target window data.
+ // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
+ // this checksum will appear as a variable-length integer, just after the
+ // "length of addresses for COPYs" value and before the window data sections.
+ // It is possible for some windows in a delta file to use the checksum feature
+ // and for others not to use it (and leave the flag bit set to 0.)
+ // Just as with AllowInterleaved(), this extension is not part of the draft
+ // standard and is only available when the version code 'S' is specified.
+ //
+ bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
+
+ bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
+ maximum_target_file_size_ = new_maximum_target_file_size;
+ return true;
+ }
+
+ bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
+ if (new_maximum_target_window_size > kTargetSizeLimit) {
+ VCD_ERROR << "Specified maximum target window size "
+ << new_maximum_target_window_size << " exceeds limit of "
+ << kTargetSizeLimit << " bytes" << VCD_ENDL;
+ return false;
+ }
+ maximum_target_window_size_ = new_maximum_target_window_size;
+ return true;
+ }
+
+ // See description of planned_target_file_size_, below.
+ bool HasPlannedTargetFileSize() const {
+ return planned_target_file_size_ != kUnlimitedBytes;
+ }
+
+ void SetPlannedTargetFileSize(size_t planned_target_file_size) {
+ planned_target_file_size_ = planned_target_file_size;
+ }
+
+ void AddToTotalTargetWindowSize(size_t window_size) {
+ total_of_target_window_sizes_ += window_size;
+ }
+
+ // Checks to see whether the decoded target data has reached its planned size.
+ bool ReachedPlannedTargetFileSize() const {
+ if (!HasPlannedTargetFileSize()) {
+ return false;
+ }
+ // The planned target file size should not have been exceeded.
+ // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
+ // each target window would not make the target file exceed that limit, and
+ // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
+ // exceeds the advertised target window size.
+ if (total_of_target_window_sizes_ > planned_target_file_size_) {
+ VCD_DFATAL << "Internal error: Decoded data size "
+ << total_of_target_window_sizes_
+ << " exceeds planned target file size "
+ << planned_target_file_size_ << VCD_ENDL;
+ return true;
+ }
+ return total_of_target_window_sizes_ == planned_target_file_size_;
+ }
+
+ // Checks to see whether adding a new target window of the specified size
+ // would exceed the planned target file size, the maximum target file size,
+ // or the maximum target window size. If so, logs an error and returns true;
+ // otherwise, returns false.
+ bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
+
+ // Returns the amount of input data passed to the last DecodeChunk()
+ // that was not consumed by the decoder. This is essential if
+ // SetPlannedTargetFileSize() is being used, in order to preserve the
+ // remaining input data stream once the planned target file has been decoded.
+ size_t GetUnconsumedDataSize() const {
+ return unparsed_bytes_.size();
+ }
+
+ // This function will return true if the decoder has parsed a complete delta
+ // file header plus zero or more delta file windows, with no data left over.
+ // It will also return true if no delta data at all was decoded. If these
+ // conditions are not met, then FinishDecoding() should not be called.
+ bool IsDecodingComplete() const {
+ if (!FoundFileHeader()) {
+ // No complete delta file header has been parsed yet. DecodeChunk()
+ // may have received some data that it hasn't yet parsed, in which case
+ // decoding is incomplete.
+ return unparsed_bytes_.empty();
+ } else if (custom_code_table_decoder_.get()) {
+ // The decoder is in the middle of parsing a custom code table.
+ return false;
+ } else if (delta_window_.FoundWindowHeader()) {
+ // The decoder is in the middle of parsing an interleaved format delta
+ // window.
+ return false;
+ } else if (ReachedPlannedTargetFileSize()) {
+ // The decoder found exactly the planned number of bytes. In this case
+ // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
+ // data after the end of the delta file.
+ return true;
+ } else {
+ // No complete delta file window has been parsed yet. DecodeChunk()
+ // may have received some data that it hasn't yet parsed, in which case
+ // decoding is incomplete.
+ return unparsed_bytes_.empty();
+ }
+ }
+
+ const char* dictionary_ptr() const { return dictionary_ptr_; }
+
+ size_t dictionary_size() const { return dictionary_size_; }
+
+ VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
+
+ string* decoded_target() { return &decoded_target_; }
+
+ bool allow_vcd_target() const { return allow_vcd_target_; }
+
+ void SetAllowVcdTarget(bool allow_vcd_target) {
+ if (start_decoding_was_called_) {
+ VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()"
+ << VCD_ENDL;
+ return;
+ }
+ allow_vcd_target_ = allow_vcd_target;
+ }
+
+ private:
+ // Reads the VCDiff delta file header section as described in RFC section 4.1,
+ // except the custom code table data. Returns RESULT_ERROR if an error
+ // occurred, or RESULT_END_OF_DATA if the end of available data was reached
+ // before the entire header could be read. (The latter may be an error
+ // condition if there is no more data available.) Otherwise, advances
+ // data->position_ past the header and returns RESULT_SUCCESS.
+ //
+ VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
+
+ // Indicates whether or not the header has already been read.
+ bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
+
+ // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
+ // file header, this function parses the custom cache sizes and initializes
+ // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
+ // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
+ // error occurred, or RESULT_END_OF_DATA if the end of available data was
+ // reached before the custom cache sizes could be read. Otherwise, returns
+ // the number of bytes read.
+ //
+ int InitCustomCodeTable(const char* data_start, const char* data_end);
+
+ // If a custom code table was specified in the header section that was parsed
+ // by ReadDeltaFileHeader(), this function makes a recursive call to another
+ // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
+ // custom code table is expected to be supplied as an embedded VCDIFF
+ // encoding that uses the standard code table. Returns RESULT_ERROR if an
+ // error occurs, or RESULT_END_OF_DATA if the end of available data was
+ // reached before the entire custom code table could be read. Otherwise,
+ // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
+ // custom code table. If the function returns RESULT_SUCCESS or
+ // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
+ //
+ VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
+
+ // Called after the decoder exhausts all input data. This function
+ // copies from decoded_target_ into output_string all the data that
+ // has not yet been output. It sets decoded_target_output_position_
+ // to mark the start of the next data that needs to be output.
+ void AppendNewOutputText(OutputStringInterface* output_string);
+
+ // Appends to output_string the portion of decoded_target_ that has
+ // not yet been output, then clears decoded_target_. This function is
+ // called after each complete target window has been decoded if
+ // allow_vcd_target is false. In that case, there is no need to retain
+ // target data from any window except the current window.
+ void FlushDecodedTarget(OutputStringInterface* output_string);
+
+ // Contents and length of the source (dictionary) data.
+ const char* dictionary_ptr_;
+ size_t dictionary_size_;
+
+ // This string will be used to store any unparsed bytes left over when
+ // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
+ // It will also be used to concatenate those unparsed bytes with the data
+ // supplied to the next call to DecodeChunk(), so that they appear in
+ // contiguous memory.
+ string unparsed_bytes_;
+
+ // The portion of the target file that has been decoded so far. This will be
+ // used to fill the output string for DecodeChunk(), and will also be used to
+ // execute COPY instructions that reference target data. Since the source
+ // window can come from a range of addresses in the previously decoded target
+ // data, the entire target file needs to be available to the decoder, not just
+ // the current target window.
+ string decoded_target_;
+
+ // The VCDIFF version byte (also known as "header4") from the
+ // delta file header.
+ unsigned char vcdiff_version_code_;
+
+ VCDiffDeltaFileWindow delta_window_;
+
+ std::auto_ptr<VCDiffAddressCache> addr_cache_;
+
+ // Will be NULL unless a custom code table has been defined.
+ std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
+
+ // Used to receive the decoded custom code table.
+ string custom_code_table_string_;
+
+ // If a custom code table is specified, it will be expressed
+ // as an embedded VCDIFF delta file which uses the default code table
+ // as the source file (dictionary). Use a child decoder object
+ // to decode that delta file.
+ std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
+
+ // If set, then the decoder is expecting *exactly* this number of
+ // target bytes to be decoded from one or more delta file windows.
+ // If this number is exceeded while decoding a window, but was not met
+ // before starting on that window, an error will be reported.
+ // If FinishDecoding() is called before this number is met, an error
+ // will also be reported. This feature is used for decoding the
+ // embedded code table data within a VCDIFF delta file; we want to
+ // stop processing the embedded data once the entire code table has
+ // been decoded, and treat the rest of the available data as part
+ // of the enclosing delta file.
+ size_t planned_target_file_size_;
+
+ size_t maximum_target_file_size_;
+
+ size_t maximum_target_window_size_;
+
+ // Contains the sum of the decoded sizes of all target windows seen so far,
+ // including the expected total size of the current target window in progress
+ // (even if some of the current target window has not yet been decoded.)
+ size_t total_of_target_window_sizes_;
+
+ // Contains the byte position within decoded_target_ of the first data that
+ // has not yet been output by AppendNewOutputText().
+ size_t decoded_target_output_position_;
+
+ // This value is used to ensure the correct order of calls to the interface
+ // functions, i.e., a single call to StartDecoding(), followed by zero or
+ // more calls to DecodeChunk(), followed by a single call to
+ // FinishDecoding().
+ bool start_decoding_was_called_;
+
+ // If this value is true then the VCD_TARGET flag can be specified to allow
+ // the source segment to be chosen from the previously-decoded target data.
+ // (This is the default behavior.) If it is false, then specifying the
+ // VCD_TARGET flag is considered an error, and the decoder does not need to
+ // keep in memory any decoded target data prior to the current window.
+ bool allow_vcd_target_;
+
+ // Making these private avoids implicit copy constructor & assignment operator
+ VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT
+ void operator=(const VCDiffStreamingDecoderImpl&);
+};
+
+// *** Methods for VCDiffStreamingDecoderImpl
+
+const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
+const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
+
+VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
+ : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
+ maximum_target_window_size_(kDefaultMaximumTargetFileSize),
+ allow_vcd_target_(true) {
+ delta_window_.Init(this);
+ Reset();
+}
+
+// Reset() will delete the component objects without reallocating them.
+VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
+
+void VCDiffStreamingDecoderImpl::Reset() {
+ start_decoding_was_called_ = false;
+ dictionary_ptr_ = NULL;
+ dictionary_size_ = 0;
+ vcdiff_version_code_ = '\0';
+ planned_target_file_size_ = kUnlimitedBytes;
+ total_of_target_window_sizes_ = 0;
+ addr_cache_.reset();
+ custom_code_table_.reset();
+ custom_code_table_decoder_.reset();
+ delta_window_.Reset();
+ decoded_target_output_position_ = 0;
+}
+
+void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
+ size_t dictionary_size) {
+ if (start_decoding_was_called_) {
+ VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()"
+ << VCD_ENDL;
+ return;
+ }
+ unparsed_bytes_.clear();
+ decoded_target_.clear(); // delta_window_.Reset() depends on this
+ Reset();
+ dictionary_ptr_ = dictionary_ptr;
+ dictionary_size_ = dictionary_size;
+ start_decoding_was_called_ = true;
+}
+
+// Reads the VCDiff delta file header section as described in RFC section 4.1:
+//
+// Header1 - byte = 0xD6 (ASCII 'V' | 0x80)
+// Header2 - byte = 0xC3 (ASCII 'C' | 0x80)
+// Header3 - byte = 0xC4 (ASCII 'D' | 0x80)
+// Header4 - byte
+// Hdr_Indicator - byte
+// [Secondary compressor ID] - byte
+// [Length of code table data] - integer
+// [Code table data]
+//
+// Initializes the code table and address cache objects. Returns RESULT_ERROR
+// if an error occurred, and RESULT_END_OF_DATA if the end of available data was
+// reached before the entire header could be read. (The latter may be an error
+// condition if there is no more data available.) Otherwise, returns
+// RESULT_SUCCESS, and removes the header bytes from the data string.
+//
+// It's relatively inefficient to expect this function to parse any number of
+// input bytes available, down to 1 byte, but it is necessary in case the input
+// is not a properly formatted VCDIFF delta file. If the entire input consists
+// of two bytes "12", then we should recognize that it does not match the
+// initial VCDIFF magic number "VCD" and report an error, rather than waiting
+// indefinitely for more input that will never arrive.
+//
+VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
+ ParseableChunk* data) {
+ if (FoundFileHeader()) {
+ return RESULT_SUCCESS;
+ }
+ size_t data_size = data->UnparsedSize();
+ const DeltaFileHeader* header =
+ reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
+ bool wrong_magic_number = false;
+ switch (data_size) {
+ // Verify only the bytes that are available.
+ default:
+ // Found header contents up to and including VCDIFF version
+ vcdiff_version_code_ = header->header4;
+ if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284)
+ (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol
+ VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ // fall through
+ case 3:
+ if (header->header3 != 0xC4) { // magic value 'D' | 0x80
+ wrong_magic_number = true;
+ }
+ // fall through
+ case 2:
+ if (header->header2 != 0xC3) { // magic value 'C' | 0x80
+ wrong_magic_number = true;
+ }
+ // fall through
+ case 1:
+ if (header->header1 != 0xD6) { // magic value 'V' | 0x80
+ wrong_magic_number = true;
+ }
+ // fall through
+ case 0:
+ if (wrong_magic_number) {
+ VCD_ERROR << "Did not find VCDIFF header bytes; "
+ "input is not a VCDIFF delta file" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
+ }
+ // Secondary compressor not supported.
+ if (header->hdr_indicator & VCD_DECOMPRESS) {
+ VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ if (header->hdr_indicator & VCD_CODETABLE) {
+ int bytes_parsed = InitCustomCodeTable(
+ data->UnparsedData() + sizeof(DeltaFileHeader),
+ data->End());
+ switch (bytes_parsed) {
+ case RESULT_ERROR:
+ return RESULT_ERROR;
+ case RESULT_END_OF_DATA:
+ return RESULT_END_OF_DATA;
+ default:
+ data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
+ }
+ } else {
+ addr_cache_.reset(new VCDiffAddressCache);
+ // addr_cache_->Init() will be called
+ // from VCDiffStreamingDecoderImpl::DecodeChunk()
+ data->Advance(sizeof(DeltaFileHeader));
+ }
+ return RESULT_SUCCESS;
+}
+
+int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
+ const char* data_end) {
+ // A custom code table is being specified. Parse the variable-length
+ // cache sizes and begin parsing the encoded custom code table.
+ int32_t near_cache_size = 0, same_cache_size = 0;
+ VCDiffHeaderParser header_parser(data_start, data_end);
+ if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
+ return header_parser.GetResult();
+ }
+ if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
+ return header_parser.GetResult();
+ }
+ custom_code_table_.reset(new struct VCDiffCodeTableData);
+ memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
+ custom_code_table_string_.clear();
+ addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
+ // addr_cache_->Init() will be called
+ // from VCDiffStreamingDecoderImpl::DecodeChunk()
+
+ // If we reach this point (the start of the custom code table)
+ // without encountering a RESULT_END_OF_DATA condition, then we won't call
+ // ReadDeltaFileHeader() again for this delta file.
+ //
+ // Instantiate a recursive decoder to interpret the custom code table
+ // as a VCDIFF encoding of the default code table.
+ custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
+ custom_code_table_decoder_->StartDecoding(
+ reinterpret_cast<const char*>(
+ &VCDiffCodeTableData::kDefaultCodeTableData),
+ sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
+ custom_code_table_decoder_->SetPlannedTargetFileSize(
+ sizeof(*custom_code_table_));
+ return static_cast<int>(header_parser.ParsedSize());
+}
+
+VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
+ ParseableChunk* data) {
+ if (!custom_code_table_decoder_.get()) {
+ return RESULT_SUCCESS;
+ }
+ if (!custom_code_table_.get()) {
+ VCD_DFATAL << "Internal error: custom_code_table_decoder_ is set,"
+ " but custom_code_table_ is NULL" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ OutputString<string> output_string(&custom_code_table_string_);
+ if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
+ data->UnparsedSize(),
+ &output_string)) {
+ return RESULT_ERROR;
+ }
+ if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
+ // Skip over the consumed data.
+ data->Finish();
+ return RESULT_END_OF_DATA;
+ }
+ if (!custom_code_table_decoder_->FinishDecoding()) {
+ return RESULT_ERROR;
+ }
+ if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
+ VCD_DFATAL << "Decoded custom code table size ("
+ << custom_code_table_string_.length()
+ << ") does not match size of a code table ("
+ << sizeof(*custom_code_table_) << ")" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ memcpy(custom_code_table_.get(),
+ custom_code_table_string_.data(),
+ sizeof(*custom_code_table_));
+ custom_code_table_string_.clear();
+ // Skip over the consumed data.
+ data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
+ custom_code_table_decoder_.reset();
+ delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
+ return RESULT_SUCCESS;
+}
+
+void VCDiffStreamingDecoderImpl::FlushDecodedTarget(
+ OutputStringInterface* output_string) {
+ output_string->append(
+ decoded_target_.data() + decoded_target_output_position_,
+ decoded_target_.size() - decoded_target_output_position_);
+ decoded_target_.clear();
+ delta_window_.set_target_window_start_pos(0);
+ decoded_target_output_position_ = 0;
+}
+
+void VCDiffStreamingDecoderImpl::AppendNewOutputText(
+ OutputStringInterface* output_string) {
+ const size_t bytes_decoded_this_chunk =
+ decoded_target_.size() - decoded_target_output_position_;
+ if (bytes_decoded_this_chunk > 0) {
+ size_t target_bytes_remaining = delta_window_.TargetBytesRemaining();
+ if (target_bytes_remaining > 0) {
+ // The decoder is midway through decoding a target window. Resize
+ // output_string to match the expected length. The interface guarantees
+ // not to resize output_string more than once per target window decoded.
+ output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
+ + target_bytes_remaining);
+ }
+ output_string->append(
+ decoded_target_.data() + decoded_target_output_position_,
+ bytes_decoded_this_chunk);
+ decoded_target_output_position_ = decoded_target_.size();
+ }
+}
+
+bool VCDiffStreamingDecoderImpl::DecodeChunk(
+ const char* data,
+ size_t len,
+ OutputStringInterface* output_string) {
+ if (!start_decoding_was_called_) {
+ VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL;
+ Reset();
+ return false;
+ }
+ ParseableChunk parseable_chunk(data, len);
+ if (!unparsed_bytes_.empty()) {
+ unparsed_bytes_.append(data, len);
+ parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
+ unparsed_bytes_.size());
+ }
+ VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
+ if (RESULT_SUCCESS == result) {
+ result = ReadCustomCodeTable(&parseable_chunk);
+ }
+ if (RESULT_SUCCESS == result) {
+ while (!parseable_chunk.Empty()) {
+ result = delta_window_.DecodeWindow(&parseable_chunk);
+ if (RESULT_SUCCESS != result) {
+ break;
+ }
+ if (ReachedPlannedTargetFileSize()) {
+ // Found exactly the length we expected. Stop decoding.
+ break;
+ }
+ if (!allow_vcd_target()) {
+ // VCD_TARGET will never be used to reference target data before the
+ // start of the current window, so flush and clear the contents of
+ // decoded_target_.
+ FlushDecodedTarget(output_string);
+ }
+ }
+ }
+ if (RESULT_ERROR == result) {
+ Reset(); // Don't allow further DecodeChunk calls
+ return false;
+ }
+ unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
+ parseable_chunk.UnparsedSize());
+ AppendNewOutputText(output_string);
+ return true;
+}
+
+// Finishes decoding after all data has been received. Returns true
+// if decoding of the entire stream was successful.
+bool VCDiffStreamingDecoderImpl::FinishDecoding() {
+ bool success = true;
+ if (!start_decoding_was_called_) {
+ VCD_WARNING << "FinishDecoding() called before StartDecoding(),"
+ " or called after DecodeChunk() returned false"
+ << VCD_ENDL;
+ success = false;
+ } else if (!IsDecodingComplete()) {
+ VCD_ERROR << "FinishDecoding() called before parsing entire"
+ " delta file window" << VCD_ENDL;
+ success = false;
+ }
+ // Reset the object state for the next decode operation
+ Reset();
+ return success;
+}
+
+bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
+ size_t window_size) const {
+ if (window_size > maximum_target_window_size_) {
+ VCD_ERROR << "Length of target window (" << window_size
+ << ") exceeds limit of " << maximum_target_window_size_
+ << " bytes" << VCD_ENDL;
+ return true;
+ }
+ if (HasPlannedTargetFileSize()) {
+ // The logical expression to check would be:
+ //
+ // total_of_target_window_sizes_ + window_size > planned_target_file_size_
+ //
+ // but the addition might cause an integer overflow if target_bytes_to_add
+ // is very large. So it is better to check target_bytes_to_add against
+ // the remaining planned target bytes.
+ size_t remaining_planned_target_file_size =
+ planned_target_file_size_ - total_of_target_window_sizes_;
+ if (window_size > remaining_planned_target_file_size) {
+ VCD_ERROR << "Length of target window (" << window_size
+ << " bytes) plus previous windows ("
+ << total_of_target_window_sizes_
+ << " bytes) would exceed planned size of "
+ << planned_target_file_size_ << " bytes" << VCD_ENDL;
+ return true;
+ }
+ }
+ size_t remaining_maximum_target_bytes =
+ maximum_target_file_size_ - total_of_target_window_sizes_;
+ if (window_size > remaining_maximum_target_bytes) {
+ VCD_ERROR << "Length of target window (" << window_size
+ << " bytes) plus previous windows ("
+ << total_of_target_window_sizes_
+ << " bytes) would exceed maximum target file size of "
+ << maximum_target_file_size_ << " bytes" << VCD_ENDL;
+ return true;
+ }
+ return false;
+}
+
+// *** Methods for VCDiffDeltaFileWindow
+
+void VCDiffDeltaFileWindow::Reset() {
+ found_header_ = false;
+
+ // Mark the start of the current target window.
+ target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
+ target_window_length_ = 0;
+
+ source_segment_ptr_ = NULL;
+ source_segment_length_ = 0;
+
+ instructions_and_sizes_.Invalidate();
+ data_for_add_and_run_.Invalidate();
+ addresses_for_copy_.Invalidate();
+
+ interleaved_bytes_expected_ = 0;
+
+ has_checksum_ = false;
+ expected_checksum_ = 0;
+}
+
+VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
+ VCDiffHeaderParser* header_parser) {
+ size_t add_and_run_data_length = 0;
+ size_t instructions_and_sizes_length = 0;
+ size_t addresses_length = 0;
+ if (!header_parser->ParseSectionLengths(has_checksum_,
+ &add_and_run_data_length,
+ &instructions_and_sizes_length,
+ &addresses_length,
+ &expected_checksum_)) {
+ return header_parser->GetResult();
+ }
+ if (parent_->AllowInterleaved() &&
+ (add_and_run_data_length == 0) &&
+ (addresses_length == 0)) {
+ // The interleaved format is being used.
+ interleaved_bytes_expected_ =
+ static_cast<int>(instructions_and_sizes_length);
+ UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
+ header_parser->End());
+ } else {
+ // If interleaved format is not used, then the whole window contents
+ // must be available before decoding can begin. If only part of
+ // the current window is available, then report end of data
+ // and re-parse the whole header when DecodeChunk() is called again.
+ if (header_parser->UnparsedSize() < (add_and_run_data_length +
+ instructions_and_sizes_length +
+ addresses_length)) {
+ return RESULT_END_OF_DATA;
+ }
+ data_for_add_and_run_.Init(header_parser->UnparsedData(),
+ add_and_run_data_length);
+ instructions_and_sizes_.Init(data_for_add_and_run_.End(),
+ instructions_and_sizes_length);
+ addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
+ if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
+ VCD_ERROR << "The end of the instructions section "
+ "does not match the end of the delta window" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ }
+ reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
+ instructions_and_sizes_.End());
+ return RESULT_SUCCESS;
+}
+
+// Here are the elements of the delta window header to be parsed,
+// from section 4 of the RFC:
+//
+// Window1
+// Win_Indicator - byte
+// [Source segment size] - integer
+// [Source segment position] - integer
+// The delta encoding of the target window
+// Length of the delta encoding - integer
+// The delta encoding
+// Size of the target window - integer
+// Delta_Indicator - byte
+// Length of data for ADDs and RUNs - integer
+// Length of instructions and sizes - integer
+// Length of addresses for COPYs - integer
+// Data section for ADDs and RUNs - array of bytes
+// Instructions and sizes section - array of bytes
+// Addresses section for COPYs - array of bytes
+//
+VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
+ ParseableChunk* parseable_chunk) {
+ std::string* decoded_target = parent_->decoded_target();
+ VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
+ parseable_chunk->End());
+ size_t source_segment_position = 0;
+ unsigned char win_indicator = 0;
+ if (!header_parser.ParseWinIndicatorAndSourceSegment(
+ parent_->dictionary_size(),
+ decoded_target->size(),
+ parent_->allow_vcd_target(),
+ &win_indicator,
+ &source_segment_length_,
+ &source_segment_position)) {
+ return header_parser.GetResult();
+ }
+ has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
+ if (!header_parser.ParseWindowLengths(&target_window_length_)) {
+ return header_parser.GetResult();
+ }
+ if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
+ // An error has been logged by TargetWindowWouldExceedSizeLimits().
+ return RESULT_ERROR;
+ }
+ header_parser.ParseDeltaIndicator();
+ VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
+ if (RESULT_SUCCESS != setup_return_code) {
+ return setup_return_code;
+ }
+ // Reserve enough space in the output string for the current target window.
+ const size_t wanted_capacity =
+ target_window_start_pos_ + target_window_length_;
+ if (decoded_target->capacity() < wanted_capacity) {
+ decoded_target->reserve(wanted_capacity);
+ }
+ // Get a pointer to the start of the source segment.
+ if (win_indicator & VCD_SOURCE) {
+ source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
+ } else if (win_indicator & VCD_TARGET) {
+ // This assignment must happen after the reserve().
+ // decoded_target should not be resized again while processing this window,
+ // so source_segment_ptr_ should remain valid.
+ source_segment_ptr_ = decoded_target->data() + source_segment_position;
+ }
+ // The whole window header was found and parsed successfully.
+ found_header_ = true;
+ parseable_chunk->Advance(header_parser.ParsedSize());
+ parent_->AddToTotalTargetWindowSize(target_window_length_);
+ return RESULT_SUCCESS;
+}
+
+void VCDiffDeltaFileWindow::UpdateInstructionPointer(
+ ParseableChunk* parseable_chunk) {
+ if (IsInterleaved()) {
+ size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
+ // Reduce expected instruction segment length by bytes parsed
+ interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
+ parseable_chunk->Advance(bytes_parsed);
+ }
+}
+
+inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
+ return parent_->decoded_target()->size() - target_window_start_pos_;
+}
+
+size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
+ if (target_window_length_ == 0) {
+ // There is no window being decoded at present
+ return 0;
+ } else {
+ return target_window_length_ - TargetBytesDecoded();
+ }
+}
+
+inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
+ parent_->decoded_target()->append(data, size);
+}
+
+inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
+ parent_->decoded_target()->append(size, byte);
+}
+
+VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
+ if (size > data_for_add_and_run_.UnparsedSize()) {
+ return RESULT_END_OF_DATA;
+ }
+ // Write the next "size" data bytes
+ CopyBytes(data_for_add_and_run_.UnparsedData(), size);
+ data_for_add_and_run_.Advance(size);
+ return RESULT_SUCCESS;
+}
+
+VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
+ if (data_for_add_and_run_.Empty()) {
+ return RESULT_END_OF_DATA;
+ }
+ // Write "size" copies of the next data byte
+ RunByte(*data_for_add_and_run_.UnparsedData(), size);
+ data_for_add_and_run_.Advance(1);
+ return RESULT_SUCCESS;
+}
+
+VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
+ unsigned char mode) {
+ // Keep track of the number of target bytes decoded as a local variable
+ // to avoid recalculating it each time it is needed.
+ size_t target_bytes_decoded = TargetBytesDecoded();
+ const VCDAddress here_address =
+ static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
+ const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
+ here_address,
+ mode,
+ addresses_for_copy_.UnparsedDataAddr(),
+ addresses_for_copy_.End());
+ switch (decoded_address) {
+ case RESULT_ERROR:
+ VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL;
+ return RESULT_ERROR;
+ case RESULT_END_OF_DATA:
+ return RESULT_END_OF_DATA;
+ default:
+ if ((decoded_address < 0) || (decoded_address > here_address)) {
+ VCD_DFATAL << "Internal error: unexpected address " << decoded_address
+ << " returned from DecodeAddress, with here_address = "
+ << here_address << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ break;
+ }
+ size_t address = static_cast<size_t>(decoded_address);
+ if ((address + size) <= source_segment_length_) {
+ // Copy all data from source segment
+ CopyBytes(&source_segment_ptr_[address], size);
+ return RESULT_SUCCESS;
+ }
+ // Copy some data from target window...
+ if (address < source_segment_length_) {
+ // ... plus some data from source segment
+ const size_t partial_copy_size = source_segment_length_ - address;
+ CopyBytes(&source_segment_ptr_[address], partial_copy_size);
+ target_bytes_decoded += partial_copy_size;
+ address += partial_copy_size;
+ size -= partial_copy_size;
+ }
+ address -= source_segment_length_;
+ // address is now based at start of target window
+ const char* const target_segment_ptr = parent_->decoded_target()->data() +
+ target_window_start_pos_;
+ while (size > (target_bytes_decoded - address)) {
+ // Recursive copy that extends into the yet-to-be-copied target data
+ const size_t partial_copy_size = target_bytes_decoded - address;
+ CopyBytes(&target_segment_ptr[address], partial_copy_size);
+ target_bytes_decoded += partial_copy_size;
+ address += partial_copy_size;
+ size -= partial_copy_size;
+ }
+ CopyBytes(&target_segment_ptr[address], size);
+ return RESULT_SUCCESS;
+}
+
+int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
+ if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
+ != parseable_chunk->UnparsedData())) {
+ VCD_DFATAL << "Internal error: interleaved format is used, but the"
+ " input pointer does not point to the instructions section"
+ << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ while (TargetBytesDecoded() < target_window_length_) {
+ int32_t decoded_size = VCD_INSTRUCTION_ERROR;
+ unsigned char mode = 0;
+ VCDiffInstructionType instruction =
+ reader_.GetNextInstruction(&decoded_size, &mode);
+ switch (instruction) {
+ case VCD_INSTRUCTION_END_OF_DATA:
+ UpdateInstructionPointer(parseable_chunk);
+ return RESULT_END_OF_DATA;
+ case VCD_INSTRUCTION_ERROR:
+ return RESULT_ERROR;
+ default:
+ break;
+ }
+ const size_t size = static_cast<size_t>(decoded_size);
+ // The value of "size" itself could be enormous (say, INT32_MAX)
+ // so check it individually against the limit to protect against
+ // overflow when adding it to something else.
+ if ((size > target_window_length_) ||
+ ((size + TargetBytesDecoded()) > target_window_length_)) {
+ VCD_ERROR << VCDiffInstructionName(instruction)
+ << " with size " << size
+ << " plus existing " << TargetBytesDecoded()
+ << " bytes of target data exceeds length of target"
+ " window (" << target_window_length_ << " bytes)"
+ << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ VCDiffResult result = RESULT_SUCCESS;
+ switch (instruction) {
+ case VCD_ADD:
+ result = DecodeAdd(size);
+ break;
+ case VCD_RUN:
+ result = DecodeRun(size);
+ break;
+ case VCD_COPY:
+ result = DecodeCopy(size, mode);
+ break;
+ default:
+ VCD_DFATAL << "Unexpected instruction type " << instruction
+ << "in opcode stream" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ switch (result) {
+ case RESULT_END_OF_DATA:
+ reader_.UnGetInstruction();
+ UpdateInstructionPointer(parseable_chunk);
+ return RESULT_END_OF_DATA;
+ case RESULT_ERROR:
+ return RESULT_ERROR;
+ case RESULT_SUCCESS:
+ break;
+ }
+ }
+ if (TargetBytesDecoded() != target_window_length_) {
+ VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded()
+ << " bytes) does not match expected size ("
+ << target_window_length_ << " bytes)" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ const char* const target_window_start =
+ parent_->decoded_target()->data() + target_window_start_pos_;
+ if (has_checksum_ &&
+ (ComputeAdler32(target_window_start, target_window_length_)
+ != expected_checksum_)) {
+ VCD_ERROR << "Target data does not match checksum; this could mean "
+ "that the wrong dictionary was used" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ if (!instructions_and_sizes_.Empty()) {
+ VCD_ERROR << "Excess instructions and sizes left over "
+ "after decoding target window" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ if (!IsInterleaved()) {
+ // Standard format is being used, with three separate sections for the
+ // instructions, data, and addresses.
+ if (!data_for_add_and_run_.Empty()) {
+ VCD_ERROR << "Excess ADD/RUN data left over "
+ "after decoding target window" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ if (!addresses_for_copy_.Empty()) {
+ VCD_ERROR << "Excess COPY addresses left over "
+ "after decoding target window" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ // Reached the end of the window. Update the ParseableChunk to point to the
+ // end of the addresses section, which is the last section in the window.
+ parseable_chunk->SetPosition(addresses_for_copy_.End());
+ } else {
+ // Interleaved format is being used.
+ UpdateInstructionPointer(parseable_chunk);
+ }
+ return RESULT_SUCCESS;
+}
+
+VCDiffResult VCDiffDeltaFileWindow::DecodeWindow(
+ ParseableChunk* parseable_chunk) {
+ if (!parent_) {
+ VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() "
+ "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ if (!found_header_) {
+ switch (ReadHeader(parseable_chunk)) {
+ case RESULT_END_OF_DATA:
+ return RESULT_END_OF_DATA;
+ case RESULT_ERROR:
+ return RESULT_ERROR;
+ default:
+ // Reset address cache between windows (RFC section 5.1)
+ if (!parent_->addr_cache()->Init()) {
+ VCD_DFATAL << "Error initializing address cache" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ }
+ } else {
+ // We are resuming a window that was partially decoded before a
+ // RESULT_END_OF_DATA was returned. This can only happen on the first
+ // loop iteration, and only if the interleaved format is enabled and used.
+ if (!IsInterleaved()) {
+ VCD_DFATAL << "Internal error: Resumed decoding of a delta file window"
+ " when interleaved format is not being used" << VCD_ENDL;
+ return RESULT_ERROR;
+ }
+ UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
+ parseable_chunk->End());
+ reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
+ instructions_and_sizes_.End());
+ }
+ switch (DecodeBody(parseable_chunk)) {
+ case RESULT_END_OF_DATA:
+ if (MoreDataExpected()) {
+ return RESULT_END_OF_DATA;
+ } else {
+ VCD_ERROR << "End of data reached while decoding VCDIFF delta file"
+ << VCD_ENDL;
+ // fall through to RESULT_ERROR case
+ }
+ case RESULT_ERROR:
+ return RESULT_ERROR;
+ default:
+ break; // DecodeBody succeeded
+ }
+ // Get ready to read a new delta window
+ Reset();
+ return RESULT_SUCCESS;
+}
+
+// *** Methods for VCDiffStreamingDecoder
+
+VCDiffStreamingDecoder::VCDiffStreamingDecoder()
+: impl_(new VCDiffStreamingDecoderImpl) { }
+
+VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
+
+void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
+ impl_->StartDecoding(source, len);
+}
+
+bool VCDiffStreamingDecoder::DecodeChunkToInterface(
+ const char* data,
+ size_t len,
+ OutputStringInterface* output_string) {
+ return impl_->DecodeChunk(data, len, output_string);
+}
+
+bool VCDiffStreamingDecoder::FinishDecoding() {
+ return impl_->FinishDecoding();
+}
+
+bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
+ size_t new_maximum_target_file_size) {
+ return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
+}
+
+bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
+ size_t new_maximum_target_window_size) {
+ return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
+}
+
+void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
+ impl_->SetAllowVcdTarget(allow_vcd_target);
+}
+
+bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
+ size_t dictionary_size,
+ const string& encoding,
+ OutputStringInterface* target) {
+ target->clear();
+ decoder_.StartDecoding(dictionary_ptr, dictionary_size);
+ if (!decoder_.DecodeChunkToInterface(encoding.data(),
+ encoding.size(),
+ target)) {
+ return false;
+ }
+ return decoder_.FinishDecoding();
+}
+
+} // namespace open_vcdiff