aboutsummaryrefslogtreecommitdiff
path: root/src/binary_parse/range_checked_byte_ptr.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/binary_parse/range_checked_byte_ptr.h')
-rw-r--r-- src/binary_parse/range_checked_byte_ptr.h 609
1 files changed, 0 insertions, 609 deletions
diff --git a/src/binary_parse/range_checked_byte_ptr.h b/src/binary_parse/range_checked_byte_ptr.h
deleted file mode 100644
index a0eadbb..0000000
--- a/src/binary_parse/range_checked_byte_ptr.h
+++ /dev/null
@@ -1,609 +0,0 @@
-// Copyright 2015 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_
-#define PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_
-
-#include <assert.h>
-
-#include <cstddef>
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace piex {
-namespace binary_parse {
-
-// Since NaCl does not comply with C++11, we cannot just use stdint.h.
-typedef unsigned short uint16; // NOLINT
-typedef short int16; // NOLINT
-typedef unsigned int uint32;
-typedef int int32;
-
-enum MemoryStatus {  // Status codes used for RangeCheckedBytePtr's error flag and by the Get16s/Get16u/Get32s/Get32u helpers.
- RANGE_CHECKED_BYTE_SUCCESS = 0,  // No error recorded; errorOccurred() compares against this value.
- RANGE_CHECKED_BYTE_ERROR = 1,  // Generic failure; the value the Get* helpers are documented to store in 'status'.
- RANGE_CHECKED_BYTE_ERROR_OVERFLOW = 2,  // Set by operator[], operator++ and operator+= on an out-of-range access or move.
- RANGE_CHECKED_BYTE_ERROR_UNDERFLOW = 3,  // Set by operator-- and operator-= when moving before the start of the sub-array.
-};
-
-// Interface that RangeCheckedBytePtr uses to access the underlying array of
-// bytes. This allows RangeCheckedBytePtr to be used to access data as if it
-// were stored contiguously in memory, even if the data is in fact split up
-// into non-contiguous chunks and / or does not reside in memory.
-//
-// The only requirement is that the data can be read in pages of a fixed (but
-// configurable) size. Notionally, the byte array (which contains length()
-// bytes) is split up into non-overlapping pages of pageSize() bytes each.
-// (The last page may be shorter if length() is not a multiple of pageSize().)
-// There are therefore (length() - 1) / pageSize() + 1 such pages, with indexes
-// 0 through (length() - 1) / pageSize(). Page i contains the bytes from offset
-// i * pageSize() in the array up to and including the byte at offset
-// (i + 1) * pageSize() - 1 (or, in the case of the last page, length() - 1).
-//
-// In essence, RangeCheckedBytePtr and PagedByteArray together provide a poor
-// man's virtual-memory-and-memory-mapped-file work-alike in situations where
-// virtual memory cannot be used or would consume too much virtual address
-// space.
-//
-// Thread safety: In general, subclasses implementing this interface should
-// ensure that the member functions are thread-safe. It will then be safe to
-// access the same array from multiple threads. (Note that RangeCheckedBytePtr
-// itself is not thread-safe in the sense that a single instance of
-// RangeCheckedBytePtr cannot be used concurrently from multiple threads; it
-// is, however, safe to use different RangeCheckedBytePtr instances in
-// different threads to access the same PagedByteArray concurrently, assuming
-// that the PagedByteArray implementation is thread-safe.)
-class PagedByteArray {
- public:
- // Base class for pages in the byte array. Implementations of PagedByteArray
- // can create a subclass of the Page class to manage the lifetime of buffers
- // associated with a page returned by getPage(). For example, a
- // PagedByteArray backed by a file might define a Page subclass like this:
- //
- // struct FilePage : public Page {
- // std::vector<unsigned char> bytes;
- // };
- //
- // The corresponding getPage() implementation could then look like this:
- //
- // void getPage(size_t page_index, const unsigned char** begin,
- // const unsigned char** end, std::shared_ptr<Page>* page) const
- // {
- // // Create a new page.
- // std::shared_ptr<FilePage> file_page(new FilePage());
- //
- // // Read contents of page from file into file_page->bytes.
- // [...]
- //
- // // Set *begin and *end to point to beginning and end of
- // // file_page->bytes vector.
- // *begin = &file_page->bytes[0];
- // *end = *begin + file_page->bytes.size();
- //
- // // Return page to caller.
- // *page = file_page;
- // }
- //
- // In this way, the storage associated with the page (the FilePage::bytes
- // vector) will be kept alive until the RangeCheckedBytePtr releases the
- // shared pointer.
- class Page {};  // NOTE(review): no virtual destructor; presumably relies on std::shared_ptr keeping the deleter of the concrete page type -- confirm.
-
- typedef std::shared_ptr<Page> PagePtr;
-
- virtual ~PagedByteArray();  // Only declared here; this header contains no definition for it.
-
- // Returns the length of the array in bytes. The value returned must remain
- // the same on every call for the entire lifetime of the object.
- virtual size_t length() const = 0;
-
- // Returns the length of each page in bytes. (The last page may be shorter
- // than pageSize() if length() is not a multiple of pageSize() -- see also
- // the class-wide comment above.) The value returned must remain the same on
- // every call for the entire lifetime of the object.
- virtual size_t pageSize() const = 0;
-
- // Returns a pointer to a memory buffer containing the data for the page
- // with index "page_index".
- //
- // *begin is set to point to the first byte of the page; *end is set to point
- // one byte beyond the last byte in the page. This implies that:
- // - (*end - *begin) == pageSize() for every page except the last page
- // - (*end - *begin) == length() - pageSize() * ((length() - 1) / pageSize())
- // for the last page
- //
- // *page will be set to a PagePtr that the caller will hold on to until
- // it no longer needs to access the memory buffer. The memory buffer will
- // remain valid until the PagePtr is released or the PagedByteArray object
- // is destroyed. An implementation may choose to return a null PagePtr;
- // this indicates that the memory buffer will remain valid until the
- // PagedByteArray object is destroyed, even if the caller does not hold on to
- // the PagePtr. (This is intended as an optimization that some
- // implementations may choose to take advantage of, as a null PagePtr is
- // cheaper to copy.)
- virtual void getPage(size_t page_index, const unsigned char **begin,
- const unsigned char **end, PagePtr *page) const = 0;
-};
-
-typedef std::shared_ptr<PagedByteArray> PagedByteArrayPtr;  // Shared-ownership handle; the type of RangeCheckedBytePtr::array_.
-
-// Smart pointer that has the same semantics as a "const unsigned char *" (plus
-// some convenience functions) but provides range checking and the ability to
-// access arrays that are not contiguous in memory or do not reside entirely in
-// memory (through the PagedByteArray interface).
-//
-// In the following, we abbreviate RangeCheckedBytePtr as RCBP.
-//
-// The intent of this class is to allow easy security hardening of code that
-// parses binary data structures using raw byte pointers. To do this, only the
-// declarations of the pointers need to be changed; the code that uses the
-// pointers can remain unchanged.
-//
-// If an illegal operation occurs on a pointer, an error flag is set, and all
-// read operations from this point on return 0. This means that error checking
-// need not be done after every access; it is sufficient to check the error flag
-// (using errorOccurred()) once before the RCBP is destroyed. Again, this allows
-// the majority of the parsing code to remain unchanged. (Note caveats below
-// that apply if a copy of the pointer is created.)
-//
-// Legal operations are exactly the ones that would be legal on a raw C++
-// pointer. Read accesses are legal if they fall within the underlying array. A
-// RCBP may point to any element in the underlying array or one element beyond
-// the end of the array.
-//
-// For brevity, the documentation for individual member functions does not state
-// explicitly that the error flag will be set on out-of-range operations.
-//
-// Note:
-//
-// - Just as for raw pointers, it is legal for a pointer to point one element
-// beyond the end of the array, but it is illegal to use operator*() on such a
-// pointer.
-//
-// - If a copy of an RCBP is created, then performing illegal operations on the
-// copy affects the error flag of the copy, but not of the original pointer.
-// Note that using operator+ and operator- also creates a copy of the pointer.
-// For example:
-//
-// // Assume we have an RCBP called "p" and a size_t variable called
-// // "offset".
-// RangeCheckedBytePtr sub_data_structure = p + offset;
-//
-// If "offset" is large enough to cause an out-of-range access, then
-// sub_data_structure.errorOccurred() will be true, but p.errorOccurred() will
-// still be false. The error flag for sub_data_structure therefore needs to be
-// checked before it is destroyed.
-class RangeCheckedBytePtr {
- private:
- // This class maintains the following class invariants:
- // - page_data_ always points to a buffer of at least current_page_len_
- // bytes.
- //
- // - The current position lies within the sub-array, i.e.
- // sub_array_begin_ <= current_pos_ <= sub_array_end_
- //
- // - The sub-array is entirely contained within the array, i.e.
- // 0 <= sub_array_begin_ <= sub_array_end_ <= array_->length()
- //
- // - If the current page is non-empty, it lies completely within the
- // sub-array, i.e.
- // if current_page_len_ > 0, then
- // sub_array_begin_ <= page_begin_offset_
- // and
- // page_begin_offset_ + current_page_len_ <= sub_array_end_
- // (See also restrictPageToSubArray().)
- // (If current_page_len_ == 0, then page_begin_offset_ may lie outside
- // the sub-array; this condition is harmless. Additional logic would be
- // required to make page_begin_offset_ lie within the sub-array in this
- // case, and it would serve no purpose other than to make the invariant
- // slightly simpler.)
- //
- // Note that it is _not_ a class invariant that current_pos_ needs to lie
- // within the current page. Making this an invariant would have two
- // undesirable consequences:
- // a) When operator[] is called with an index that lies beyond the end of
- // the current page, it would need to temporarily load the page that
- // contains this index, but it wouldn't be able to "retain" the page
- // (i.e. make it the current page) because that would violate the
- // proposed invariant. This would lead to inefficient behavior in the
- // case where code accesses a large range of indices beyond the end of
- // the page because a page would need to be loaded temporarily on each
- // access.
- // b) It would require more code: loadPageForOffset() would need to be
- // called anywhere that current_pos_ changes (whereas, with the present
- // approach, loadPageForOffset() is only called in operator[]).
-
- // PagedByteArray that is accessed by this pointer.
- PagedByteArrayPtr array_;
-
- // Pointer to the current page.
- mutable PagedByteArray::PagePtr page_;
-
- // Pointer to the current page's data buffer.
- mutable const unsigned char *page_data_;
-
- // All of the following offsets are defined relative to the beginning of
- // the array defined by the PagedByteArray array_.
-
- // Array offset that the pointer points to.
- size_t current_pos_;
-
- // Start offset of the current sub-array.
- size_t sub_array_begin_;
-
- // End offset of the current sub-array (one past its last byte).
- size_t sub_array_end_;
-
- // Array offset corresponding to the "page_data_" pointer.
- mutable size_t page_begin_offset_;
-
- // Length of the current page.
- mutable size_t current_page_len_;
-
- // Error flag. This is mutable because methods that don't affect the value
- // of the pointer itself (such as operator[]) nevertheless need to be able to
- // signal error conditions.
- mutable MemoryStatus error_flag_;
-
- RangeCheckedBytePtr();  // Private: callers cannot default-construct a pointer.
-
- public:
- // Creates a pointer that points to the first element of 'array', which has a
- // length of 'len'. The caller must ensure that the array remains valid until
- // this pointer and any pointers created from it have been destroyed.
- // Note: 'len' may be zero, but 'array' must in this case still be a valid,
- // non-null pointer.
- explicit RangeCheckedBytePtr(const unsigned char *array, const size_t len);
-
- // Creates a pointer that points to the first element of the given
- // PagedByteArray. The caller must ensure that this PagedByteArray remains
- // valid until this pointer and any pointers created from it have been
- // destroyed.
- explicit RangeCheckedBytePtr(PagedByteArray *array);
-
- // Creates an invalid RangeCheckedBytePtr. Calling errorOccurred() on the
- // result of invalidPointer() always returns true.
- // Do not check a RangeCheckedBytePtr for validity by comparing against
- // invalidPointer(); use errorOccurred() instead.
- static RangeCheckedBytePtr invalidPointer();
-
- // Returns a RangeCheckedBytePtr that points to a sub-array of this pointer's
- // underlying array. The sub-array starts at position 'pos' relative to this
- // pointer and is 'length' bytes long. The sub-array must lie within this
- // pointer's array, i.e. pos + length <= remainingLength() must hold. If this
- // condition is violated, an invalid pointer is returned.
- RangeCheckedBytePtr pointerToSubArray(size_t pos, size_t length) const;
-
- // Returns the number of bytes remaining in the array from this pointer's
- // present position. Returns 0 if the error flag is set.
- inline size_t remainingLength() const;
-
- // Returns the offset (or index) in the underlying array that this pointer
- // points to. If this pointer was created using pointerToSubArray(), the
- // offset is relative to the beginning of the sub-array (and not relative to
- // the beginning of the original array).
- size_t offsetInArray() const;
-
- // Returns whether an out-of-bounds error has ever occurred on this pointer in
- // the past. An error occurs if a caller attempts to read from a position
- // outside the bounds of the array or to move the pointer outside the bounds
- // of the array.
- //
- // The error flag is never reset. Once an error has occurred,
- // all subsequent attempts to read from the pointer (even within the bounds of
- // the array) return 0.
- //
- // Note that it is permissible for a pointer to point one element past the end
- // of the array, but it is not permissible to read from this position. This is
- // equivalent to the semantics of raw C++ pointers.
- inline bool errorOccurred() const;
-
- // Returns the substring of length 'length' located at position 'pos' relative
- // to this pointer.
- std::string substr(size_t pos, size_t length) const;
-
- // Returns 'length' number of bytes from the array starting at position 'pos'
- // relative to this pointer.
- std::vector<unsigned char> extractBytes(size_t pos, size_t length) const;
-
- // Equivalent to calling convert(0, output).
- template <class T>
- bool convert(T *output) const {
- union {
- T t;
- unsigned char ch[sizeof(T)];
- } buffer;
- for (size_t i = 0; i < sizeof(T); i++) {
- buffer.ch[i] = (*this)[i];  // Out-of-range reads yield 0 and set the error flag.
- }
- if (!errorOccurred()) {  // *output is written only when every byte was read without error.
- *output = buffer.t;
- }
- return !errorOccurred();
- }
-
- // Reinterprets this pointer as a pointer to an array of T, then returns the
- // element at position 'index' in this array of T. (Note that this position
- // corresponds to position index * sizeof(T) in the underlying byte array.)
- //
- // Returns true if successful; false if an out-of-range error occurred or if
- // the error flag was already set on the pointer when calling convert().
- //
- // The conversion from a sequence of sizeof(T) bytes to a T is performed in an
- // implementation-defined fashion. This conversion is equivalent to the one
- // obtained using the following union by filling the array 'ch' and then
- // reading the member 't':
- //
- // union {
- // T t;
- // unsigned char ch[sizeof(T)];
- // };
- //
- // Callers should note that, among other things, the conversion is not
- // endian-agnostic with respect to the endianness of T.
- template <class T>
- bool convert(size_t index, T *output) const {
- RangeCheckedBytePtr p = (*this) + index * sizeof(T);  // NOTE(review): index * sizeof(T) is unchecked size_t arithmetic and can wrap for a huge index.
- bool valid = p.convert(output);
- if (!valid) {
- error_flag_ = p.error_flag_;  // Copy the temporary's error status onto this pointer.
- }
- return valid;
- }
-
- // Operators. Unless otherwise noted, these operators have the same semantics
- // as the same operators on an unsigned char pointer.
-
- // If an out-of-range access is attempted, returns 0 (and sets the error
- // flag).
- inline unsigned char operator[](size_t i) const;
-
- inline unsigned char operator*() const;
-
- inline RangeCheckedBytePtr &operator++();
-
- inline RangeCheckedBytePtr operator++(int);
-
- inline RangeCheckedBytePtr &operator--();
-
- inline RangeCheckedBytePtr operator--(int);
-
- inline RangeCheckedBytePtr &operator+=(size_t x);
-
- inline RangeCheckedBytePtr &operator-=(size_t x);
-
- inline friend RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p,
- size_t x);
-
- inline friend RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p,
- size_t x);
-
- // Tests whether x and y point at the same position in the underlying array.
- // Two pointers that point at the same position but have different
- // sub-arrays still compare equal. It is not legal to compare two pointers
- // that point into different paged byte arrays.
- friend bool operator==(const RangeCheckedBytePtr &x,
- const RangeCheckedBytePtr &y);
-
- // Returns !(x == y).
- friend bool operator!=(const RangeCheckedBytePtr &x,
- const RangeCheckedBytePtr &y);
-
- private:
- void loadPageForOffset(size_t offset) const;  // Called by operator[] to make the page containing 'offset' current.
- void restrictPageToSubArray() const;  // See the page/sub-array invariant in the comment at the top of the class.
-};
-
-// Returns the result of calling std::memcmp() on the sequences of 'num' bytes
-// pointed to by 'x' and 'y'. The result is undefined if either
-// x.remainingLength() or y.remainingLength() is less than 'num'.
-int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
- size_t num);  // Declared in piex::binary_parse; takes RangeCheckedBytePtr arguments rather than raw pointers.
-
-// Returns the result of calling std::memcmp() (note: _not_ strcmp()) on the
-// y.length() number of bytes pointed to by 'x' and the string 'y'. The result
-// is undefined if x.remainingLength() is less than y.length().
-int strcmp(const RangeCheckedBytePtr &x, const std::string &y);  // Compares exactly y.length() bytes; no '\0' terminator is involved.
-
-// Returns the length of the zero-terminated string starting at 'src' (not
-// including the '\0' terminator). If no '\0' occurs before the end of the
-// array, the result is undefined.
-size_t strlen(const RangeCheckedBytePtr &src);
-
-// Integer decoding functions.
-//
-// These functions read signed (Get16s, Get32s) or unsigned (Get16u, Get32u)
-// integers from 'input'. The integer read from the input can be specified to be
-// either big-endian (big_endian == true) or little-endian
-// (big_endian == false). Signed integers are read in two's-complement
-// representation. The integer read in the specified format is then converted to
-// the implementation's native integer representation and returned. In other
-// words, the semantics of these functions are independent of the
-// implementation's endianness and signed integer representation.
-//
-// If an out-of-range error occurs, these functions do _not_ set the error flag
-// on 'input'. Instead, they set 'status' to RANGE_CHECKED_BYTE_ERROR and return
-// 0.
-//
-// Note:
-// - If an error occurs and 'status' is already set to an error value (i.e. a
-// value different from RANGE_CHECKED_BYTE_SUCCESS), the value of 'status' is
-// left unchanged.
-// - If the operation is successful, 'status' is left unchanged (i.e. it is not
-// actively set to RANGE_CHECKED_BYTE_SUCCESS).
-//
-// Together, these two properties mean that these functions can be used to read
-// a number of integers in succession with only a single error check, like this:
-//
-// MemoryStatus status = RANGE_CHECKED_BYTE_SUCCESS;
-// int16 val1 = Get16s(input, false, &status);
-// int32 val2 = Get32s(input + 2, false, &status);
-// uint32 val3 = Get32u(input + 6, false, &status);
-// if (status != RANGE_CHECKED_BYTE_SUCCESS) {
-// // error handling
-// }
-int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
- MemoryStatus *status);
-uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
- MemoryStatus *status);
-int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
- MemoryStatus *status);
-uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
- MemoryStatus *status);
-
-size_t RangeCheckedBytePtr::remainingLength() const {  // Bytes between current_pos_ and sub_array_end_; 0 once the error flag is set.
- if (!errorOccurred()) {
- // current_pos_ <= sub_array_end_ is a class invariant, but protect
- // against violations of this invariant.
- if (current_pos_ <= sub_array_end_) {
- return sub_array_end_ - current_pos_;
- } else {
- assert(false);  // Invariant broken: fires unless NDEBUG is defined; otherwise 0 is reported below.
- return 0;
- }
- } else {
- return 0;  // An error was recorded earlier, so no bytes are reported as remaining.
- }
-}
-
-bool RangeCheckedBytePtr::errorOccurred() const {  // True once any status other than SUCCESS has been stored in error_flag_.
- return error_flag_ != RANGE_CHECKED_BYTE_SUCCESS;
-}
-
-unsigned char RangeCheckedBytePtr::operator[](size_t i) const {  // Reads the byte at current_pos_ + i, or returns 0 and sets the error flag.
- // Check that pointer doesn't have an error flag set.
- if (!errorOccurred()) {
- // Offset in array to read from.
- const size_t read_offset = current_pos_ + i;  // NOTE(review): unsigned addition, so a very large i wraps around -- confirm no caller depends on that.
-
- // Check for the common case first: The byte we want to read lies in the
- // current page. For performance reasons, we don't check for the case
- // "read_offset < page_begin_offset_" explicitly; if it occurs, it will
- // lead to wraparound (which is well-defined for unsigned quantities), and
- // this will cause the test "pos_in_page < current_page_len_" to fail.
- size_t pos_in_page = read_offset - page_begin_offset_;
- if (pos_in_page < current_page_len_) {
- return page_data_[pos_in_page];
- }
-
- // Check that the offset we're trying to read lies within the sub-array
- // we're allowed to access.
- if (read_offset >= sub_array_begin_ && read_offset < sub_array_end_) {
- // Read the page that contains the offset "read_offset".
- loadPageForOffset(read_offset);
-
- // Compute the position within the new page from which we need to read.
- pos_in_page = read_offset - page_begin_offset_;
-
- // After the call to loadPageForOffset(), read_offset must lie within
- // the current page, and therefore pos_in_page must be less than the
- // length of the page. We nevertheless check for this to protect against
- // potential bugs in loadPageForOffset().
- assert(pos_in_page < current_page_len_);
- if (pos_in_page < current_page_len_) {
- return page_data_[pos_in_page];
- }
- }
- }
-
-// All error cases fall through to here, including the case where the error flag was already set on entry.
-#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
- assert(false);
-#endif
- error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;  // Overwrites any status stored earlier (e.g. UNDERFLOW) with OVERFLOW.
- // return 0, which represents the invalid value
- return static_cast<unsigned char>(0);
-}
-
-unsigned char RangeCheckedBytePtr::operator*() const { return (*this)[0]; }  // Same as operator[](0), including its error handling.
-
-RangeCheckedBytePtr &RangeCheckedBytePtr::operator++() {  // Pre-increment; the error flag is not consulted before moving.
- if (current_pos_ < sub_array_end_) {  // Advancing may reach sub_array_end_ (one past the last byte) but not go beyond it.
- current_pos_++;
- } else {
-#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
- assert(false);
-#endif
- error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;  // The position is left unchanged on failure.
- }
- return *this;
-}
-
-RangeCheckedBytePtr RangeCheckedBytePtr::operator++(int) {  // Post-increment: returns the pointer as it was before advancing.
- RangeCheckedBytePtr result(*this);  // Copied before the increment, so an error from the increment is recorded on *this only.
- ++(*this);
- return result;
-}
-
-RangeCheckedBytePtr &RangeCheckedBytePtr::operator--() {  // Pre-decrement; the error flag is not consulted before moving.
- if (current_pos_ > sub_array_begin_) {  // Moving back is allowed only while strictly past the start of the sub-array.
- current_pos_--;
- } else {
-#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
- assert(false);
-#endif
- error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW;  // The position is left unchanged on failure.
- }
- return *this;
-}
-
-RangeCheckedBytePtr RangeCheckedBytePtr::operator--(int) {  // Post-decrement: returns the pointer as it was before moving back.
- RangeCheckedBytePtr result(*this);  // Copied before the decrement, so an error from the decrement is recorded on *this only.
- --(*this);
- return result;
-}
-
-RangeCheckedBytePtr &RangeCheckedBytePtr::operator+=(size_t x) {  // Advances by x bytes, or sets the OVERFLOW status and stays put.
- if (remainingLength() >= x) {  // remainingLength() is 0 once an error is set, so only x == 0 passes in that state.
- current_pos_ += x;
- } else {
-#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
- assert(false);
-#endif
- error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
- }
- return *this;
-}
-
-RangeCheckedBytePtr &RangeCheckedBytePtr::operator-=(size_t x) {  // Moves back by x bytes, or sets the UNDERFLOW status and stays put.
- if (x <= current_pos_ - sub_array_begin_) {  // Distance back to the sub-array start; relies on the invariant sub_array_begin_ <= current_pos_.
- current_pos_ -= x;
- } else {
-#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
- assert(false);
-#endif
- error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW;
- }
- return *this;
-}
-
-RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p, size_t x) {  // Returns a copy of p advanced by x bytes.
- RangeCheckedBytePtr result(p);  // Work on a copy: an out-of-range move sets the error flag on the copy, not on p.
- result += x;
- return result;
-}
-
-RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p, size_t x) {  // Returns a copy of p moved back by x bytes.
- RangeCheckedBytePtr result(p);  // Work on a copy: an out-of-range move sets the error flag on the copy, not on p.
- result -= x;
- return result;
-}
-
-} // namespace binary_parse
-} // namespace piex
-
-#endif // PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_