// path: root/webm_parser/src/master_parser.h
// blob: 2000167d9c2f93c8b9ba303baaad9d2cd15db831
// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS.  All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
#ifndef SRC_MASTER_PARSER_H_
#define SRC_MASTER_PARSER_H_

#include <cassert>
#include <cstdint>
#include <functional>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <utility>

#include "src/element_parser.h"
#include "src/id_parser.h"
#include "src/size_parser.h"
#include "src/skip_parser.h"
#include "src/unknown_parser.h"
#include "src/void_parser.h"
#include "webm/callback.h"
#include "webm/element.h"
#include "webm/id.h"
#include "webm/reader.h"
#include "webm/status.h"

namespace webm {

// A general purpose parser for EBML master elements.
//
// For example, if a document specification defines a Foo master element that
// has two boolean children (Bar and Baz), then a FooParser capable of parsing
// the Foo master element could be defined as follows:
//
// struct FooParser : public MasterParser {
//   FooParser()
//       : MasterParser(MakeChild<BoolParser>(Id::kBar),
//                      MakeChild<BoolParser>(Id::kBaz)) {}
// };
//
// See the MasterValueParser for an alternative class for parsing master
// elements into a data structure.
class MasterParser : public ElementParser {
 public:
  // Constructs a new MasterParser that uses the given
  // {Id, std::unique_ptr<ElementParser>} pairs to map child IDs to the
  // appropriate parser/handler. Each argument must be of type
  // std::pair<Id, std::unique_ptr<ElementParser>>. If a parser is not
  // explicitly provided for Id::kVoid, a VoidParser will automatically be used
  // for it.
  //
  // Registering two parsers for the same Id is a programming error and trips
  // an assert in debug builds (see InsertParser()).
  //
  // Initializer lists don't support move-only types (i.e. std::unique_ptr), so
  // instead a variadic template is used.
  template <typename... T>
  explicit MasterParser(T&&... parser_pairs) {
    // Prefer an odd reserve size. This makes libc++ use a prime number for the
    // bucket count. Otherwise, if it happens to be a power of 2, then libc++
    // will use a power-of-2 bucket count (and since Matroska EBML IDs have low
    // entropy in the low bits, there will be a lot of collisions). libstdc++
    // always prefers a prime bucket count. I'm not sure how MSVC or others are
    // implemented, but this shouldn't adversely affect them even if they are
    // implemented differently. Add one to the count because we'll likely need
    // to insert a parser for Id::kVoid.
    parsers_.reserve((sizeof...(T) + 1) | 1);

    // This dummy initializer list is just used to force the parameter pack to
    // be expanded, which turns the expression into a for-each "loop" that
    // inserts each argument into the map. The leading 0 keeps the list
    // non-empty (and therefore deducible) when no arguments are supplied.
    auto dummy = {0, (InsertParser(std::forward<T>(parser_pairs)), 0)...};
    (void)dummy;  // Silence unused variable warning.

    // Only fall back to the built-in VoidParser if the caller did not supply
    // their own handler for Id::kVoid.
    if (parsers_.find(Id::kVoid) == parsers_.end()) {
      InsertParser(MakeChild<VoidParser>(Id::kVoid));
    }
  }

  // Not copyable: the child parsers are uniquely owned by this object.
  MasterParser(const MasterParser&) = delete;
  MasterParser& operator=(const MasterParser&) = delete;

  Status Init(const ElementMetadata& metadata, std::uint64_t max_size) override;

  void InitAfterSeek(const Ancestory& child_ancestory,
                     const ElementMetadata& child_metadata) override;

  Status Feed(Callback* callback, Reader* reader,
              std::uint64_t* num_bytes_read) override;

  bool GetCachedMetadata(ElementMetadata* metadata) override;

  // Gets the header size recorded for this element (presumably the combined
  // length of its ID and size fields; confirm against InitSetup()). Returns 0
  // until the parser has been initialized.
  std::uint32_t header_size() const { return header_size_; }

  // Gets the size of this element. May be called before the parse is fully
  // complete (but only after Init() has already been called and successfully
  // returned).
  std::uint64_t size() const { return my_size_; }

  // Gets absolute byte position of the start of the element in the byte stream.
  // May be called before the parse is fully complete (but only after Init() has
  // already been called and successfully returned).
  std::uint64_t position() const { return my_position_; }

  // Gets the metadata for the child that is currently being parsed. This may
  // only be called once the child's header information (ID and size) has been
  // read, i.e. while the child's size is being validated, the action for the
  // child is being obtained, the child's parser is being initialized, or the
  // child's body is being parsed. Calling it in any other state (including
  // before the parser has been initialized) trips the assert in debug builds.
  const ElementMetadata& child_metadata() const {
    assert(state_ == State::kValidatingChildSize ||
           state_ == State::kGettingAction ||
           state_ == State::kInitializingChildParser ||
           state_ == State::kReadingChildBody);
    return child_metadata_;
  }

 protected:
  // Allocates a new parser of type T, forwarding args to the constructor, and
  // creates a std::pair<Id, std::unique_ptr<ElementParser>> using the given id
  // and the allocated parser.
  template <typename T, typename... Args>
  static std::pair<Id, std::unique_ptr<ElementParser>> MakeChild(
      Id id, Args&&... args) {
    // The allocation is handed to a unique_ptr immediately, so it cannot leak.
    std::unique_ptr<ElementParser> ptr(new T(std::forward<Args>(args)...));
    return std::pair<Id, std::unique_ptr<ElementParser>>(id, std::move(ptr));
  }

 private:
  // Parsing states for the finite-state machine.
  enum class State {
    /* clang-format off */
    // State                      Transitions to state      When
    kFirstReadOfChildId,       // kFinishingReadingChildId  size(id)  > 1
                               // kReadingChildSize         size(id) == 1
                               // kEndReached               EOF
    kFinishingReadingChildId,  // kReadingChildSize         done
    kReadingChildSize,         // kValidatingChildSize      done
    kValidatingChildSize,      // kGettingAction            done
                               // kEndReached               unknown id & unsized
    kGettingAction,            // kInitializingChildParser  done
    kInitializingChildParser,  // kReadingChildBody         done
    kReadingChildBody,         // kChildFullyParsed         child parse done
    kChildFullyParsed,         // kValidatingChildSize      cached metadata
                               // kFirstReadOfChildId       read  < my_size_
                               // kEndReached               read == my_size_
    kEndReached,               // No transitions from here (must call Init)
    /* clang-format on */
  };

  using StdHashId = std::hash<std::underlying_type<Id>::type>;

  // Hash functor for hashing Id enums for storage in std::unordered_map. It
  // simply hashes the enum's underlying integer value with std::hash.
  struct IdHash : StdHashId {
    // Type aliases for conforming to the std::hash interface.
    using argument_type = Id;
    using result_type = StdHashId::result_type;

    // Returns the hash of the given id.
    result_type operator()(argument_type id) const {
      return StdHashId::operator()(static_cast<StdHashId::argument_type>(id));
    }
  };

  // The parser for parsing element Ids.
  IdParser id_parser_;

  // The parser for parsing element sizes.
  SizeParser size_parser_;

  // Metadata for the child element that is currently being parsed.
  ElementMetadata child_metadata_;

  // Maps child IDs to the appropriate parser that can handle that child.
  std::unordered_map<Id, std::unique_ptr<ElementParser>, IdHash> parsers_;

  // The parser that is used to parse unknown children.
  UnknownParser unknown_parser_;

  // The parser that is used to skip over children.
  SkipParser skip_parser_;

  // The parser that is being used to parse the current child. This must be null
  // or a pointer in parsers_.
  ElementParser* child_parser_ = nullptr;

  // The current parsing action for the child that is currently being parsed.
  Action action_ = Action::kRead;

  // The current state of the parser. A freshly constructed parser starts in
  // kEndReached, the state that requires Init to be called before parsing, so
  // that state checks made before initialization read a well-defined value.
  State state_ = State::kEndReached;

  // The header size of this element, as exposed by header_size().
  std::uint32_t header_size_ = 0;

  // The size of this element.
  std::uint64_t my_size_ = 0;

  // The position of this element, as exposed by position().
  std::uint64_t my_position_ = 0;

  // NOTE(review): presumably the upper bound on this element's size that was
  // established during Init(); confirm against the implementation file.
  std::uint64_t max_size_ = 0;

  // The total number of bytes read by this parser.
  std::uint64_t total_bytes_read_ = 0;

  // Set to true if parsing has completed and this parser consumed an extra
  // element header (ID and size) that wasn't from a child.
  bool has_cached_metadata_ = false;

  // Inserts the parser into the parsers_ map and asserts it is the only parser
  // registered to parse the corresponding Id.
  template <typename T>
  void InsertParser(T&& parser) {
    bool inserted = parsers_.insert(std::forward<T>(parser)).second;
    (void)inserted;  // Silence unused variable warning.
    assert(inserted);  // Make sure there aren't duplicates.
  }

  // Common initialization logic for Init/InitAfterSeek.
  void InitSetup(std::uint32_t header_size, std::uint64_t size_in_bytes,
                 std::uint64_t position);

  // Resets the internal parsers in preparation for parsing the next child.
  void PrepareForNextChild();
};

}  // namespace webm

#endif  // SRC_MASTER_PARSER_H_