// Source: root/libgav1/src/obu_parser.h
// (blob 22a23966b5196c168d06c58176e58a495c47d252)
/*
 * Copyright 2019 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBGAV1_SRC_OBU_PARSER_H_
#define LIBGAV1_SRC_OBU_PARSER_H_

#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <type_traits>
#include <utility>

#include "src/buffer_pool.h"
#include "src/decoder_state.h"
#include "src/dsp/common.h"
#include "src/gav1/decoder_buffer.h"
#include "src/gav1/status_code.h"
#include "src/quantizer.h"
#include "src/utils/common.h"
#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/raw_bit_reader.h"
#include "src/utils/segmentation.h"
#include "src/utils/vector.h"

namespace libgav1 {

// structs and enums related to Open Bitstream Units (OBU).

// Integer constants used by the OBU parser. Every enumerator carries an
// explicit value, so the grouping below is purely for readability.
enum {
  // Bitstream level.
  kMinimumMajorBitstreamLevel = 2,

  // "Select" values for screen content tools and integer motion vectors.
  kSelectScreenContentTools = 2,
  kSelectIntegerMv = 2,

  // A special value of the scalability_mode_idc syntax element that indicates
  // the picture prediction structure is specified in scalability_structure().
  kScalabilitySS = 14,

  // Value meaning "no primary reference".
  kPrimaryReferenceNone = 7,

  // Loop restoration.
  kLoopRestorationTileSizeMax = 256,

  // Tile size limits.
  kMaxTileWidth = 4096,
  kMaxTileArea = kMaxTileWidth * 2304,

  // Global motion: bit counts.
  kGlobalMotionAlphaBits = 12,
  kGlobalMotionTranslationBits = 12,
  kGlobalMotionTranslationOnlyBits = 9,

  // Global motion: precision bit counts.
  kGlobalMotionAlphaPrecisionBits = 15,
  kGlobalMotionTranslationPrecisionBits = 6,
  kGlobalMotionTranslationOnlyPrecisionBits = 3
};  // anonymous enum

// Header fields recorded for a single OBU. ObuParser collects these in
// |obu_headers_| and exposes them through ObuParser::obu_headers().
struct ObuHeader {
  // The kind of OBU (ObuType is declared outside this header).
  ObuType type;
  // NOTE(review): presumably mirrors the obu_extension_flag syntax element
  // mentioned in ObuParser -- confirm against ParseHeader().
  bool has_extension;
  bool has_size_field;
  // NOTE(review): presumably only meaningful when |has_extension| is true --
  // confirm against ParseHeader().
  int8_t temporal_id;
  int8_t spatial_id;
};

// Bitstream profile identifiers, stored in a single byte.
enum BitstreamProfile : uint8_t {
  kProfile0 = 0,
  kProfile1 = 1,
  kProfile2 = 2,
  // One past the last profile value; equals the number of profiles above.
  kMaxProfiles = 3
};

// A bitstream level in mapped (major.minor) form.
//
// In the bitstream the level is encoded in five bits: the first three bits
// encode |major| - 2 and the last two bits encode |minor|.
//
// If the mapped level (major.minor) is in the tables in Annex A.3, there are
// bitstream conformance requirements on the maximum or minimum values of
// several variables. The encoded value of 31 (which corresponds to the mapped
// level 9.3) is the "maximum parameters" level and imposes no level-based
// constraints on the bitstream.
struct BitStreamLevel {
  // Mapped major level, i.e. the three-bit encoded value plus 2.
  uint8_t major;  // Range: 2-9.
  // Minor level, taken directly from the two-bit encoded value.
  uint8_t minor;  // Range: 0-3.
};

// Color configuration of a sequence. Stored as the |color_config| member of
// ObuSequenceHeader; see ObuParser::ParseColorConfig() (section 5.5.2).
//
// This struct is embedded in ObuSequenceHeader, which is compared with
// memcmp(), so members should not be reordered casually.
struct ColorConfig {
  int8_t bitdepth;
  bool is_monochrome;
  ColorPrimary color_primary;
  TransferCharacteristics transfer_characteristics;
  MatrixCoefficients matrix_coefficients;
  // A binary value (0 or 1) that is associated with the VideoFullRangeFlag
  // variable specified in ISO/IEC 23091-4/ITU-T H.273.
  // * 0: the studio swing representation.
  // * 1: the full swing representation.
  ColorRange color_range;
  int8_t subsampling_x;
  int8_t subsampling_y;
  ChromaSamplePosition chroma_sample_position;
  bool separate_uv_delta_q;
};

// Timing information of a sequence. Stored as the |timing_info| member of
// ObuSequenceHeader; see ObuParser::ParseTimingInfo() (section 5.5.3).
//
// This struct is embedded in ObuSequenceHeader, which is compared with
// memcmp(), so members should not be reordered casually.
struct TimingInfo {
  uint32_t num_units_in_tick;
  uint32_t time_scale;
  bool equal_picture_interval;
  // NOTE(review): presumably only meaningful when |equal_picture_interval| is
  // true -- confirm against ParseTimingInfo().
  uint32_t num_ticks_per_picture;
};

// Decoder model information of a sequence. Stored as the |decoder_model_info|
// member of ObuSequenceHeader; see ObuParser::ParseDecoderModelInfo()
// (section 5.5.4).
//
// This struct is embedded in ObuSequenceHeader, which is compared with
// memcmp(), so members should not be reordered casually.
struct DecoderModelInfo {
  uint8_t encoder_decoder_buffer_delay_length;
  uint32_t num_units_in_decoding_tick;
  uint8_t buffer_removal_time_length;
  uint8_t frame_presentation_time_length;
};

// Operating parameters of a sequence; see
// ObuParser::ParseOperatingParameters() (section 5.5.5). Each array has
// kMaxOperatingPoints entries.
// NOTE(review): the arrays are presumably indexed by operating point (the
// |index| argument of ParseOperatingParameters()) -- confirm.
//
// This is the last member of ObuSequenceHeader and the only part of the
// sequence header that may differ within a coded video sequence (see the
// comments in ObuSequenceHeader).
struct OperatingParameters {
  uint32_t decoder_buffer_delay[kMaxOperatingPoints];
  uint32_t encoder_buffer_delay[kMaxOperatingPoints];
  bool low_delay_mode_flag[kMaxOperatingPoints];
};

// Contents of a sequence header OBU; see ObuParser::ParseSequenceHeader()
// (section 5.5.1).
//
// The member order is significant: |operating_parameters| must stay the last
// member (see the comment on that member), and objects of this type are
// compared bytewise.
struct ObuSequenceHeader {
  // Section 7.5:
  //   Within a particular coded video sequence, the contents of
  //   sequence_header_obu must be bit-identical each time the sequence header
  //   appears except for the contents of operating_parameters_info. A new
  //   coded video sequence is required if the sequence header parameters
  //   change.
  //
  // IMPORTANT: ParametersChanged() is implemented with a memcmp() call. For
  // this to work, this object and the |old| object must be initialized with
  // an empty brace-enclosed list, which initializes any padding to zero bits.
  // See https://en.cppreference.com/w/cpp/language/zero_initialization.
  //
  // NOTE(review): the definition is not in this header; presumably it returns
  // true when any member other than |operating_parameters| differs from the
  // corresponding member of |old| -- confirm against the implementation.
  bool ParametersChanged(const ObuSequenceHeader& old) const;

  BitstreamProfile profile;
  bool still_picture;
  bool reduced_still_picture_header;
  int operating_points;
  int operating_point_idc[kMaxOperatingPoints];
  BitStreamLevel level[kMaxOperatingPoints];
  int8_t tier[kMaxOperatingPoints];
  int8_t frame_width_bits;
  int8_t frame_height_bits;
  int32_t max_frame_width;
  int32_t max_frame_height;
  bool frame_id_numbers_present;
  int8_t frame_id_length_bits;
  int8_t delta_frame_id_length_bits;
  bool use_128x128_superblock;
  bool enable_filter_intra;
  bool enable_intra_edge_filter;
  bool enable_interintra_compound;
  bool enable_masked_compound;
  bool enable_warped_motion;
  bool enable_dual_filter;
  bool enable_order_hint;
  // If enable_order_hint is true, order_hint_bits is in the range [1, 8].
  // If enable_order_hint is false, order_hint_bits is 0.
  int8_t order_hint_bits;
  // order_hint_shift_bits equals (32 - order_hint_bits) % 32.
  // This is used frequently in GetRelativeDistance().
  uint8_t order_hint_shift_bits;
  bool enable_jnt_comp;
  bool enable_ref_frame_mvs;
  bool choose_screen_content_tools;
  int8_t force_screen_content_tools;
  bool choose_integer_mv;
  int8_t force_integer_mv;
  bool enable_superres;
  bool enable_cdef;
  bool enable_restoration;
  ColorConfig color_config;
  bool timing_info_present_flag;
  TimingInfo timing_info;
  bool decoder_model_info_present_flag;
  DecoderModelInfo decoder_model_info;
  bool decoder_model_present_for_operating_point[kMaxOperatingPoints];
  bool initial_display_delay_present_flag;
  uint8_t initial_display_delay[kMaxOperatingPoints];
  bool film_grain_params_present;

  // IMPORTANT: the operating_parameters member must be at the end of the
  // struct so that ParametersChanged() can be implemented with a memcmp()
  // call.
  OperatingParameters operating_parameters;
};
// Compile-time checks of the layout assumptions that
// ObuSequenceHeader::ParametersChanged() relies on. Each assertion carries a
// message so that a failure names the invariant that was broken.
//
// Verify it is safe to use offsetof with ObuSequenceHeader and to use memcmp
// to compare two ObuSequenceHeader objects.
static_assert(std::is_standard_layout<ObuSequenceHeader>::value,
              "ObuSequenceHeader must be a standard-layout type so that "
              "offsetof and memcmp can be used on it");
// Verify operating_parameters is the last member of ObuSequenceHeader. The
// second assertion assumes that ObuSequenceHeader has no padding after the
// operating_parameters field. The first assertion is a sufficient condition
// for ObuSequenceHeader to have no padding after the operating_parameters
// field.
static_assert(alignof(ObuSequenceHeader) == alignof(OperatingParameters),
              "ObuSequenceHeader must have the same alignment as "
              "OperatingParameters so that no padding follows "
              "operating_parameters");
static_assert(sizeof(ObuSequenceHeader) ==
                  offsetof(ObuSequenceHeader, operating_parameters) +
                      sizeof(OperatingParameters),
              "operating_parameters must be the last member of "
              "ObuSequenceHeader");

// Location of the data for one tile: |size| bytes starting at |data|. The
// struct has no destructor, so it never frees |data| itself. Entries are
// added and populated by ObuParser::AddTileBuffers() and exposed through
// ObuParser::tile_buffers().
// NOTE(review): |data| presumably points into the buffer handed to the
// ObuParser constructor -- confirm against AddTileBuffers().
struct TileBuffer {
  const uint8_t* data;
  size_t size;
};

// Metadata type codes, stored in a single byte. Only the registered values 1
// through 5 are named here; the comments below describe the remaining ranges.
// NOTE(review): presumably these are the values ObuParser::ParseMetadata()
// dispatches on -- confirm against the implementation.
enum MetadataType : uint8_t {
  // 0 is reserved for AOM use.
  kMetadataTypeHdrContentLightLevel = 1,
  kMetadataTypeHdrMasteringDisplayColorVolume = 2,
  kMetadataTypeScalability = 3,
  kMetadataTypeItutT35 = 4,
  kMetadataTypeTimecode = 5,
  // 6-31 are unregistered user private.
  // 32 and greater are reserved for AOM use.
};

// Metadata values held by ObuParser and exposed through
// ObuParser::metadata(). Because of the std::unique_ptr member this struct
// can be moved but not copied.
// NOTE(review): the grouping of fields by metadata type below is inferred
// from the field and MetadataType names -- confirm against ParseMetadata().
struct ObuMetadata {
  // Presumably filled from kMetadataTypeHdrContentLightLevel metadata.
  // Maximum content light level.
  uint16_t max_cll;
  // Maximum frame-average light level.
  uint16_t max_fall;
  // Presumably filled from kMetadataTypeHdrMasteringDisplayColorVolume
  // metadata. The chromaticity arrays have three entries each.
  uint16_t primary_chromaticity_x[3];
  uint16_t primary_chromaticity_y[3];
  uint16_t white_point_chromaticity_x;
  uint16_t white_point_chromaticity_y;
  uint32_t luminance_max;
  uint32_t luminance_min;
  // ITU-T T.35.
  uint8_t itu_t_t35_country_code;
  // Valid if itu_t_t35_country_code is 0xFF.
  uint8_t itu_t_t35_country_code_extension_byte;
  // Owning pointer to the ITU-T T.35 payload bytes.
  std::unique_ptr<uint8_t[]> itu_t_t35_payload_bytes;
  // NOTE(review): presumably the number of bytes in
  // |itu_t_t35_payload_bytes| -- confirm against ParseMetadata().
  size_t itu_t_t35_payload_size;
};

// Parses Open Bitstream Units (OBUs) from a caller-supplied byte buffer, one
// frame per ParseOneFrame() call. The results of the most recent successful
// call are available through the getters below.
//
// Trailing comments such as "5.9.2" on the private helpers are section
// numbers, presumably of the AV1 bitstream specification.
class ObuParser : public Allocable {
 public:
  // Stores |data|, |size|, |operating_point| and |buffer_pool| as given. Only
  // the |data| pointer is kept; the constructor does not copy the bytes.
  // |decoder_state| is dereferenced here and held by reference, so it must
  // not be null.
  // NOTE(review): |data|, |buffer_pool| and |*decoder_state| presumably have
  // to outlive the parser -- confirm against callers.
  ObuParser(const uint8_t* const data, size_t size, int operating_point,
            BufferPool* const buffer_pool, DecoderState* const decoder_state)
      : data_(data),
        size_(size),
        operating_point_(operating_point),
        buffer_pool_(buffer_pool),
        decoder_state_(*decoder_state) {}

  // Not copyable or movable. (Declaring the copy operations suppresses the
  // implicitly declared move operations.)
  ObuParser(const ObuParser& rhs) = delete;
  ObuParser& operator=(const ObuParser& rhs) = delete;

  // Returns true if there is more data that needs to be parsed.
  bool HasData() const;

  // Parses a sequence of Open Bitstream Units until a decodable frame is found
  // (or until the end of stream is reached). A decodable frame is considered to
  // be found when one of the following happens:
  //   * A kObuFrame is seen.
  //   * The kObuTileGroup containing the last tile is seen.
  //   * A kObuFrameHeader with show_existing_frame = true is seen.
  //
  // If the parsing is successful, relevant fields will be populated. The fields
  // are valid only if the return value is kStatusOk. Returns kStatusOk on
  // success, an error status otherwise. On success, |current_frame| will be
  // populated with a valid frame buffer.
  StatusCode ParseOneFrame(RefCountedBufferPtr* current_frame);

  // Getters. Only valid if ParseOneFrame() completes successfully.
  // Each returns a const reference to a member of this parser.
  const Vector<ObuHeader>& obu_headers() const { return obu_headers_; }
  const ObuSequenceHeader& sequence_header() const { return sequence_header_; }
  const ObuFrameHeader& frame_header() const { return frame_header_; }
  const Vector<TileBuffer>& tile_buffers() const { return tile_buffers_; }
  const ObuMetadata& metadata() const { return metadata_; }

  // Setters.
  // Replaces the stored sequence header with a copy of |sequence_header| and
  // marks it as valid by setting |has_sequence_header_|.
  void set_sequence_header(const ObuSequenceHeader& sequence_header) {
    sequence_header_ = sequence_header;
    has_sequence_header_ = true;
  }

  // Moves |tile_buffers_| into |tile_buffers|. Afterwards |tile_buffers_| is
  // in a moved-from state. |tile_buffers| is dereferenced and must not be
  // null.
  void MoveTileBuffer(Vector<TileBuffer>* tile_buffers) {
    *tile_buffers = std::move(tile_buffers_);
  }

 private:
  // Initializes the bit reader. This is a function of its own to make unit
  // testing of private functions simpler.
  LIBGAV1_MUST_USE_RESULT bool InitBitReader(const uint8_t* data, size_t size);

  // Parse helper functions.
  // NOTE(review): the bool-returning helpers presumably return true on
  // success and false on a parse error -- confirm against the implementation.
  bool ParseHeader();  // 5.3.2 and 5.3.3.
  bool ParseColorConfig(ObuSequenceHeader* sequence_header);       // 5.5.2.
  bool ParseTimingInfo(ObuSequenceHeader* sequence_header);        // 5.5.3.
  bool ParseDecoderModelInfo(ObuSequenceHeader* sequence_header);  // 5.5.4.
  bool ParseOperatingParameters(ObuSequenceHeader* sequence_header,
                                int index);          // 5.5.5.
  bool ParseSequenceHeader(bool seen_frame_header);  // 5.5.1.
  bool ParseFrameParameters();                       // 5.9.2, 5.9.7 and 5.9.10.
  void MarkInvalidReferenceFrames();                 // 5.9.4.
  bool ParseFrameSizeAndRenderSize();                // 5.9.5 and 5.9.6.
  bool ParseSuperResParametersAndComputeImageSize();  // 5.9.8 and 5.9.9.
  // Checks the bitstream conformance requirement in Section 6.8.6.
  bool ValidateInterFrameSize() const;
  bool ParseReferenceOrderHint();
  // Static helpers that pick a reference from |shifted_order_hints|. They do
  // not touch parser state.
  // NOTE(review): presumably used by SetFrameReferences() -- confirm.
  static int FindLatestBackwardReference(
      const int current_frame_hint,
      const std::array<int, kNumReferenceFrameTypes>& shifted_order_hints,
      const std::array<bool, kNumReferenceFrameTypes>& used_frame);
  static int FindEarliestBackwardReference(
      const int current_frame_hint,
      const std::array<int, kNumReferenceFrameTypes>& shifted_order_hints,
      const std::array<bool, kNumReferenceFrameTypes>& used_frame);
  static int FindLatestForwardReference(
      const int current_frame_hint,
      const std::array<int, kNumReferenceFrameTypes>& shifted_order_hints,
      const std::array<bool, kNumReferenceFrameTypes>& used_frame);
  static int FindReferenceWithSmallestOutputOrder(
      const std::array<int, kNumReferenceFrameTypes>& shifted_order_hints);
  bool SetFrameReferences(int8_t last_frame_idx,
                          int8_t gold_frame_idx);  // 7.8.
  bool ParseLoopFilterParameters();                // 5.9.11.
  bool ParseDeltaQuantizer(int8_t* delta);         // 5.9.13.
  bool ParseQuantizerParameters();                 // 5.9.12.
  bool ParseSegmentationParameters();              // 5.9.14.
  bool ParseQuantizerIndexDeltaParameters();       // 5.9.17.
  bool ParseLoopFilterDeltaParameters();           // 5.9.18.
  void ComputeSegmentLosslessAndQIndex();
  bool ParseCdefParameters();             // 5.9.19.
  bool ParseLoopRestorationParameters();  // 5.9.20.
  bool ParseTxModeSyntax();               // 5.9.21.
  bool ParseFrameReferenceModeSyntax();   // 5.9.23.
  // Returns whether skip mode is allowed. When it returns true, it also sets
  // the frame_header_.skip_mode_frame array.
  bool IsSkipModeAllowed();
  bool ParseSkipModeParameters();  // 5.9.22.
  bool ReadAllowWarpedMotion();
  bool ParseGlobalParamSyntax(
      int ref, int index,
      const std::array<GlobalMotion, kNumReferenceFrameTypes>&
          prev_global_motions);        // 5.9.25.
  bool ParseGlobalMotionParameters();  // 5.9.24.
  bool ParseFilmGrainParameters();     // 5.9.30.
  bool ParseTileInfoSyntax();          // 5.9.15.
  bool ParseFrameHeader();             // 5.9.
  // |data| and |size| specify the payload data of the padding OBU.
  // NOTE: Although the payload data is available in the bit_reader_ member,
  // it is also passed to ParsePadding() as function parameters so that
  // ParsePadding() can find the trailing bit of the OBU and skip over the
  // payload data as an opaque chunk of data.
  bool ParsePadding(const uint8_t* data, size_t size);  // 5.7.
  bool ParseMetadataScalability();                      // 5.8.5 and 5.8.6.
  bool ParseMetadataTimecode();                         // 5.8.7.
  // |data| and |size| specify the payload data of the metadata OBU.
  // NOTE: Although the payload data is available in the bit_reader_ member,
  // it is also passed to ParseMetadata() as function parameters so that
  // ParseMetadata() can find the trailing bit of the OBU and either extract
  // or skip over the payload data as an opaque chunk of data.
  bool ParseMetadata(const uint8_t* data, size_t size);  // 5.8.
  // Adds and populates the TileBuffer for each tile in the tile group.
  bool AddTileBuffers(int start, int end, size_t total_size,
                      size_t tg_header_size, size_t bytes_consumed_so_far);
  bool ParseTileGroup(size_t size, size_t bytes_consumed_so_far);  // 5.11.1.

  // Parser elements.
  // |data_|, |size_| and |operating_point_| are initialized from the
  // constructor arguments of the same names.
  std::unique_ptr<RawBitReader> bit_reader_;
  const uint8_t* data_;
  size_t size_;
  const int operating_point_;

  // OBU elements. Only valid if ParseOneFrame() completes successfully.
  // The header and metadata members are initialized with an empty
  // brace-enclosed list; for |sequence_header_| this is what
  // ObuSequenceHeader::ParametersChanged() requires.
  Vector<ObuHeader> obu_headers_;
  ObuSequenceHeader sequence_header_ = {};
  ObuFrameHeader frame_header_ = {};
  Vector<TileBuffer> tile_buffers_;
  ObuMetadata metadata_ = {};
  // The expected starting tile number of the next Tile Group.
  int next_tile_group_start_ = 0;
  // If true, the sequence_header_ field is valid.
  bool has_sequence_header_ = false;
  // If true, the obu_extension_flag syntax element in the OBU header must be
  // 0. Set to true when parsing a sequence header if OperatingPointIdc is 0.
  bool extension_disallowed_ = false;

  // Both are supplied by the caller through the constructor; this class does
  // not delete them.
  BufferPool* const buffer_pool_;
  DecoderState& decoder_state_;
  // Used by ParseOneFrame() to populate the current frame that is being
  // decoded. The invariant maintained is that this variable will be nullptr at
  // the beginning and at the end of each call to ParseOneFrame(). This ensures
  // that the ObuParser is not holding on to any references to the current
  // frame once the ParseOneFrame() call is complete.
  RefCountedBufferPtr current_frame_;

  // For unit testing private functions.
  friend class ObuParserTest;
};

}  // namespace libgav1

#endif  // LIBGAV1_SRC_OBU_PARSER_H_