Diffstat (limited to 'libgav1/src/tile/tile.cc')
-rw-r--r--  libgav1/src/tile/tile.cc  1802
1 file changed, 994 insertions, 808 deletions
diff --git a/libgav1/src/tile/tile.cc b/libgav1/src/tile/tile.cc
index 96c724f..f79158f 100644
--- a/libgav1/src/tile/tile.cc
+++ b/libgav1/src/tile/tile.cc
@@ -17,6 +17,7 @@
#include <algorithm>
#include <array>
#include <cassert>
+#include <climits>
#include <cstdlib>
#include <cstring>
#include <memory>
@@ -25,9 +26,12 @@
#include <type_traits>
#include <utility>
+#include "src/frame_scratch_buffer.h"
#include "src/motion_vector.h"
#include "src/reconstruction.h"
#include "src/utils/bit_mask_set.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
#include "src/utils/logging.h"
#include "src/utils/segmentation.h"
#include "src/utils/stack.h"
@@ -45,8 +49,6 @@ constexpr int kReferenceScaleShift = 14;
// process is activated.
constexpr int kQuantizerCoefficientBaseRange = 12;
constexpr int kNumQuantizerBaseLevels = 2;
-constexpr int kQuantizerCoefficientBaseRangeContextClamp =
- kQuantizerCoefficientBaseRange + kNumQuantizerBaseLevels + 1;
constexpr int kCoeffBaseRangeMaxIterations =
kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1);
constexpr int kEntropyContextLeft = 0;
@@ -99,6 +101,14 @@ constexpr PredictionMode
kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
kPredictionModeD157, kPredictionModeDc};
+// Mask used to determine the index for mode_deltas lookup.
+constexpr BitMaskSet kPredictionModeDeltasMask(
+ kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv,
+ kPredictionModeNearestNearestMv, kPredictionModeNearNearMv,
+ kPredictionModeNearestNewMv, kPredictionModeNewNearestMv,
+ kPredictionModeNearNewMv, kPredictionModeNewNearMv,
+ kPredictionModeNewNewMv);
+
// This is computed as:
// min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4.
constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = {
@@ -146,7 +156,10 @@ constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = {
{6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}};
/* clang-format on */
-constexpr uint8_t kCoeffBasePositionContextOffset[3] = {26, 31, 36};
+// Extended the table size from 3 to 16 by repeating the last element, so that
+// the row or column indices no longer need to be clamped.
+constexpr uint8_t kCoeffBasePositionContextOffset[16] = {
+ 26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = {
kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
@@ -235,7 +248,7 @@ constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};
-constexpr int8_t kWienerDefaultFilter[3] = {3, -7, 15};
+constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};
// Maps compound prediction modes into single modes. For e.g.
// kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
@@ -264,31 +277,8 @@ PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
// log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
// dqDenom is always a power of two and hence right shift can be used instead of
// division.
-constexpr BitMaskSet kQuantizationShift2Mask(kTransformSize32x64,
- kTransformSize64x32,
- kTransformSize64x64);
-constexpr BitMaskSet kQuantizationShift1Mask(kTransformSize16x32,
- kTransformSize16x64,
- kTransformSize32x16,
- kTransformSize32x32,
- kTransformSize64x16);
-int GetQuantizationShift(TransformSize tx_size) {
- if (kQuantizationShift2Mask.Contains(tx_size)) {
- return 2;
- }
- if (kQuantizationShift1Mask.Contains(tx_size)) {
- return 1;
- }
- return 0;
-}
-
-// Input: 1d array index |index|, which indexes into a 2d array of width
-// 1 << |tx_width_log2|.
-// Output: 1d array index which indexes into a 2d array of width
-// (1 << |tx_width_log2|) + kQuantizedCoefficientBufferPadding.
-int PaddedIndex(int index, int tx_width_log2) {
- return index + MultiplyBy4(index >> tx_width_log2);
-}
+constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};
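As a rough sketch of how this table is consumed (the helper name below is illustrative; the real dequantization lives in ReadSignAndApplyDequantization() further down in this diff), the stored value is log2(dqDenom) from section 7.12.3, so the division by dqDenom becomes an indexed right shift:

    // Sketch only, assuming the TransformSize enum order implied by
    // kNumTransformSizes above.
    int DequantizeSketch(int level, int q, TransformSize tx_size) {
      // The intermediate product can exceed 32 bits, hence the int64_t promotion.
      const auto value =
          static_cast<int32_t>((static_cast<int64_t>(q) * level) & 0xffffff);
      return value >> kQuantizationShift[tx_size];  // right shift by log2(dqDenom)
    }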
// Returns the minimum of |length| or |max|-|start|. This is used to clamp array
// indices when accessing arrays whose bound is equal to |max|.
@@ -296,40 +286,151 @@ int GetNumElements(int length, int start, int max) {
return std::min(length, max - start);
}
+template <typename T>
+void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
+  // Specialize all column-count cases (values in kTransformWidth4x4[]) for
+  // better performance.
+ switch (columns) {
+ case 1:
+ MemSetBlock<T>(rows, 1, value, dst, stride);
+ break;
+ case 2:
+ MemSetBlock<T>(rows, 2, value, dst, stride);
+ break;
+ case 4:
+ MemSetBlock<T>(rows, 4, value, dst, stride);
+ break;
+ case 8:
+ MemSetBlock<T>(rows, 8, value, dst, stride);
+ break;
+ default:
+ assert(columns == 16);
+ MemSetBlock<T>(rows, 16, value, dst, stride);
+ break;
+ }
+}
+
void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
TransformType tx_type,
TransformType transform_types[32][32]) {
const int y_offset = y4 - block.row4x4;
const int x_offset = x4 - block.column4x4;
- static_assert(sizeof(transform_types[0][0]) == 1, "");
- for (int i = 0; i < h4; ++i) {
- memset(&transform_types[y_offset + i][x_offset], tx_type, w4);
- }
+ TransformType* const dst = &transform_types[y_offset][x_offset];
+ SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
+}
+
+void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
+ const MotionVector& mv_to_store, ptrdiff_t stride,
+ int rows, int columns,
+ ReferenceFrameType* reference_frame_row_start,
+ MotionVector* mv) {
+ static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
+ do {
+ // Don't switch the following two memory setting functions.
+ // Some ARM CPUs are quite sensitive to the order.
+ memset(reference_frame_row_start, reference_frame_to_store, columns);
+ std::fill(mv, mv + columns, mv_to_store);
+ reference_frame_row_start += stride;
+ mv += stride;
+ } while (--rows != 0);
+}
+
+// The inverse transform process assumes that the quantized coefficients are
+// stored as a virtual 2d array of size |tx_width| x |tx_height|. If the
+// transform width is 64, this assumption is broken because the scan order
+// used for populating the coefficients for such transforms is the same as the
+// one used for the corresponding transform with width 32 (e.g. the scan order
+// used for 64x16 is the same as the one used for 32x16). So we must restore
+// the coefficients to their correct positions and clean the positions they
+// occupied.
+template <typename ResidualType>
+void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width,
+ ResidualType* residual) {
+ if (tx_width != 64) return;
+ const int rows = clamped_tx_height - 2;
+ auto* src = residual + 32 * rows;
+ residual += 64 * rows;
+  // Process 2 rows per iteration, in reverse order to avoid overwriting.
+ int x = rows >> 1;
+ do {
+ // The 2 rows can be processed in order.
+ memcpy(residual, src, 32 * sizeof(src[0]));
+ memcpy(residual + 64, src + 32, 32 * sizeof(src[0]));
+ memset(src + 32, 0, 32 * sizeof(src[0]));
+ src -= 64;
+ residual -= 128;
+ } while (--x);
+ // Process the second row. The first row is already correct.
+ memcpy(residual + 64, src + 32, 32 * sizeof(src[0]));
+ memset(src + 32, 0, 32 * sizeof(src[0]));
+}
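For intuition, the bulk copies above perform the same remapping that the removed per-coefficient code did with DivideBy32()/Mod32() (see the deleted lines in ReadSignAndApplyDequantization() near the end of this diff); a minimal sketch of that mapping, with an illustrative name:

    // Maps a position in the 32-wide scan layout to the 64-wide residual layout.
    int Remap32WideTo64Wide(int pos) {
      const int row = pos >> 5;     // DivideBy32(pos)
      const int column = pos & 31;  // Mod32(pos)
      return (row << 6) + column;   // MultiplyBy64(row) + column
    }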
+
+void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) {
+ // 7.10.2.14 (part 1). (also contains implementations of 5.11.53
+ // and 5.11.54).
+ constexpr int kMvBorder4x4 = 4;
+ const int row_border = kMvBorder4x4 + block.height4x4;
+ const int column_border = kMvBorder4x4 + block.width4x4;
+ const int macroblocks_to_top_edge = -block.row4x4;
+ const int macroblocks_to_bottom_edge =
+ block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4;
+ const int macroblocks_to_left_edge = -block.column4x4;
+ const int macroblocks_to_right_edge =
+ block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4;
+ min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border);
+ min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border);
+ max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border);
+ max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border);
+}
+
+// Section 8.3.2 in the spec, under coeff_base_eob.
+int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
+ if (index == 0) return 0;
+ const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
+ const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
+ const int tx_height = kTransformHeight[adjusted_tx_size];
+ if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
+ if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
+ return 3;
+}
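For example, for a 16x16 transform, tx_height << tx_width_log2 is 256, so index 0 maps to context 0, indices 1 through 32 to context 1, 33 through 64 to context 2, and larger indices to context 3.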
+
+// Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
+// on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
+// the end of block case.
+int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
+ TransformClass tx_class) {
+ if (pos == 0) return 0;
+ const int tx_width = 1 << adjusted_tx_width_log2;
+ const int row = pos >> adjusted_tx_width_log2;
+ const int column = pos & (tx_width - 1);
+ // This return statement is equivalent to:
+ // return ((tx_class == kTransformClass2D && (row | column) < 2) ||
+ // (tx_class == kTransformClassHorizontal && column == 0) ||
+ // (tx_class == kTransformClassVertical && row == 0))
+ // ? 7
+ // : 14;
+ return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
+ static_cast<int>((row | column) < 2)) |
+ (tx_class & static_cast<int>(column == 0)) |
+ ((tx_class >> 1) & static_cast<int>(row == 0)));
}
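The bitwise form above relies on the enum values kTransformClass2D == 0, kTransformClassHorizontal == 1 and kTransformClassVertical == 2. For reference, a sketch of the branchy version it is equivalent to (taken from the comment; the function name is illustrative):

    int GetCoeffBaseRangeContextEobReference(int row, int column,
                                             TransformClass tx_class) {
      return ((tx_class == kTransformClass2D && (row | column) < 2) ||
              (tx_class == kTransformClassHorizontal && column == 0) ||
              (tx_class == kTransformClassVertical && row == 0))
                 ? 7
                 : 14;
    }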
} // namespace
-Tile::Tile(
- int tile_number, const uint8_t* const data, size_t size,
- const ObuSequenceHeader& sequence_header,
- const ObuFrameHeader& frame_header, RefCountedBuffer* const current_frame,
- const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias,
- const std::array<RefCountedBufferPtr, kNumReferenceFrameTypes>&
- reference_frames,
- Array2D<TemporalMotionVector>* const motion_field_mv,
- const std::array<uint8_t, kNumReferenceFrameTypes>& reference_order_hint,
- const std::array<uint8_t, kWedgeMaskSize>& wedge_masks,
- const SymbolDecoderContext& symbol_decoder_context,
- SymbolDecoderContext* const saved_symbol_decoder_context,
- const SegmentationMap* prev_segment_ids, PostFilter* const post_filter,
- BlockParametersHolder* const block_parameters_holder,
- Array2D<int16_t>* const cdef_index,
- Array2D<TransformSize>* const inter_transform_sizes,
- const dsp::Dsp* const dsp, ThreadPool* const thread_pool,
- ResidualBufferPool* const residual_buffer_pool,
- DecoderScratchBufferPool* const decoder_scratch_buffer_pool,
- BlockingCounterWithStatus* const pending_tiles)
+Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
+ const ObuSequenceHeader& sequence_header,
+ const ObuFrameHeader& frame_header,
+ RefCountedBuffer* const current_frame, const DecoderState& state,
+ FrameScratchBuffer* const frame_scratch_buffer,
+ const WedgeMaskArray& wedge_masks,
+ SymbolDecoderContext* const saved_symbol_decoder_context,
+ const SegmentationMap* prev_segment_ids,
+ PostFilter* const post_filter, const dsp::Dsp* const dsp,
+ ThreadPool* const thread_pool,
+ BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
+ bool use_intra_prediction_buffer)
: number_(tile_number),
+ row_(number_ / frame_header.tile_info.tile_columns),
+ column_(number_ % frame_header.tile_info.tile_columns),
data_(data),
size_(size),
read_deltas_(false),
@@ -340,19 +441,18 @@ Tile::Tile(
current_quantizer_index_(frame_header.quantizer.base_index),
sequence_header_(sequence_header),
frame_header_(frame_header),
- current_frame_(*current_frame),
- reference_frame_sign_bias_(reference_frame_sign_bias),
- reference_frames_(reference_frames),
- motion_field_mv_(motion_field_mv),
- reference_order_hint_(reference_order_hint),
+ reference_frame_sign_bias_(state.reference_frame_sign_bias),
+ reference_frames_(state.reference_frame),
+ motion_field_(frame_scratch_buffer->motion_field),
+ reference_order_hint_(state.reference_order_hint),
wedge_masks_(wedge_masks),
reader_(data_, size_, frame_header_.enable_cdf_update),
- symbol_decoder_context_(symbol_decoder_context),
+ symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
saved_symbol_decoder_context_(saved_symbol_decoder_context),
prev_segment_ids_(prev_segment_ids),
dsp_(*dsp),
post_filter_(*post_filter),
- block_parameters_holder_(*block_parameters_holder),
+ block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
quantizer_(sequence_header_.color_config.bitdepth,
&frame_header_.quantizer),
residual_size_((sequence_header_.color_config.bitdepth == 8)
@@ -362,15 +462,20 @@ Tile::Tile(
frame_header_.allow_intrabc
? (sequence_header_.use_128x128_superblock ? 3 : 5)
: 1),
- cdef_index_(*cdef_index),
- inter_transform_sizes_(*inter_transform_sizes),
+ current_frame_(*current_frame),
+ cdef_index_(frame_scratch_buffer->cdef_index),
+ inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
thread_pool_(thread_pool),
- residual_buffer_pool_(residual_buffer_pool),
- decoder_scratch_buffer_pool_(decoder_scratch_buffer_pool),
+ residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
+ tile_scratch_buffer_pool_(
+ &frame_scratch_buffer->tile_scratch_buffer_pool),
pending_tiles_(pending_tiles),
- build_bit_mask_when_parsing_(false) {
- row_ = number_ / frame_header.tile_info.tile_columns;
- column_ = number_ % frame_header.tile_info.tile_columns;
+ frame_parallel_(frame_parallel),
+ use_intra_prediction_buffer_(use_intra_prediction_buffer),
+ intra_prediction_buffer_(
+ use_intra_prediction_buffer_
+ ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
+ : nullptr) {
row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
@@ -382,16 +487,71 @@ Tile::Tile(
superblock_columns_ =
(column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
block_width4x4_log2;
- // Enable multi-threading within a tile only if there are at least as many
- // superblock columns as |intra_block_copy_lag_|.
- split_parse_and_decode_ =
- thread_pool_ != nullptr && superblock_columns_ > intra_block_copy_lag_;
+ // If |split_parse_and_decode_| is true, we do the necessary setup for
+ // splitting the parsing and the decoding steps. This is done in the following
+ // two cases:
+ // 1) If there is multi-threading within a tile (this is done if
+ // |thread_pool_| is not nullptr and if there are at least as many
+ // superblock columns as |intra_block_copy_lag_|).
+ // 2) If |frame_parallel| is true.
+ split_parse_and_decode_ = (thread_pool_ != nullptr &&
+ superblock_columns_ > intra_block_copy_lag_) ||
+ frame_parallel;
+ if (frame_parallel_) {
+ reference_frame_progress_cache_.fill(INT_MIN);
+ }
memset(delta_lf_, 0, sizeof(delta_lf_));
delta_lf_all_zero_ = true;
- YuvBuffer* const buffer = current_frame->buffer();
+ const YuvBuffer& buffer = post_filter_.frame_buffer();
for (int plane = 0; plane < PlaneCount(); ++plane) {
- buffer_[plane].Reset(buffer->height(plane) + buffer->bottom_border(plane),
- buffer->stride(plane), buffer->data(plane));
+ // Verify that the borders are big enough for Reconstruct(). max_tx_length
+ // is the maximum value of tx_width and tx_height for the plane.
+ const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
+ // Reconstruct() may overwrite on the right. Since the right border of a
+ // row is followed in memory by the left border of the next row, the
+ // number of extra pixels to the right of a row is at least the sum of the
+ // left and right borders.
+ //
+ // Note: This assertion actually checks the sum of the left and right
+ // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
+ // and vertically shifted version of |buffer|. Since the sum of the left and
+ // right borders is not changed by the shift, we can just check the sum of
+ // the left and right borders of |buffer|.
+ assert(buffer.left_border(plane) + buffer.right_border(plane) >=
+ max_tx_length - 1);
+ // Reconstruct() may overwrite on the bottom. We need an extra border row
+ // on the bottom because we need the left border of that row.
+ //
+ // Note: This assertion checks the bottom border of
+ // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
+ // shift that the PostFilter constructor applied to |buffer| and reduce the
+ // bottom border by that amount.
+#ifndef NDEBUG
+ const int vertical_shift = static_cast<int>(
+ (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
+ buffer.stride(plane));
+ const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
+ assert(bottom_border >= max_tx_length);
+#endif
+ // In AV1, a transform block of height H starts at a y coordinate that is
+ // a multiple of H. If a transform block at the bottom of the frame has
+ // height H, then Reconstruct() will write up to the row with index
+ // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
+ // rows Reconstruct() may write to is
+ // Align(buffer.height(plane), max_tx_length).
+ buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
+ buffer.stride(plane),
+ post_filter_.GetUnfilteredBuffer(plane));
+ const int plane_height =
+ RightShiftWithRounding(frame_header_.height, subsampling_y_[plane]);
+ deblock_row_limit_[plane] =
+ std::min(frame_header_.rows4x4, DivideBy4(plane_height + 3)
+ << subsampling_y_[plane]);
+ const int plane_width =
+ RightShiftWithRounding(frame_header_.width, subsampling_x_[plane]);
+ deblock_column_limit_[plane] =
+ std::min(frame_header_.columns4x4, DivideBy4(plane_width + 3)
+ << subsampling_x_[plane]);
}
}
@@ -418,7 +578,10 @@ bool Tile::Init() {
return false;
}
} else {
- residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(32, 4096 * residual_size_);
+ // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
+ // checks when parsing quantized coefficients.
+ residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
+ 32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
if (residual_buffer_ == nullptr) {
LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
return false;
@@ -429,62 +592,165 @@ bool Tile::Init() {
return false;
}
}
+ if (frame_header_.use_ref_frame_mvs) {
+ assert(sequence_header_.enable_order_hint);
+ SetupMotionField(frame_header_, current_frame_, reference_frames_,
+ row4x4_start_, row4x4_end_, column4x4_start_,
+ column4x4_end_, &motion_field_);
+ }
+ ResetLoopRestorationParams();
return true;
}
-bool Tile::Decode(bool is_main_thread) {
- if (!Init()) {
- pending_tiles_->Decrement(false);
- return false;
+template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
+bool Tile::ProcessSuperBlockRow(int row4x4,
+ TileScratchBuffer* const scratch_buffer) {
+ if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
+ assert(scratch_buffer != nullptr);
+ const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
+ for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
+ column4x4 += block_width4x4) {
+ if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4, scratch_buffer,
+ processing_mode)) {
+ LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
+ row4x4, column4x4);
+ return false;
+ }
}
- if (frame_header_.use_ref_frame_mvs) {
- SetupMotionField(sequence_header_, frame_header_, current_frame_,
- reference_frames_, motion_field_mv_, row4x4_start_,
- row4x4_end_, column4x4_start_, column4x4_end_);
+ if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
+ SaveSymbolDecoderContext();
}
- ResetLoopRestorationParams();
+ if (processing_mode == kProcessingModeDecodeOnly ||
+ processing_mode == kProcessingModeParseAndDecode) {
+ PopulateIntraPredictionBuffer(row4x4);
+ }
+ return true;
+}
+
+// Used in frame parallel mode. The symbol decoder context need not be saved in
+// this case since it was done when parsing was complete.
+template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
+ int row4x4, TileScratchBuffer* scratch_buffer);
+// Used in non frame parallel mode.
+template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
+ int row4x4, TileScratchBuffer* scratch_buffer);
+
+void Tile::SaveSymbolDecoderContext() {
+ if (frame_header_.enable_frame_end_update_cdf &&
+ number_ == frame_header_.tile_info.context_update_id) {
+ *saved_symbol_decoder_context_ = symbol_decoder_context_;
+ }
+}
+
+bool Tile::ParseAndDecode() {
// If this is the main thread, we build the loop filter bit masks when parsing
// so that it happens in the current thread. This ensures that the main thread
// does as much work as possible.
- build_bit_mask_when_parsing_ = is_main_thread;
if (split_parse_and_decode_) {
- if (!ThreadedDecode()) return false;
- } else {
- const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
- std::unique_ptr<DecoderScratchBuffer> scratch_buffer =
- decoder_scratch_buffer_pool_->Get();
- if (scratch_buffer == nullptr) {
+ if (!ThreadedParseAndDecode()) return false;
+ SaveSymbolDecoderContext();
+ return true;
+ }
+ std::unique_ptr<TileScratchBuffer> scratch_buffer =
+ tile_scratch_buffer_pool_->Get();
+ if (scratch_buffer == nullptr) {
+ pending_tiles_->Decrement(false);
+ LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
+ return false;
+ }
+ const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
+ for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
+ row4x4 += block_width4x4) {
+ if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
+ row4x4, scratch_buffer.get())) {
pending_tiles_->Decrement(false);
- LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
return false;
}
- for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
- row4x4 += block_width4x4) {
- for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
- column4x4 += block_width4x4) {
- if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4,
- scratch_buffer.get(),
- kProcessingModeParseAndDecode)) {
- pending_tiles_->Decrement(false);
- LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
- row4x4, column4x4);
- return false;
- }
- }
+ }
+ tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
+ pending_tiles_->Decrement(true);
+ return true;
+}
+
+bool Tile::Parse() {
+ const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
+ std::unique_ptr<TileScratchBuffer> scratch_buffer =
+ tile_scratch_buffer_pool_->Get();
+ if (scratch_buffer == nullptr) {
+ LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
+ return false;
+ }
+ for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
+ row4x4 += block_width4x4) {
+ if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
+ row4x4, scratch_buffer.get())) {
+ return false;
}
- decoder_scratch_buffer_pool_->Release(std::move(scratch_buffer));
}
- if (frame_header_.enable_frame_end_update_cdf &&
- number_ == frame_header_.tile_info.context_update_id) {
- *saved_symbol_decoder_context_ = symbol_decoder_context_;
+ tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
+ SaveSymbolDecoderContext();
+ return true;
+}
+
+bool Tile::Decode(
+ std::mutex* const mutex, int* const superblock_row_progress,
+ std::condition_variable* const superblock_row_progress_condvar) {
+ const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
+ const int block_width4x4_log2 =
+ sequence_header_.use_128x128_superblock ? 5 : 4;
+ std::unique_ptr<TileScratchBuffer> scratch_buffer =
+ tile_scratch_buffer_pool_->Get();
+ if (scratch_buffer == nullptr) {
+ LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
+ return false;
}
- if (!split_parse_and_decode_) {
- pending_tiles_->Decrement(true);
+ for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
+ row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
+ if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
+ row4x4, scratch_buffer.get())) {
+ return false;
+ }
+ if (post_filter_.DoDeblock()) {
+ // Apply vertical deblock filtering for all the columns in this tile
+ // except for the first 64 columns.
+ post_filter_.ApplyDeblockFilter(
+ kLoopFilterTypeVertical, row4x4,
+ column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
+ block_width4x4);
+ // If this is the first superblock row of the tile, then we cannot apply
+ // horizontal deblocking here since we don't know if the top row is
+ // available. So it will be done by the calling thread in that case.
+ if (row4x4 != row4x4_start_) {
+ // Apply horizontal deblock filtering for all the columns in this tile
+ // except for the first and the last 64 columns.
+ // Note about the last tile of each row: For the last tile,
+ // column4x4_end may not be a multiple of 16. In that case it is still
+ // okay to simply subtract 16 since ApplyDeblockFilter() will only do
+ // the filters in increments of 64 columns (or 32 columns for chroma
+ // with subsampling).
+ post_filter_.ApplyDeblockFilter(
+ kLoopFilterTypeHorizontal, row4x4,
+ column4x4_start_ + kNum4x4InLoopFilterUnit,
+ column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
+ }
+ }
+ bool notify;
+ {
+ std::unique_lock<std::mutex> lock(*mutex);
+ notify = ++superblock_row_progress[index] ==
+ frame_header_.tile_info.tile_columns;
+ }
+ if (notify) {
+ // We are done decoding this superblock row. Notify the post filtering
+ // thread.
+ superblock_row_progress_condvar[index].notify_one();
+ }
}
+ tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
return true;
}
-bool Tile::ThreadedDecode() {
+bool Tile::ThreadedParseAndDecode() {
{
std::lock_guard<std::mutex> lock(threading_.mutex);
if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
@@ -499,8 +765,8 @@ bool Tile::ThreadedDecode() {
const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
// Begin parsing.
- std::unique_ptr<DecoderScratchBuffer> scratch_buffer =
- decoder_scratch_buffer_pool_->Get();
+ std::unique_ptr<TileScratchBuffer> scratch_buffer =
+ tile_scratch_buffer_pool_->Get();
if (scratch_buffer == nullptr) {
pending_tiles_->Decrement(false);
LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
@@ -535,7 +801,7 @@ bool Tile::ThreadedDecode() {
std::lock_guard<std::mutex> lock(threading_.mutex);
if (threading_.abort) break;
}
- decoder_scratch_buffer_pool_->Release(std::move(scratch_buffer));
+ tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
// We are done parsing. We can return here since the calling thread will make
// sure that it waits for all the superblocks to be decoded.
@@ -593,13 +859,13 @@ void Tile::DecodeSuperBlock(int row_index, int column_index,
int block_width4x4) {
const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
- std::unique_ptr<DecoderScratchBuffer> scratch_buffer =
- decoder_scratch_buffer_pool_->Get();
+ std::unique_ptr<TileScratchBuffer> scratch_buffer =
+ tile_scratch_buffer_pool_->Get();
bool ok = scratch_buffer != nullptr;
if (ok) {
ok = ProcessSuperBlock(row4x4, column4x4, block_width4x4,
scratch_buffer.get(), kProcessingModeDecodeOnly);
- decoder_scratch_buffer_pool_->Release(std::move(scratch_buffer));
+ tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
}
std::unique_lock<std::mutex> lock(threading_.mutex);
if (ok) {
@@ -647,9 +913,38 @@ void Tile::DecodeSuperBlock(int row_index, int column_index,
}
}
-bool Tile::IsInside(int row4x4, int column4x4) const {
- return row4x4 >= row4x4_start_ && row4x4 < row4x4_end_ &&
- column4x4 >= column4x4_start_ && column4x4 < column4x4_end_;
+void Tile::PopulateIntraPredictionBuffer(int row4x4) {
+ const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
+ if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
+ return;
+ }
+ const size_t pixel_size =
+ (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
+ : sizeof(uint16_t));
+ for (int plane = 0; plane < PlaneCount(); ++plane) {
+ const int row_to_copy =
+ (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
+ const size_t pixels_to_copy =
+ (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
+ subsampling_x_[plane]) *
+ pixel_size;
+ const size_t column_start =
+ MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
+ void* start;
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ if (sequence_header_.color_config.bitdepth > 8) {
+ Array2DView<uint16_t> buffer(
+ buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
+ reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
+ start = &buffer[row_to_copy][column_start];
+ } else // NOLINT
+#endif
+ {
+ start = &buffer_[plane][row_to_copy][column_start];
+ }
+ memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
+ start, pixels_to_copy);
+ }
}
int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
@@ -660,7 +955,7 @@ int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
const int tx_width = kTransformWidth[tx_size];
const int tx_height = kTransformHeight[tx_size];
- const BlockSize plane_size = block.residual_size[GetPlaneType(plane)];
+ const BlockSize plane_size = block.residual_size[plane];
const int block_width = kBlockWidthPixels[plane_size];
const int block_height = kBlockHeightPixels[plane_size];
@@ -785,150 +1080,167 @@ void Tile::ReadTransformType(const Block& block, int x4, int y4,
kTransformHeight4x4[tx_size], tx_type, transform_types_);
}
-// Section 8.3.2 in the spec, under coeff_base_eob.
-int Tile::GetCoeffBaseContextEob(TransformSize tx_size, int index) {
- if (index == 0) return 0;
- const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
- const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
- const int tx_height = kTransformHeight[adjusted_tx_size];
- if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
- if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
- return 3;
-}
-
-// Section 8.3.2 in the spec, under coeff_base.
-int Tile::GetCoeffBaseContext2D(const int32_t* const quantized_buffer,
- TransformSize tx_size,
- int adjusted_tx_width_log2, uint16_t pos) {
- if (pos == 0) return 0;
+// Section 8.3.2 in the spec, under coeff_base and coeff_br.
+// Bottom boundary checks are avoided by the padded rows.
+// For a coefficient near the right boundary, the two right neighbors and the
+// one bottom-right neighbor may be out of bounds. We don't check the right
+// boundary for them, because the out-of-bounds neighbors project to positions
+// above the diagonal line that goes through the current coefficient, and
+// those positions are still all 0s according to the diagonal scan order.
+template <typename ResidualType>
+void Tile::ReadCoeffBase2D(
+ const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
+ int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
+ uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
+ ResidualType* const quantized_buffer) {
const int tx_width = 1 << adjusted_tx_width_log2;
- const int padded_tx_width = tx_width + kQuantizedCoefficientBufferPadding;
- const int32_t* const quantized =
- &quantized_buffer[PaddedIndex(pos, adjusted_tx_width_log2)];
- const int context = std::min(
- 4, DivideBy2(1 + (std::min(quantized[1], 3) + // {0, 1}
- std::min(quantized[padded_tx_width], 3) + // {1, 0}
- std::min(quantized[padded_tx_width + 1], 3) + // {1, 1}
- std::min(quantized[2], 3) + // {0, 2}
- std::min(quantized[MultiplyBy2(padded_tx_width)],
- 3)))); // {2, 0}
- const int row = pos >> adjusted_tx_width_log2;
- const int column = pos & (tx_width - 1);
- return context + kCoeffBaseContextOffset[tx_size][std::min(row, 4)]
- [std::min(column, 4)];
+ int i = eob - 2;
+ do {
+ constexpr auto threshold = static_cast<ResidualType>(3);
+ const uint16_t pos = scan[i];
+ const int row = pos >> adjusted_tx_width_log2;
+ const int column = pos & (tx_width - 1);
+ auto* const quantized = &quantized_buffer[pos];
+ int context;
+ if (pos == 0) {
+ context = 0;
+ } else {
+ context = std::min(
+ 4, DivideBy2(
+ 1 + (std::min(quantized[1], threshold) + // {0, 1}
+ std::min(quantized[tx_width], threshold) + // {1, 0}
+ std::min(quantized[tx_width + 1], threshold) + // {1, 1}
+ std::min(quantized[2], threshold) + // {0, 2}
+ std::min(quantized[MultiplyBy2(tx_width)],
+ threshold)))); // {2, 0}
+ context += kCoeffBaseContextOffset[tx_size][std::min(row, 4)]
+ [std::min(column, 4)];
+ }
+ int level =
+ reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
+ if (level > kNumQuantizerBaseLevels) {
+ // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
+ // + 1, because we clip the overall output to 6 and the unclipped
+ // quantized values will always result in an output of greater than 6.
+ context = std::min(6, DivideBy2(1 + quantized[1] + // {0, 1}
+ quantized[tx_width] + // {1, 0}
+ quantized[tx_width + 1])); // {1, 1}
+ if (pos != 0) {
+ context += 14 >> static_cast<int>((row | column) < 2);
+ }
+ level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
+ }
+ quantized[0] = level;
+ } while (--i >= 0);
}
-// Section 8.3.2 in the spec, under coeff_base.
-int Tile::GetCoeffBaseContextHorizontal(const int32_t* const quantized_buffer,
- TransformSize /*tx_size*/,
- int adjusted_tx_width_log2,
- uint16_t pos) {
+// Section 8.3.2 in the spec, under coeff_base and coeff_br.
+// Bottom boundary checks are avoided by the padded rows.
+// For a coefficient near the right boundary, the four right neighbors may be
+// out of bounds. We don't do the boundary check for the first three right
+// neighbors, because even for transform blocks with the smallest width (4),
+// the first three out-of-bounds neighbors project to positions to the left of
+// the current coefficient, and those positions are still all 0s according to
+// the column scan order. However, when the transform block width is 4 and the
+// current coefficient is on the right boundary, its fourth right neighbor
+// projects to the position directly below it in the same column, which could
+// be nonzero. Therefore, we must skip the fourth right neighbor. To keep it
+// simple, for any coefficient, we always do the boundary check for its fourth
+// right neighbor.
+template <typename ResidualType>
+void Tile::ReadCoeffBaseHorizontal(
+ const uint16_t* scan, PlaneType plane_type, TransformSize /*tx_size*/,
+ int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
+ uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
+ ResidualType* const quantized_buffer) {
const int tx_width = 1 << adjusted_tx_width_log2;
- const int padded_tx_width = tx_width + kQuantizedCoefficientBufferPadding;
- const int32_t* const quantized =
- &quantized_buffer[PaddedIndex(pos, adjusted_tx_width_log2)];
- const int context = std::min(
- 4, DivideBy2(1 + (std::min(quantized[1], 3) + // {0, 1}
- std::min(quantized[padded_tx_width], 3) + // {1, 0}
- std::min(quantized[2], 3) + // {0, 2}
- std::min(quantized[3], 3) + // {0, 3}
- std::min(quantized[4], 3)))); // {0, 4}
- const int index = pos & (tx_width - 1);
- return context + kCoeffBasePositionContextOffset[std::min(index, 2)];
+ int i = eob - 2;
+ do {
+ constexpr auto threshold = static_cast<ResidualType>(3);
+ const uint16_t pos = scan[i];
+ const int column = pos & (tx_width - 1);
+ auto* const quantized = &quantized_buffer[pos];
+ int context = std::min(
+ 4,
+ DivideBy2(1 +
+ (std::min(quantized[1], threshold) + // {0, 1}
+ std::min(quantized[tx_width], threshold) + // {1, 0}
+ std::min(quantized[2], threshold) + // {0, 2}
+ std::min(quantized[3], threshold) + // {0, 3}
+ std::min(quantized[4],
+ static_cast<ResidualType>(
+ (column + 4 < tx_width) ? 3 : 0))))); // {0, 4}
+ context += kCoeffBasePositionContextOffset[column];
+ int level =
+ reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
+ if (level > kNumQuantizerBaseLevels) {
+ // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
+ // + 1, because we clip the overall output to 6 and the unclipped
+ // quantized values will always result in an output of greater than 6.
+ context = std::min(6, DivideBy2(1 + quantized[1] + // {0, 1}
+ quantized[tx_width] + // {1, 0}
+ quantized[2])); // {0, 2}
+ if (pos != 0) {
+ context += 14 >> static_cast<int>(column == 0);
+ }
+ level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
+ }
+ quantized[0] = level;
+ } while (--i >= 0);
}
-// Section 8.3.2 in the spec, under coeff_base.
-int Tile::GetCoeffBaseContextVertical(const int32_t* const quantized_buffer,
- TransformSize /*tx_size*/,
- int adjusted_tx_width_log2,
- uint16_t pos) {
+// Section 8.3.2 in the spec, under coeff_base and coeff_br.
+// Bottom boundary checks are avoided by the padded rows.
+// Right boundary check is performed explicitly.
+template <typename ResidualType>
+void Tile::ReadCoeffBaseVertical(
+ const uint16_t* scan, PlaneType plane_type, TransformSize /*tx_size*/,
+ int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
+ uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
+ ResidualType* const quantized_buffer) {
const int tx_width = 1 << adjusted_tx_width_log2;
- const int padded_tx_width = tx_width + kQuantizedCoefficientBufferPadding;
- const int32_t* const quantized =
- &quantized_buffer[PaddedIndex(pos, adjusted_tx_width_log2)];
- const int context = std::min(
- 4, DivideBy2(1 + (std::min(quantized[1], 3) + // {0, 1}
- std::min(quantized[padded_tx_width], 3) + // {1, 0}
- std::min(quantized[MultiplyBy2(padded_tx_width)],
- 3) + // {2, 0}
- std::min(quantized[padded_tx_width * 3], 3) + // {3, 0}
- std::min(quantized[MultiplyBy4(padded_tx_width)],
- 3)))); // {4, 0}
-
- const int index = pos >> adjusted_tx_width_log2;
- return context + kCoeffBasePositionContextOffset[std::min(index, 2)];
-}
-
-// Section 8.3.2 in the spec, under coeff_br.
-int Tile::GetCoeffBaseRangeContext2D(const int32_t* const quantized_buffer,
- int adjusted_tx_width_log2, int pos) {
- const uint8_t tx_width = 1 << adjusted_tx_width_log2;
- const int padded_tx_width = tx_width + kQuantizedCoefficientBufferPadding;
- const int32_t* const quantized =
- &quantized_buffer[PaddedIndex(pos, adjusted_tx_width_log2)];
- const int context = std::min(
- 6, DivideBy2(
- 1 +
- std::min(quantized[1],
- kQuantizerCoefficientBaseRangeContextClamp) + // {0, 1}
- std::min(quantized[padded_tx_width],
- kQuantizerCoefficientBaseRangeContextClamp) + // {1, 0}
- std::min(quantized[padded_tx_width + 1],
- kQuantizerCoefficientBaseRangeContextClamp))); // {1, 1}
- if (pos == 0) return context;
- const int row = pos >> adjusted_tx_width_log2;
- const int column = pos & (tx_width - 1);
- return context + (((row | column) < 2) ? 7 : 14);
-}
-
-// Section 8.3.2 in the spec, under coeff_br.
-int Tile::GetCoeffBaseRangeContextHorizontal(
- const int32_t* const quantized_buffer, int adjusted_tx_width_log2,
- int pos) {
- const uint8_t tx_width = 1 << adjusted_tx_width_log2;
- const int padded_tx_width = tx_width + kQuantizedCoefficientBufferPadding;
- const int32_t* const quantized =
- &quantized_buffer[PaddedIndex(pos, adjusted_tx_width_log2)];
- const int context = std::min(
- 6, DivideBy2(
- 1 +
- std::min(quantized[1],
- kQuantizerCoefficientBaseRangeContextClamp) + // {0, 1}
- std::min(quantized[padded_tx_width],
- kQuantizerCoefficientBaseRangeContextClamp) + // {1, 0}
- std::min(quantized[2],
- kQuantizerCoefficientBaseRangeContextClamp))); // {0, 2}
- if (pos == 0) return context;
- const int column = pos & (tx_width - 1);
- return context + ((column == 0) ? 7 : 14);
-}
-
-// Section 8.3.2 in the spec, under coeff_br.
-int Tile::GetCoeffBaseRangeContextVertical(
- const int32_t* const quantized_buffer, int adjusted_tx_width_log2,
- int pos) {
- const uint8_t tx_width = 1 << adjusted_tx_width_log2;
- const int padded_tx_width = tx_width + kQuantizedCoefficientBufferPadding;
- const int32_t* const quantized =
- &quantized_buffer[PaddedIndex(pos, adjusted_tx_width_log2)];
- const int context = std::min(
- 6, DivideBy2(
- 1 +
- std::min(quantized[1],
- kQuantizerCoefficientBaseRangeContextClamp) + // {0, 1}
- std::min(quantized[padded_tx_width],
- kQuantizerCoefficientBaseRangeContextClamp) + // {1, 0}
- std::min(quantized[MultiplyBy2(padded_tx_width)],
- kQuantizerCoefficientBaseRangeContextClamp))); // {2, 0}
- if (pos == 0) return context;
- const int row = pos >> adjusted_tx_width_log2;
- return context + ((row == 0) ? 7 : 14);
+ int i = eob - 2;
+ do {
+ constexpr auto threshold = static_cast<ResidualType>(3);
+ const uint16_t pos = scan[i];
+ const int row = pos >> adjusted_tx_width_log2;
+ const int column = pos & (tx_width - 1);
+ auto* const quantized = &quantized_buffer[pos];
+ const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
+ int context =
+ std::min(4, DivideBy2(1 + (std::min(quantized_column1, 3) + // {0, 1}
+ std::min(quantized[tx_width],
+ threshold) + // {1, 0}
+ std::min(quantized[MultiplyBy2(tx_width)],
+ threshold) + // {2, 0}
+ std::min(quantized[tx_width * 3],
+ threshold) + // {3, 0}
+ std::min(quantized[MultiplyBy4(tx_width)],
+ threshold)))); // {4, 0}
+ context += kCoeffBasePositionContextOffset[row];
+ int level =
+ reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
+ if (level > kNumQuantizerBaseLevels) {
+ // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
+ // + 1, because we clip the overall output to 6 and the unclipped
+ // quantized values will always result in an output of greater than 6.
+ int context =
+ std::min(6, DivideBy2(1 + quantized_column1 + // {0, 1}
+ quantized[tx_width] + // {1, 0}
+ quantized[MultiplyBy2(tx_width)])); // {2, 0}
+ if (pos != 0) {
+ context += 14 >> static_cast<int>(row == 0);
+ }
+ level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
+ }
+ quantized[0] = level;
+ } while (--i >= 0);
}
int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
- int dc_sign = std::accumulate(
+  // Make dc_sign 8-bit so that std::accumulate() can avoid sign extensions.
+ int8_t dc_sign = std::accumulate(
dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
@@ -938,6 +1250,8 @@ int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
// if (dc_sign < 0) return 1;
// if (dc_sign > 0) return 2;
// return 0;
+ // And it is better than:
+ // return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
return static_cast<int>(dc_sign < 0) +
MultiplyBy2(static_cast<int>(dc_sign > 0));
}
@@ -1020,23 +1334,21 @@ void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane,
}
}
-template <bool is_dc_coefficient>
+template <typename ResidualType, bool is_dc_coefficient>
bool Tile::ReadSignAndApplyDequantization(
- const Block& block, int32_t* const quantized_buffer,
- const uint16_t* const scan, int i, int adjusted_tx_width_log2, int tx_width,
- int q_value, const uint8_t* const quantizer_matrix, int shift,
- int min_value, int max_value, uint16_t* const dc_sign_cdf,
- int8_t* const dc_category, int* const coefficient_level) {
- int pos = is_dc_coefficient ? 0 : scan[i];
- const int pos_index =
- is_dc_coefficient ? 0 : PaddedIndex(pos, adjusted_tx_width_log2);
- // If quantized_buffer[pos_index] is zero, then the rest of the function has
- // no effect.
- if (quantized_buffer[pos_index] == 0) return true;
- const bool sign = is_dc_coefficient ? reader_.ReadSymbol(dc_sign_cdf)
- : static_cast<bool>(reader_.ReadBit());
- if (quantized_buffer[pos_index] >
- kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
+ const uint16_t* const scan, int i, int q_value,
+ const uint8_t* const quantizer_matrix, int shift, int max_value,
+ uint16_t* const dc_sign_cdf, int8_t* const dc_category,
+ int* const coefficient_level, ResidualType* residual_buffer) {
+ const int pos = is_dc_coefficient ? 0 : scan[i];
+ // If residual_buffer[pos] is zero, then the rest of the function has no
+ // effect.
+ int level = residual_buffer[pos];
+ if (level == 0) return true;
+ const int sign = is_dc_coefficient
+ ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
+ : reader_.ReadBit();
+ if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
int length = 0;
bool golomb_length_bit = false;
do {
@@ -1051,13 +1363,13 @@ bool Tile::ReadSignAndApplyDequantization(
for (int i = length - 2; i >= 0; --i) {
x = (x << 1) | reader_.ReadBit();
}
- quantized_buffer[pos_index] += x - 1;
+ level += x - 1;
}
- if (is_dc_coefficient && quantized_buffer[0] > 0) {
- *dc_category = sign ? -1 : 1;
+ if (is_dc_coefficient) {
+ *dc_category = (sign != 0) ? -1 : 1;
}
- quantized_buffer[pos_index] &= 0xfffff;
- *coefficient_level += quantized_buffer[pos_index];
+ level &= 0xfffff;
+ *coefficient_level += level;
// Apply dequantization. Step 1 of section 7.12.3 in the spec.
int q = q_value;
if (quantizer_matrix != nullptr) {
@@ -1065,34 +1377,21 @@ bool Tile::ReadSignAndApplyDequantization(
}
// The intermediate multiplication can exceed 32 bits, so it has to be
// performed by promoting one of the values to int64_t.
- int32_t dequantized_value =
- (static_cast<int64_t>(q) * quantized_buffer[pos_index]) & 0xffffff;
+ int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
dequantized_value >>= shift;
- if (sign) {
- dequantized_value = -dequantized_value;
- }
- // Inverse transform process assumes that the quantized coefficients are
- // stored as a virtual 2d array of size |tx_width| x |tx_height|. If
- // transform width is 64, then this assumption is broken because the scan
- // order used for populating the coefficients for such transforms is the
- // same as the one used for corresponding transform with width 32 (e.g. the
- // scan order used for 64x16 is the same as the one used for 32x16). So we
- // have to recompute the value of pos so that it reflects the index of the
- // 2d array of size 64 x |tx_height|.
- if (!is_dc_coefficient && tx_width == 64) {
- const int row_index = DivideBy32(pos);
- const int column_index = Mod32(pos);
- pos = MultiplyBy64(row_index) + column_index;
- }
- if (sequence_header_.color_config.bitdepth == 8) {
- auto* const residual_buffer = reinterpret_cast<int16_t*>(*block.residual);
- residual_buffer[pos] = Clip3(dequantized_value, min_value, max_value);
-#if LIBGAV1_MAX_BITDEPTH >= 10
- } else {
- auto* const residual_buffer = reinterpret_cast<int32_t*>(*block.residual);
- residual_buffer[pos] = Clip3(dequantized_value, min_value, max_value);
-#endif
- }
+ // At this point:
+ // * |dequantized_value| is always non-negative.
+ // * |sign| can be either 0 or 1.
+ // * min_value = -(max_value + 1).
+ // We need to apply the following:
+ // dequantized_value = sign ? -dequantized_value : dequantized_value;
+ // dequantized_value = Clip3(dequantized_value, min_value, max_value);
+ //
+ // Note that -x == ~(x - 1).
+ //
+  // Now, the above two lines can be done with a std::min and xor as follows:
+ dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
+ residual_buffer[pos] = dequantized_value;
return true;
}
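A worked check of the min/xor trick above, assuming 8-bit content so that max_value == 32767 and min_value == -32768 (the helper name is illustrative):

    int32_t SignAndClampSketch(int32_t value, int32_t sign, int32_t max_value) {
      // |value| is non-negative, |sign| is 0 or 1, min_value == -(max_value + 1).
      return std::min(value - sign, max_value) ^ -sign;
    }
    // SignAndClampSketch(40000, 0, 32767) ==  32767  (clamped to max_value)
    // SignAndClampSketch(40000, 1, 32767) == -32768  (~32767, i.e. min_value)
    // SignAndClampSketch(25, 1, 32767)    ==    -25  (~24, i.e. plain negation)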
@@ -1109,10 +1408,11 @@ int Tile::ReadCoeffBaseRange(int clamped_tx_size_context, int cdf_context,
return level;
}
-int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
- int start_x, int start_y,
- TransformSize tx_size,
- TransformType* const tx_type) {
+template <typename ResidualType>
+int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
+ int start_x, int start_y,
+ TransformSize tx_size,
+ TransformType* const tx_type) {
const int x4 = DivideBy4(start_x);
const int y4 = DivideBy4(start_y);
const int w4 = kTransformWidth4x4[tx_size];
@@ -1134,19 +1434,15 @@ int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
}
const int tx_width = kTransformWidth[tx_size];
const int tx_height = kTransformHeight[tx_size];
- memset(*block.residual, 0, tx_width * tx_height * residual_size_);
- const int clamped_tx_width = std::min(tx_width, 32);
+ const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
+ const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
+ const int tx_padding =
+ (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
+ auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
+ // Clear padding to avoid bottom boundary checks when parsing quantized
+ // coefficients.
+ memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
const int clamped_tx_height = std::min(tx_height, 32);
- const int padded_tx_width =
- clamped_tx_width + kQuantizedCoefficientBufferPadding;
- const int padded_tx_height =
- clamped_tx_height + kQuantizedCoefficientBufferPadding;
- int32_t* const quantized = block.scratch_buffer->quantized_buffer;
- // Only the first |padded_tx_width| * |padded_tx_height| values of |quantized|
- // will be used by this function and the functions to which it is passed into.
- // So we simply need to zero out those values before it is being used.
- memset(quantized, 0,
- padded_tx_width * padded_tx_height * sizeof(quantized[0]));
if (plane == kPlaneY) {
ReadTransformType(block, x4, y4, tx_size);
}
@@ -1181,9 +1477,9 @@ int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
cdf = symbol_decoder_context_.eob_pt_1024_cdf[plane_type];
break;
}
- const int16_t eob_pt =
+ const int eob_pt =
1 + reader_.ReadSymbol(cdf, kEobPt16SymbolCount + eob_multi_size);
- int16_t eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
+ int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
if (eob_pt >= 3) {
context = eob_pt - 3;
const bool eob_extra = reader_.ReadSymbol(
@@ -1199,23 +1495,6 @@ int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
}
}
const uint16_t* scan = kScan[tx_class][tx_size];
- const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
- const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
- // Lookup used to call the right variant of GetCoeffBaseContext*() based on
- // the transform class.
- static constexpr int (Tile::*kGetCoeffBaseContextFunc[])(
- const int32_t*, TransformSize, int, uint16_t) = {
- &Tile::GetCoeffBaseContext2D, &Tile::GetCoeffBaseContextHorizontal,
- &Tile::GetCoeffBaseContextVertical};
- auto get_coeff_base_context_func = kGetCoeffBaseContextFunc[tx_class];
- // Lookup used to call the right variant of GetCoeffBaseRangeContext*() based
- // on the transform class.
- static constexpr int (Tile::*kGetCoeffBaseRangeContextFunc[])(
- const int32_t*, int, int) = {&Tile::GetCoeffBaseRangeContext2D,
- &Tile::GetCoeffBaseRangeContextHorizontal,
- &Tile::GetCoeffBaseRangeContextVertical};
- auto get_coeff_base_range_context_func =
- kGetCoeffBaseRangeContextFunc[tx_class];
const int clamped_tx_size_context = std::min(tx_size_context, 3);
// Read the last coefficient.
{
@@ -1227,36 +1506,37 @@ int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
.coeff_base_eob_cdf[tx_size_context][plane_type][context],
kCoeffBaseEobSymbolCount);
if (level > kNumQuantizerBaseLevels) {
- level += ReadCoeffBaseRange(clamped_tx_size_context,
- (this->*get_coeff_base_range_context_func)(
- quantized, adjusted_tx_width_log2, pos),
- plane_type);
+ level += ReadCoeffBaseRange(
+ clamped_tx_size_context,
+ GetCoeffBaseRangeContextEob(adjusted_tx_width_log2, pos, tx_class),
+ plane_type);
}
- quantized[PaddedIndex(pos, adjusted_tx_width_log2)] = level;
+ residual[pos] = level;
+ }
+ if (eob > 1) {
+ // Read all the other coefficients.
+ // Lookup used to call the right variant of ReadCoeffBase*() based on the
+ // transform class.
+ static constexpr void (Tile::*kGetCoeffBaseFunc[])(
+ const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
+ int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
+ uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
+ ResidualType* quantized_buffer) = {
+ &Tile::ReadCoeffBase2D<ResidualType>,
+ &Tile::ReadCoeffBaseHorizontal<ResidualType>,
+ &Tile::ReadCoeffBaseVertical<ResidualType>};
+ (this->*kGetCoeffBaseFunc[tx_class])(
+ scan, plane_type, tx_size, clamped_tx_size_context,
+ adjusted_tx_width_log2, eob,
+ symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
+ residual);
}
- // Read all the other coefficients.
- for (int i = eob - 2; i >= 0; --i) {
- const uint16_t pos = scan[i];
- context = (this->*get_coeff_base_context_func)(quantized, tx_size,
- adjusted_tx_width_log2, pos);
- int level = reader_.ReadSymbol<kCoeffBaseSymbolCount>(
- symbol_decoder_context_
- .coeff_base_cdf[tx_size_context][plane_type][context]);
- if (level > kNumQuantizerBaseLevels) {
- level += ReadCoeffBaseRange(clamped_tx_size_context,
- (this->*get_coeff_base_range_context_func)(
- quantized, adjusted_tx_width_log2, pos),
- plane_type);
- }
- quantized[PaddedIndex(pos, adjusted_tx_width_log2)] = level;
- }
- const int min_value = -(1 << (7 + sequence_header_.color_config.bitdepth));
const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
const int current_quantizer_index = GetQIndex(
frame_header_.segmentation, bp.segment_id, current_quantizer_index_);
const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
- const int shift = GetQuantizationShift(tx_size);
+ const int shift = kQuantizationShift[tx_size];
const uint8_t* const quantizer_matrix =
(frame_header_.quantizer.use_matrix &&
*tx_type < kTransformTypeIdentityIdentity &&
@@ -1268,24 +1548,27 @@ int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
int coefficient_level = 0;
int8_t dc_category = 0;
uint16_t* const dc_sign_cdf =
- (quantized[0] != 0)
+ (residual[0] != 0)
? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
x4, y4, w4, h4, plane)]
: nullptr;
assert(scan[0] == 0);
- if (!ReadSignAndApplyDequantization</*is_dc_coefficient=*/true>(
- block, quantized, scan, 0, adjusted_tx_width_log2, tx_width,
- dc_q_value, quantizer_matrix, shift, min_value, max_value,
- dc_sign_cdf, &dc_category, &coefficient_level)) {
+ if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
+ scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
+ &dc_category, &coefficient_level, residual)) {
return -1;
}
- for (int i = 1; i < eob; ++i) {
- if (!ReadSignAndApplyDequantization</*is_dc_coefficient=*/false>(
- block, quantized, scan, i, adjusted_tx_width_log2, tx_width,
- ac_q_value, quantizer_matrix, shift, min_value, max_value, nullptr,
- nullptr, &coefficient_level)) {
- return -1;
- }
+ if (eob > 1) {
+ int i = 1;
+ do {
+ if (!ReadSignAndApplyDequantization<ResidualType,
+ /*is_dc_coefficient=*/false>(
+ scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
+ nullptr, &coefficient_level, residual)) {
+ return -1;
+ }
+ } while (++i < eob);
+ MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
}
SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
dc_category);
@@ -1295,6 +1578,25 @@ int16_t Tile::ReadTransformCoefficients(const Block& block, Plane plane,
return eob;
}
+// CALL_BITDEPTH_FUNCTION is a macro that calls the appropriate template
+// |function| depending on the value of |sequence_header_.color_config.bitdepth|
+// with the variadic arguments.
+#if LIBGAV1_MAX_BITDEPTH >= 10
+#define CALL_BITDEPTH_FUNCTION(function, ...) \
+ do { \
+ if (sequence_header_.color_config.bitdepth > 8) { \
+ function<uint16_t>(__VA_ARGS__); \
+ } else { \
+ function<uint8_t>(__VA_ARGS__); \
+ } \
+ } while (false)
+#else
+#define CALL_BITDEPTH_FUNCTION(function, ...) \
+ do { \
+ function<uint8_t>(__VA_ARGS__); \
+ } while (false)
+#endif
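The do { ... } while (false) wrapper makes each expansion behave as a single statement, so the macro composes safely with if/else at the call sites below.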
+
bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
int base_y, TransformSize tx_size, int x, int y,
ProcessingMode mode) {
@@ -1317,15 +1619,8 @@ bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
mode == kProcessingModeParseAndDecode;
if (do_decode && !bp.is_inter) {
if (bp.palette_mode_info.size[GetPlaneType(plane)] > 0) {
- if (sequence_header_.color_config.bitdepth == 8) {
- PalettePrediction<uint8_t>(block, plane, start_x, start_y, x, y,
- tx_size);
-#if LIBGAV1_MAX_BITDEPTH >= 10
- } else {
- PalettePrediction<uint16_t>(block, plane, start_x, start_y, x, y,
- tx_size);
-#endif
- }
+ CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
+ x, y, tx_size);
} else {
const PredictionMode mode =
(plane == kPlaneY)
@@ -1337,37 +1632,17 @@ bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
(sub_block_column4x4 >> subsampling_x) + step_x + 1;
const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
- const bool has_left =
- x > 0 || (plane == kPlaneY ? block.left_available
- : block.LeftAvailableChroma());
- const bool has_top =
- y > 0 ||
- (plane == kPlaneY ? block.top_available : block.TopAvailableChroma());
- if (sequence_header_.color_config.bitdepth == 8) {
- IntraPrediction<uint8_t>(
- block, plane, start_x, start_y, has_left, has_top,
- block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
- block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
- mode, tx_size);
-#if LIBGAV1_MAX_BITDEPTH >= 10
- } else {
- IntraPrediction<uint16_t>(
- block, plane, start_x, start_y, has_left, has_top,
- block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
- block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
- mode, tx_size);
-#endif
- }
+ const bool has_left = x > 0 || block.left_available[plane];
+ const bool has_top = y > 0 || block.top_available[plane];
+
+ CALL_BITDEPTH_FUNCTION(
+ IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
+ block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
+ block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
+ mode, tx_size);
if (plane != kPlaneY && bp.uv_mode == kPredictionModeChromaFromLuma) {
- if (sequence_header_.color_config.bitdepth == 8) {
- ChromaFromLumaPrediction<uint8_t>(block, plane, start_x, start_y,
- tx_size);
-#if LIBGAV1_MAX_BITDEPTH >= 10
- } else {
- ChromaFromLumaPrediction<uint16_t>(block, plane, start_x, start_y,
- tx_size);
-#endif
- }
+ CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
+ start_y, tx_size);
}
}
if (plane == kPlaneY) {
@@ -1381,34 +1656,35 @@ bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
if (!bp.skip) {
const int sb_row_index = SuperBlockRowIndex(block.row4x4);
const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
- switch (mode) {
- case kProcessingModeParseAndDecode: {
- TransformType tx_type;
- const int16_t non_zero_coeff_count = ReadTransformCoefficients(
+ if (mode == kProcessingModeDecodeOnly) {
+ TransformParameterQueue& tx_params =
+ *residual_buffer_threaded_[sb_row_index][sb_column_index]
+ ->transform_parameters();
+ ReconstructBlock(block, plane, start_x, start_y, tx_size,
+ tx_params.Type(), tx_params.NonZeroCoeffCount());
+ tx_params.Pop();
+ } else {
+ TransformType tx_type;
+ int non_zero_coeff_count;
+#if LIBGAV1_MAX_BITDEPTH >= 10
+ if (sequence_header_.color_config.bitdepth > 8) {
+ non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
block, plane, start_x, start_y, tx_size, &tx_type);
- if (non_zero_coeff_count < 0) return false;
+ } else // NOLINT
+#endif
+ {
+ non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
+ block, plane, start_x, start_y, tx_size, &tx_type);
+ }
+ if (non_zero_coeff_count < 0) return false;
+ if (mode == kProcessingModeParseAndDecode) {
ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
non_zero_coeff_count);
- break;
- }
- case kProcessingModeParseOnly: {
- TransformType tx_type;
- const int16_t non_zero_coeff_count = ReadTransformCoefficients(
- block, plane, start_x, start_y, tx_size, &tx_type);
- if (non_zero_coeff_count < 0) return false;
+ } else {
+ assert(mode == kProcessingModeParseOnly);
residual_buffer_threaded_[sb_row_index][sb_column_index]
->transform_parameters()
->Push(non_zero_coeff_count, tx_type);
- break;
- }
- case kProcessingModeDecodeOnly: {
- TransformParameterQueue& tx_params =
- *residual_buffer_threaded_[sb_row_index][sb_column_index]
- ->transform_parameters();
- ReconstructBlock(block, plane, start_x, start_y, tx_size,
- tx_params.Type(), tx_params.NonZeroCoeffCount());
- tx_params.Pop();
- break;
}
}
}
@@ -1417,11 +1693,8 @@ bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
&block.scratch_buffer
->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
[(sub_block_column4x4 >> subsampling_x) + 1];
- for (int i = 0; i < step_y; ++i) {
- static_assert(sizeof(bool) == 1, "");
- memset(block_decoded, 1, step_x);
- block_decoded += DecoderScratchBuffer::kBlockDecodedStride;
- }
+ SetBlockValues<bool>(step_y, step_x, true, block_decoded,
+ TileScratchBuffer::kBlockDecodedStride);
}
return true;
}
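SetBlockValues() is presumably defined in a shared utility header rather than in this file; a minimal sketch of the behavior implied by the memset loop it replaces above (the real helper may be specialized for performance, so treat this shape as an assumption):

template <typename T>
void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  for (int i = 0; i < rows; ++i) {
    // For T = bool and value = true this matches the memset(dst, 1, columns)
    // in the loop this call replaces.
    std::fill_n(dst, columns, value);
    dst += stride;
  }
}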
@@ -1437,7 +1710,7 @@ bool Tile::TransformTree(const Block& block, int start_x, int start_y,
stack.Push(TransformTreeNode(start_x, start_y,
static_cast<TransformSize>(plane_size)));
- while (!stack.Empty()) {
+ do {
TransformTreeNode node = stack.Pop();
const int row = DivideBy4(node.y);
const int column = DivideBy4(node.x);
@@ -1479,24 +1752,18 @@ bool Tile::TransformTree(const Block& block, int start_x, int start_y,
stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
- }
+ } while (!stack.Empty());
return true;
}
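This is one of several loops in this change converted from while to do/while (DecodeSuperBlock below gets the same treatment); the conversion is only valid because the body is guaranteed to run at least once, here because a node is pushed immediately before the loop. A generic sketch of the equivalence, where the container c and Process() are illustrative names, assuming c is non-empty at entry:

// Equivalent when c is known to be non-empty before the loop starts:
while (!c.Empty()) { Process(c.Pop()); }
do { Process(c.Pop()); } while (!c.Empty());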
void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
int start_y, TransformSize tx_size,
- TransformType tx_type,
- int16_t non_zero_coeff_count) {
+ TransformType tx_type, int non_zero_coeff_count) {
+ // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
assert(non_zero_coeff_count >= 0);
if (non_zero_coeff_count == 0) return;
- // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
- if (sequence_header_.color_config.bitdepth == 8) {
- Reconstruct(dsp_, tx_type, tx_size,
- frame_header_.segmentation.lossless[block.bp->segment_id],
- reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
- &buffer_[plane], non_zero_coeff_count);
#if LIBGAV1_MAX_BITDEPTH >= 10
- } else {
+ if (sequence_header_.color_config.bitdepth > 8) {
Array2DView<uint16_t> buffer(
buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
@@ -1504,7 +1771,13 @@ void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
frame_header_.segmentation.lossless[block.bp->segment_id],
reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
&buffer, non_zero_coeff_count);
+ } else // NOLINT
#endif
+ {
+ Reconstruct(dsp_, tx_type, tx_size,
+ frame_header_.segmentation.lossless[block.bp->segment_id],
+ reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
+ &buffer_[plane], non_zero_coeff_count);
}
if (split_parse_and_decode_) {
*block.residual +=
@@ -1513,8 +1786,8 @@ void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
}
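For bitdepths above 8 the frame buffer rows are still byte-addressed but hold 16-bit pixels, which is why the temporary Array2DView above divides the column count; a worked example, with the 1280-byte row width purely illustrative:

// buffer_[plane].columns() counts bytes. With bitdepth > 8 each pixel takes
// sizeof(uint16_t) = 2 bytes, so a row of 1280 bytes is viewed as
// 1280 / sizeof(uint16_t) = 640 uint16_t pixels; rows() is unchanged.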
bool Tile::Residual(const Block& block, ProcessingMode mode) {
- const int width_chunks = std::max(1, kBlockWidthPixels[block.size] >> 6);
- const int height_chunks = std::max(1, kBlockHeightPixels[block.size] >> 6);
+ const int width_chunks = std::max(1, block.width >> 6);
+ const int height_chunks = std::max(1, block.height >> 6);
const BlockSize size_chunk4x4 =
(width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
const BlockParameters& bp = *block.bp;
@@ -1574,7 +1847,7 @@ bool Tile::Residual(const Block& block, ProcessingMode mode) {
bool Tile::IsMvValid(const Block& block, bool is_compound) const {
const BlockParameters& bp = *block.bp;
for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) {
- for (int mv_component : bp.mv[i].mv) {
+ for (int mv_component : bp.mv.mv[i].mv) {
if (std::abs(mv_component) >= (1 << 14)) {
return false;
}
@@ -1583,22 +1856,20 @@ bool Tile::IsMvValid(const Block& block, bool is_compound) const {
if (!block.bp->prediction_parameters->use_intra_block_copy) {
return true;
}
- const int block_width = kBlockWidthPixels[block.size];
- const int block_height = kBlockHeightPixels[block.size];
- if ((bp.mv[0].mv[0] & 7) != 0 || (bp.mv[0].mv[1] & 7) != 0) {
+ if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) {
return false;
}
- const int delta_row = bp.mv[0].mv[0] >> 3;
- const int delta_column = bp.mv[0].mv[1] >> 3;
+ const int delta_row = bp.mv.mv[0].mv[0] >> 3;
+ const int delta_column = bp.mv.mv[0].mv[1] >> 3;
int src_top_edge = MultiplyBy4(block.row4x4) + delta_row;
int src_left_edge = MultiplyBy4(block.column4x4) + delta_column;
- const int src_bottom_edge = src_top_edge + block_height;
- const int src_right_edge = src_left_edge + block_width;
+ const int src_bottom_edge = src_top_edge + block.height;
+ const int src_right_edge = src_left_edge + block.width;
if (block.HasChroma()) {
- if (block_width < 8 && subsampling_x_[kPlaneU] != 0) {
+ if (block.width < 8 && subsampling_x_[kPlaneU] != 0) {
src_left_edge -= 4;
}
- if (block_height < 8 && subsampling_y_[kPlaneU] != 0) {
+ if (block.height < 8 && subsampling_y_[kPlaneU] != 0) {
src_top_edge -= 4;
}
}
@@ -1636,58 +1907,102 @@ bool Tile::IsMvValid(const Block& block, bool is_compound) const {
wavefront_offset;
}
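The packed check above relies on MotionVector being a union in which mv32 aliases the two 16-bit components; under that assumption it is equivalent to the per-component test it replaces. A sketch, where IsWholePixelMv is a hypothetical helper and not part of the source:

// MVs are in 1/8-pel units, so "aligned to a whole pixel" means the low three
// bits of each component are zero. The mask 0x00070007 covers bits 0-2 of both
// 16-bit halves of mv32 at once.
inline bool IsWholePixelMv(const MotionVector& mv) {
  assert(((mv.mv[0] & 7) != 0 || (mv.mv[1] & 7) != 0) ==
         ((mv.mv32 & 0x00070007) != 0));
  return (mv.mv32 & 0x00070007) == 0;
}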
-bool Tile::AssignMv(const Block& block, bool is_compound) {
- MotionVector predicted_mv[2] = {};
+bool Tile::AssignInterMv(const Block& block, bool is_compound) {
+ int min[2];
+ int max[2];
+ GetClampParameters(block, min, max);
BlockParameters& bp = *block.bp;
- for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) {
- const PredictionParameters& prediction_parameters =
- *block.bp->prediction_parameters;
- const PredictionMode mode = prediction_parameters.use_intra_block_copy
- ? kPredictionModeNewMv
- : GetSinglePredictionMode(i, bp.y_mode);
- if (prediction_parameters.use_intra_block_copy) {
- predicted_mv[0] = prediction_parameters.ref_mv_stack[0].mv[0];
- if (predicted_mv[0].mv[0] == 0 && predicted_mv[0].mv[1] == 0) {
- predicted_mv[0] = prediction_parameters.ref_mv_stack[1].mv[0];
- }
- if (predicted_mv[0].mv[0] == 0 && predicted_mv[0].mv[1] == 0) {
- const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()];
- if (block.row4x4 - super_block_size4x4 < row4x4_start_) {
- predicted_mv[0].mv[1] = -MultiplyBy8(
- MultiplyBy4(super_block_size4x4) + kIntraBlockCopyDelayPixels);
- } else {
- predicted_mv[0].mv[0] = -MultiplyBy32(super_block_size4x4);
+ const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
+ if (is_compound) {
+ for (int i = 0; i < 2; ++i) {
+ const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode);
+ MotionVector predicted_mv;
+ if (mode == kPredictionModeGlobalMv) {
+ predicted_mv = prediction_parameters.global_mv[i];
+ } else {
+ const int ref_mv_index = (mode == kPredictionModeNearestMv ||
+ (mode == kPredictionModeNewMv &&
+ prediction_parameters.ref_mv_count <= 1))
+ ? 0
+ : prediction_parameters.ref_mv_index;
+ predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i);
+ if (ref_mv_index < prediction_parameters.ref_mv_count) {
+ predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
+ predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
}
}
- } else if (mode == kPredictionModeGlobalMv) {
- predicted_mv[i] = prediction_parameters.global_mv[i];
+ if (mode == kPredictionModeNewMv) {
+ ReadMotionVector(block, i);
+ bp.mv.mv[i].mv[0] += predicted_mv.mv[0];
+ bp.mv.mv[i].mv[1] += predicted_mv.mv[1];
+ } else {
+ bp.mv.mv[i] = predicted_mv;
+ }
+ }
+ } else {
+ const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode);
+ MotionVector predicted_mv;
+ if (mode == kPredictionModeGlobalMv) {
+ predicted_mv = prediction_parameters.global_mv[0];
} else {
const int ref_mv_index = (mode == kPredictionModeNearestMv ||
(mode == kPredictionModeNewMv &&
prediction_parameters.ref_mv_count <= 1))
? 0
: prediction_parameters.ref_mv_index;
- predicted_mv[i] = prediction_parameters.ref_mv_stack[ref_mv_index].mv[i];
+ predicted_mv = prediction_parameters.reference_mv(ref_mv_index);
+ if (ref_mv_index < prediction_parameters.ref_mv_count) {
+ predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
+ predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
+ }
}
if (mode == kPredictionModeNewMv) {
- ReadMotionVector(block, i);
- bp.mv[i].mv[0] += predicted_mv[i].mv[0];
- bp.mv[i].mv[1] += predicted_mv[i].mv[1];
+ ReadMotionVector(block, 0);
+ bp.mv.mv[0].mv[0] += predicted_mv.mv[0];
+ bp.mv.mv[0].mv[1] += predicted_mv.mv[1];
} else {
- bp.mv[i] = predicted_mv[i];
+ bp.mv.mv[0] = predicted_mv;
}
}
return IsMvValid(block, is_compound);
}
+bool Tile::AssignIntraMv(const Block& block) {
+ // TODO(linfengz): Check if the clamping process is necessary.
+ int min[2];
+ int max[2];
+ GetClampParameters(block, min, max);
+ BlockParameters& bp = *block.bp;
+ const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
+ const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0);
+ ReadMotionVector(block, 0);
+ if (ref_mv_0.mv32 == 0) {
+ const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1);
+ if (ref_mv_1.mv32 == 0) {
+ const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()];
+ if (block.row4x4 - super_block_size4x4 < row4x4_start_) {
+ bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4);
+ bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels);
+ } else {
+ bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4);
+ }
+ } else {
+ bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]);
+ bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[1], max[1]);
+ }
+ } else {
+ bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]);
+ bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]);
+ }
+ return IsMvValid(block, /*is_compound=*/false);
+}
+
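The two subtractions in the zero-reference fallback above match the single expression in the removed code; the arithmetic, spelled out as a check:

// Removed form:
//   -MultiplyBy8(MultiplyBy4(super_block_size4x4) + kIntraBlockCopyDelayPixels)
// = -(8 * (4 * super_block_size4x4 + kIntraBlockCopyDelayPixels))
// = -(32 * super_block_size4x4) - 8 * kIntraBlockCopyDelayPixels
// which is exactly MultiplyBy32(super_block_size4x4) and
// MultiplyBy8(kIntraBlockCopyDelayPixels) subtracted in turn, in 1/8-pel units.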
void Tile::ResetEntropyContext(const Block& block) {
- const int block_width4x4 = kNum4x4BlocksWide[block.size];
- const int block_height4x4 = kNum4x4BlocksHigh[block.size];
for (int plane = 0; plane < (block.HasChroma() ? PlaneCount() : 1); ++plane) {
const int subsampling_x = subsampling_x_[plane];
const int start_x = block.column4x4 >> subsampling_x;
const int end_x =
- std::min((block.column4x4 + block_width4x4) >> subsampling_x,
+ std::min((block.column4x4 + block.width4x4) >> subsampling_x,
frame_header_.columns4x4);
memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0,
end_x - start_x);
@@ -1696,7 +2011,7 @@ void Tile::ResetEntropyContext(const Block& block) {
const int subsampling_y = subsampling_y_[plane];
const int start_y = block.row4x4 >> subsampling_y;
const int end_y =
- std::min((block.row4x4 + block_height4x4) >> subsampling_y,
+ std::min((block.row4x4 + block.height4x4) >> subsampling_y,
frame_header_.rows4x4);
memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0,
end_y - start_y);
@@ -1705,12 +2020,15 @@ void Tile::ResetEntropyContext(const Block& block) {
}
}
-void Tile::ComputePrediction(const Block& block) {
+bool Tile::ComputePrediction(const Block& block) {
+ const BlockParameters& bp = *block.bp;
+ if (!bp.is_inter) return true;
const int mask =
(1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) -
1;
const int sub_block_row4x4 = block.row4x4 & mask;
const int sub_block_column4x4 = block.column4x4 & mask;
+ const int plane_count = block.HasChroma() ? PlaneCount() : 1;
// Returns true if this block applies local warping. The state is determined
// in the Y plane and carried for use in the U/V planes.
// But the U/V planes will not apply warping when the block size is smaller
@@ -1718,20 +2036,19 @@ void Tile::ComputePrediction(const Block& block) {
bool is_local_valid = false;
// Local warping parameters, similar usage as is_local_valid.
GlobalMotion local_warp_params;
- for (int plane = 0; plane < (block.HasChroma() ? PlaneCount() : 1); ++plane) {
+ int plane = 0;
+ do {
const int8_t subsampling_x = subsampling_x_[plane];
const int8_t subsampling_y = subsampling_y_[plane];
- const BlockSize plane_size =
- block.residual_size[GetPlaneType(static_cast<Plane>(plane))];
+ const BlockSize plane_size = block.residual_size[plane];
const int block_width4x4 = kNum4x4BlocksWide[plane_size];
const int block_height4x4 = kNum4x4BlocksHigh[plane_size];
const int block_width = MultiplyBy4(block_width4x4);
const int block_height = MultiplyBy4(block_height4x4);
const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
- const BlockParameters& bp = *block.bp;
- if (bp.is_inter && bp.reference_frame[1] == kReferenceFrameIntra) {
- const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
+ if (bp.reference_frame[1] == kReferenceFrameIntra) {
+ const int tr_row4x4 = sub_block_row4x4 >> subsampling_y;
const int tr_column4x4 =
(sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1;
const int bl_row4x4 =
@@ -1740,88 +2057,98 @@ void Tile::ComputePrediction(const Block& block) {
const TransformSize tx_size =
k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]]
[k4x4HeightLog2[plane_size]];
- const bool has_left =
- plane == kPlaneY ? block.left_available : block.LeftAvailableChroma();
- const bool has_top =
- plane == kPlaneY ? block.top_available : block.TopAvailableChroma();
- if (sequence_header_.color_config.bitdepth == 8) {
- IntraPrediction<uint8_t>(
- block, static_cast<Plane>(plane), base_x, base_y, has_left, has_top,
- block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
- block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
- kInterIntraToIntraMode[block.bp->prediction_parameters
- ->inter_intra_mode],
- tx_size);
-#if LIBGAV1_MAX_BITDEPTH >= 10
- } else {
- IntraPrediction<uint16_t>(
- block, static_cast<Plane>(plane), base_x, base_y, has_left, has_top,
- block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
- block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
- kInterIntraToIntraMode[block.bp->prediction_parameters
- ->inter_intra_mode],
- tx_size);
-#endif
- }
+ const bool has_left = block.left_available[plane];
+ const bool has_top = block.top_available[plane];
+ CALL_BITDEPTH_FUNCTION(
+ IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y,
+ has_left, has_top,
+ block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
+ block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
+ kInterIntraToIntraMode[block.bp->prediction_parameters
+ ->inter_intra_mode],
+ tx_size);
}
- if (bp.is_inter) {
- int candidate_row = (block.row4x4 >> subsampling_y) << subsampling_y;
- int candidate_column = (block.column4x4 >> subsampling_x)
- << subsampling_x;
- bool some_use_intra = false;
- for (int r = 0; r < (block_height4x4 << subsampling_y); ++r) {
- for (int c = 0; c < (block_width4x4 << subsampling_x); ++c) {
- auto* const bp = block_parameters_holder_.Find(candidate_row + r,
- candidate_column + c);
- if (bp != nullptr && bp->reference_frame[0] == kReferenceFrameIntra) {
- some_use_intra = true;
- break;
- }
+ int candidate_row = block.row4x4;
+ int candidate_column = block.column4x4;
+ bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra;
+ if (!some_use_intra && plane != 0) {
+ candidate_row = (candidate_row >> subsampling_y) << subsampling_y;
+ candidate_column = (candidate_column >> subsampling_x) << subsampling_x;
+ if (candidate_row != block.row4x4) {
+ // Top block.
+ const BlockParameters& bp_top =
+ *block_parameters_holder_.Find(candidate_row, block.column4x4);
+ some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra;
+ if (!some_use_intra && candidate_column != block.column4x4) {
+ // Top-left block.
+ const BlockParameters& bp_top_left =
+ *block_parameters_holder_.Find(candidate_row, candidate_column);
+ some_use_intra =
+ bp_top_left.reference_frame[0] == kReferenceFrameIntra;
}
- if (some_use_intra) break;
- }
- int prediction_width;
- int prediction_height;
- if (some_use_intra) {
- candidate_row = block.row4x4;
- candidate_column = block.column4x4;
- prediction_width = block_width;
- prediction_height = block_height;
- } else {
- prediction_width = kBlockWidthPixels[block.size] >> subsampling_x;
- prediction_height = kBlockHeightPixels[block.size] >> subsampling_y;
}
- for (int r = 0, y = 0; y < block_height; y += prediction_height, ++r) {
- for (int c = 0, x = 0; x < block_width; x += prediction_width, ++c) {
- InterPrediction(block, static_cast<Plane>(plane), base_x + x,
- base_y + y, prediction_width, prediction_height,
- candidate_row + r, candidate_column + c,
- &is_local_valid, &local_warp_params);
- }
+ if (!some_use_intra && candidate_column != block.column4x4) {
+ // Left block.
+ const BlockParameters& bp_left =
+ *block_parameters_holder_.Find(block.row4x4, candidate_column);
+ some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra;
}
}
- }
+ int prediction_width;
+ int prediction_height;
+ if (some_use_intra) {
+ candidate_row = block.row4x4;
+ candidate_column = block.column4x4;
+ prediction_width = block_width;
+ prediction_height = block_height;
+ } else {
+ prediction_width = block.width >> subsampling_x;
+ prediction_height = block.height >> subsampling_y;
+ }
+ int r = 0;
+ int y = 0;
+ do {
+ int c = 0;
+ int x = 0;
+ do {
+ if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x,
+ base_y + y, prediction_width, prediction_height,
+ candidate_row + r, candidate_column + c,
+ &is_local_valid, &local_warp_params)) {
+ return false;
+ }
+ ++c;
+ x += prediction_width;
+ } while (x < block_width);
+ ++r;
+ y += prediction_height;
+ } while (y < block_height);
+ } while (++plane < plane_count);
+ return true;
}
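The candidate realignment in the some_use_intra logic above only has an effect for sub-8x8 luma blocks on subsampled planes; a worked example under that assumption:

// Assuming 4:2:0 (subsampling_x = subsampling_y = 1) and a 4x4 luma block at
// row4x4 = 5, column4x4 = 7:
//   candidate_row    = (5 >> 1) << 1 = 4;  // one 4x4 unit above
//   candidate_column = (7 >> 1) << 1 = 6;  // one 4x4 unit to the left
// The co-located luma area of the chroma block then also covers the top, left
// and top-left luma neighbors, which is exactly the set probed when deciding
// some_use_intra.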
+#undef CALL_BITDEPTH_FUNCTION
+
void Tile::PopulateDeblockFilterLevel(const Block& block) {
if (!post_filter_.DoDeblock()) return;
BlockParameters& bp = *block.bp;
+ const int mode_id =
+ static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode));
for (int i = 0; i < kFrameLfCount; ++i) {
if (delta_lf_all_zero_) {
bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel(
- bp.segment_id, i, bp.reference_frame[0],
- LoopFilterMask::GetModeId(bp.y_mode));
+ bp.segment_id, i, bp.reference_frame[0], mode_id);
} else {
bp.deblock_filter_level[i] =
deblock_filter_levels_[bp.segment_id][i][bp.reference_frame[0]]
- [LoopFilterMask::GetModeId(bp.y_mode)];
+ [mode_id];
}
}
}
bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
ParameterTree* const tree,
- DecoderScratchBuffer* const scratch_buffer,
+ TileScratchBuffer* const scratch_buffer,
ResidualPtr* residual) {
// Do not process the block if the starting point is beyond the visible frame.
// This is equivalent to the has_row/has_column check in the
@@ -1831,34 +2158,34 @@ bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
column4x4 >= frame_header_.columns4x4) {
return true;
}
- Block block(*this, row4x4, column4x4, block_size, scratch_buffer, residual,
- tree->parameters());
- block.bp->size = block_size;
- block_parameters_holder_.FillCache(row4x4, column4x4, block_size,
- tree->parameters());
- block.bp->prediction_parameters =
+ BlockParameters& bp = *tree->parameters();
+ block_parameters_holder_.FillCache(row4x4, column4x4, block_size, &bp);
+ Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
+ bp.size = block_size;
+ bp.prediction_parameters =
split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>(
new (std::nothrow) PredictionParameters())
: std::move(prediction_parameters_);
- if (block.bp->prediction_parameters == nullptr) return false;
+ if (bp.prediction_parameters == nullptr) return false;
if (!DecodeModeInfo(block)) return false;
+ bp.is_global_mv_block = (bp.y_mode == kPredictionModeGlobalMv ||
+ bp.y_mode == kPredictionModeGlobalGlobalMv) &&
+ !IsBlockDimension4(bp.size);
PopulateDeblockFilterLevel(block);
if (!ReadPaletteTokens(block)) return false;
DecodeTransformSize(block);
- BlockParameters& bp = *block.bp;
// Part of Section 5.11.37 in the spec (implemented as a simple lookup).
- bp.uv_transform_size =
- frame_header_.segmentation.lossless[bp.segment_id]
- ? kTransformSize4x4
- : kUVTransformSize[block.residual_size[kPlaneTypeUV]];
+ bp.uv_transform_size = frame_header_.segmentation.lossless[bp.segment_id]
+ ? kTransformSize4x4
+ : kUVTransformSize[block.residual_size[kPlaneU]];
if (bp.skip) ResetEntropyContext(block);
- const int block_width4x4 = kNum4x4BlocksWide[block_size];
- const int block_height4x4 = kNum4x4BlocksHigh[block_size];
if (split_parse_and_decode_) {
if (!Residual(block, kProcessingModeParseOnly)) return false;
} else {
- ComputePrediction(block);
- if (!Residual(block, kProcessingModeParseAndDecode)) return false;
+ if (!ComputePrediction(block) ||
+ !Residual(block, kProcessingModeParseAndDecode)) {
+ return false;
+ }
}
// If frame_header_.segmentation.enabled is false, bp.segment_id is 0 for all
// blocks. We don't need to call save bp.segment_id in the current frame
@@ -1870,25 +2197,22 @@ bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
// save bp.segment_id in the current frame.
if (frame_header_.segmentation.enabled &&
frame_header_.segmentation.update_map) {
- const int x_limit =
- std::min(frame_header_.columns4x4 - column4x4, block_width4x4);
- const int y_limit =
- std::min(frame_header_.rows4x4 - row4x4, block_height4x4);
+ const int x_limit = std::min(frame_header_.columns4x4 - column4x4,
+ static_cast<int>(block.width4x4));
+ const int y_limit = std::min(frame_header_.rows4x4 - row4x4,
+ static_cast<int>(block.height4x4));
current_frame_.segmentation_map()->FillBlock(row4x4, column4x4, x_limit,
y_limit, bp.segment_id);
}
- if (build_bit_mask_when_parsing_ || !split_parse_and_decode_) {
- BuildBitMask(row4x4, column4x4, block_size);
- }
+ StoreMotionFieldMvsIntoCurrentFrame(block);
if (!split_parse_and_decode_) {
- StoreMotionFieldMvsIntoCurrentFrame(block);
- prediction_parameters_ = std::move(block.bp->prediction_parameters);
+ prediction_parameters_ = std::move(bp.prediction_parameters);
}
return true;
}
bool Tile::DecodeBlock(ParameterTree* const tree,
- DecoderScratchBuffer* const scratch_buffer,
+ TileScratchBuffer* const scratch_buffer,
ResidualPtr* residual) {
const int row4x4 = tree->row4x4();
const int column4x4 = tree->column4x4();
@@ -1897,21 +2221,18 @@ bool Tile::DecodeBlock(ParameterTree* const tree,
return true;
}
const BlockSize block_size = tree->block_size();
- Block block(*this, row4x4, column4x4, block_size, scratch_buffer, residual,
- tree->parameters());
- ComputePrediction(block);
- if (!Residual(block, kProcessingModeDecodeOnly)) return false;
- if (!build_bit_mask_when_parsing_) {
- BuildBitMask(row4x4, column4x4, block_size);
+ Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
+ if (!ComputePrediction(block) ||
+ !Residual(block, kProcessingModeDecodeOnly)) {
+ return false;
}
- StoreMotionFieldMvsIntoCurrentFrame(block);
block.bp->prediction_parameters.reset(nullptr);
return true;
}
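When parsing and decoding are split across threads, ProcessBlock (parse pass) and DecodeBlock (decode pass) communicate through the per-superblock TransformParameterQueue seen in TransformBlock above; a sketch of the hand-off, with the queue's FIFO behavior assumed rather than shown in this file and sb_row/sb_col used as shorthand for the superblock indices:

// Parse pass (kProcessingModeParseOnly), producer side:
//   residual_buffer_threaded_[sb_row][sb_col]->transform_parameters()
//       ->Push(non_zero_coeff_count, tx_type);
// Decode pass (kProcessingModeDecodeOnly), consumer side:
//   TransformParameterQueue& q =
//       *residual_buffer_threaded_[sb_row][sb_col]->transform_parameters();
//   ReconstructBlock(block, plane, start_x, start_y, tx_size, q.Type(),
//                    q.NonZeroCoeffCount());
//   q.Pop();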
bool Tile::ProcessPartition(int row4x4_start, int column4x4_start,
ParameterTree* const root,
- DecoderScratchBuffer* const scratch_buffer,
+ TileScratchBuffer* const scratch_buffer,
ResidualPtr* residual) {
Stack<ParameterTree*, kDfsStackSize> stack;
@@ -2025,7 +2346,7 @@ void Tile::ResetLoopRestorationParams() {
}
void Tile::ResetCdef(const int row4x4, const int column4x4) {
- if (cdef_index_[0] == nullptr) return;
+ if (!sequence_header_.enable_cdef) return;
const int row = DivideBy16(row4x4);
const int column = DivideBy16(column4x4);
cdef_index_[row][column] = -1;
@@ -2039,7 +2360,7 @@ void Tile::ResetCdef(const int row4x4, const int column4x4) {
}
}
-void Tile::ClearBlockDecoded(DecoderScratchBuffer* const scratch_buffer,
+void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer,
int row4x4, int column4x4) {
// Set everything to false.
memset(scratch_buffer->block_decoded, 0,
@@ -2075,7 +2396,7 @@ void Tile::ClearBlockDecoded(DecoderScratchBuffer* const scratch_buffer,
}
bool Tile::ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
- DecoderScratchBuffer* const scratch_buffer,
+ TileScratchBuffer* const scratch_buffer,
ProcessingMode mode) {
const bool parsing =
mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode;
@@ -2139,11 +2460,11 @@ bool Tile::ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
}
bool Tile::DecodeSuperBlock(ParameterTree* const tree,
- DecoderScratchBuffer* const scratch_buffer,
+ TileScratchBuffer* const scratch_buffer,
ResidualPtr* residual) {
Stack<ParameterTree*, kDfsStackSize> stack;
stack.Push(tree);
- while (!stack.Empty()) {
+ do {
ParameterTree* const node = stack.Pop();
if (node->partition() != kPartitionNone) {
for (int i = 3; i >= 0; --i) {
@@ -2157,7 +2478,7 @@ bool Tile::DecodeSuperBlock(ParameterTree* const tree,
node->row4x4(), node->column4x4());
return false;
}
- }
+ } while (!stack.Empty());
return true;
}
@@ -2189,222 +2510,87 @@ void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4,
}
}
-void Tile::BuildBitMask(int row4x4, int column4x4, BlockSize block_size) {
- if (!post_filter_.DoDeblock()) return;
- if (block_size <= kBlock64x64) {
- BuildBitMaskHelper(row4x4, column4x4, block_size, true, true);
- } else {
- const int block_width4x4 = kNum4x4BlocksWide[block_size];
- const int block_height4x4 = kNum4x4BlocksHigh[block_size];
- for (int y = 0; y < block_height4x4; y += 16) {
- for (int x = 0; x < block_width4x4; x += 16) {
- BuildBitMaskHelper(row4x4 + y, column4x4 + x, kBlock64x64, x == 0,
- y == 0);
- }
- }
- }
-}
-
-void Tile::BuildBitMaskHelper(int row4x4, int column4x4, BlockSize block_size,
- const bool is_vertical_block_border,
- const bool is_horizontal_block_border) {
- const int block_width4x4 = kNum4x4BlocksWide[block_size];
- const int block_height4x4 = kNum4x4BlocksHigh[block_size];
- BlockParameters& bp = *block_parameters_holder_.Find(row4x4, column4x4);
- const bool skip = bp.skip && bp.is_inter;
- LoopFilterMask* const masks = post_filter_.masks();
- const int unit_id = DivideBy16(row4x4) * masks->num_64x64_blocks_per_row() +
- DivideBy16(column4x4);
-
- for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
- // For U and V planes, do not build bit masks if level == 0.
- if (plane > kPlaneY && frame_header_.loop_filter.level[plane + 1] == 0) {
- continue;
- }
- // Build bit mask for vertical edges.
- const int subsampling_x = subsampling_x_[plane];
- const int subsampling_y = subsampling_y_[plane];
- const int plane_width =
- RightShiftWithRounding(frame_header_.width, subsampling_x);
- const int column_limit =
- std::min({column4x4 + block_width4x4, frame_header_.columns4x4,
- DivideBy4(plane_width + 3) << subsampling_x});
- const int plane_height =
- RightShiftWithRounding(frame_header_.height, subsampling_y);
- const int row_limit =
- std::min({row4x4 + block_height4x4, frame_header_.rows4x4,
- DivideBy4(plane_height + 3) << subsampling_y});
- const int row_start = GetDeblockPosition(row4x4, subsampling_y);
- const int column_start = GetDeblockPosition(column4x4, subsampling_x);
- if (row_start >= row_limit || column_start >= column_limit) {
- continue;
- }
- const int vertical_step = 1 << subsampling_y;
- const int horizontal_step = 1 << subsampling_x;
- const BlockParameters& bp =
- *block_parameters_holder_.Find(row_start, column_start);
- const int horizontal_level_index =
- kDeblockFilterLevelIndex[plane][kLoopFilterTypeHorizontal];
- const int vertical_level_index =
- kDeblockFilterLevelIndex[plane][kLoopFilterTypeVertical];
- const uint8_t vertical_level =
- bp.deblock_filter_level[vertical_level_index];
-
- for (int row = row_start; row < row_limit; row += vertical_step) {
- for (int column = column_start; column < column_limit;) {
- const TransformSize tx_size = (plane == kPlaneY)
- ? inter_transform_sizes_[row][column]
- : bp.uv_transform_size;
- // (1). Don't filter frame boundary.
- // (2). For tile boundary, we don't know whether the previous tile is
- // available or not, thus we handle it after all tiles are decoded.
- const bool is_vertical_border =
- (column == column_start) && is_vertical_block_border;
- if (column == GetDeblockPosition(column4x4_start_, subsampling_x) ||
- (skip && !is_vertical_border)) {
- column += kNum4x4BlocksWide[tx_size] << subsampling_x;
- continue;
- }
-
- // bp_left is the parameter of the left prediction block which
- // is guaranteed to be inside the tile.
- const BlockParameters& bp_left =
- *block_parameters_holder_.Find(row, column - horizontal_step);
- const uint8_t left_level =
- is_vertical_border
- ? bp_left.deblock_filter_level[vertical_level_index]
- : vertical_level;
- // We don't have to check if the left block is skipped or not,
- // because if the current transform block is on the edge of the coding
- // block, is_vertical_border is true; if it's not on the edge,
- // left skip is equal to skip.
- if (vertical_level != 0 || left_level != 0) {
- const TransformSize left_tx_size =
- (plane == kPlaneY)
- ? inter_transform_sizes_[row][column - horizontal_step]
- : bp_left.uv_transform_size;
- const LoopFilterTransformSizeId transform_size_id =
- GetTransformSizeIdWidth(tx_size, left_tx_size);
- const int r = row & (kNum4x4InLoopFilterMaskUnit - 1);
- const int c = column & (kNum4x4InLoopFilterMaskUnit - 1);
- const int shift = LoopFilterMask::GetShift(r, c);
- const int index = LoopFilterMask::GetIndex(r);
- const auto mask = static_cast<uint64_t>(1) << shift;
- masks->SetLeft(mask, unit_id, plane, transform_size_id, index);
- const uint8_t current_level =
- (vertical_level == 0) ? left_level : vertical_level;
- masks->SetLevel(current_level, unit_id, plane,
- kLoopFilterTypeVertical,
- LoopFilterMask::GetLevelOffset(r, c));
- }
- column += kNum4x4BlocksWide[tx_size] << subsampling_x;
- }
- }
-
- // Build bit mask for horizontal edges.
- const uint8_t horizontal_level =
- bp.deblock_filter_level[horizontal_level_index];
- for (int column = column_start; column < column_limit;
- column += horizontal_step) {
- for (int row = row_start; row < row_limit;) {
- const TransformSize tx_size = (plane == kPlaneY)
- ? inter_transform_sizes_[row][column]
- : bp.uv_transform_size;
-
- // (1). Don't filter frame boundary.
- // (2). For tile boundary, we don't know whether the previous tile is
- // available or not, thus we handle it after all tiles are decoded.
- const bool is_horizontal_border =
- (row == row_start) && is_horizontal_block_border;
- if (row == GetDeblockPosition(row4x4_start_, subsampling_y) ||
- (skip && !is_horizontal_border)) {
- row += kNum4x4BlocksHigh[tx_size] << subsampling_y;
- continue;
- }
-
- // bp_top is the parameter of the top prediction block which is
- // guaranteed to be inside the tile.
- const BlockParameters& bp_top =
- *block_parameters_holder_.Find(row - vertical_step, column);
- const uint8_t top_level =
- is_horizontal_border
- ? bp_top.deblock_filter_level[horizontal_level_index]
- : horizontal_level;
- // We don't have to check if the top block is skipped or not,
- // because if the current transform block is on the edge of the coding
- // block, is_horizontal_border is true; if it's not on the edge,
- // top skip is equal to skip.
- if (horizontal_level != 0 || top_level != 0) {
- const TransformSize top_tx_size =
- (plane == kPlaneY)
- ? inter_transform_sizes_[row - vertical_step][column]
- : bp_top.uv_transform_size;
- const LoopFilterTransformSizeId transform_size_id =
- static_cast<LoopFilterTransformSizeId>(
- std::min({kTransformHeightLog2[tx_size] - 2,
- kTransformHeightLog2[top_tx_size] - 2, 2}));
- const int r = row & (kNum4x4InLoopFilterMaskUnit - 1);
- const int c = column & (kNum4x4InLoopFilterMaskUnit - 1);
- const int shift = LoopFilterMask::GetShift(r, c);
- const int index = LoopFilterMask::GetIndex(r);
- const auto mask = static_cast<uint64_t>(1) << shift;
- masks->SetTop(mask, unit_id, plane, transform_size_id, index);
- const uint8_t current_level =
- (horizontal_level == 0) ? top_level : horizontal_level;
- masks->SetLevel(current_level, unit_id, plane,
- kLoopFilterTypeHorizontal,
- LoopFilterMask::GetLevelOffset(r, c));
- }
- row += kNum4x4BlocksHigh[tx_size] << subsampling_y;
- }
- }
+void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
+ if (frame_header_.refresh_frame_flags == 0 ||
+ IsIntraFrame(frame_header_.frame_type)) {
+ return;
}
-}
+ // Iterate over odd rows/columns beginning at the first odd row/column for
+ // the block. It is done this way because motion field MVs are only needed
+ // at an 8x8 granularity.
+ const int row_start4x4 = block.row4x4 | 1;
+ const int row_limit4x4 =
+ std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
+ if (row_start4x4 >= row_limit4x4) return;
+ const int column_start4x4 = block.column4x4 | 1;
+ const int column_limit4x4 =
+ std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
+ if (column_start4x4 >= column_limit4x4) return;
-void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
// The largest reference MV component that can be saved.
constexpr int kRefMvsLimit = (1 << 12) - 1;
const BlockParameters& bp = *block.bp;
- ReferenceFrameType reference_frame_to_store = kReferenceFrameNone;
- MotionVector mv_to_store = {};
+ ReferenceInfo* reference_info = current_frame_.reference_info();
for (int i = 1; i >= 0; --i) {
- if (bp.reference_frame[i] > kReferenceFrameIntra &&
- std::abs(bp.mv[i].mv[MotionVector::kRow]) <= kRefMvsLimit &&
- std::abs(bp.mv[i].mv[MotionVector::kColumn]) <= kRefMvsLimit &&
- GetRelativeDistance(
- reference_order_hint_
- [frame_header_.reference_frame_index[bp.reference_frame[i] -
- kReferenceFrameLast]],
- frame_header_.order_hint, sequence_header_.enable_order_hint,
- sequence_header_.order_hint_bits) < 0) {
- reference_frame_to_store = bp.reference_frame[i];
- mv_to_store = bp.mv[i];
- break;
- }
- }
- // Iterate over odd rows/columns beginning at the first odd row/column for the
- // block. It is done this way because motion field mvs are only needed at a
- // 8x8 granularity.
- const int row_start = block.row4x4 | 1;
- const int row_limit = std::min(block.row4x4 + kNum4x4BlocksHigh[block.size],
- frame_header_.rows4x4);
- const int column_start = block.column4x4 | 1;
- const int column_limit =
- std::min(block.column4x4 + kNum4x4BlocksWide[block.size],
- frame_header_.columns4x4);
- for (int row = row_start; row < row_limit; row += 2) {
- const int row_index = DivideBy2(row);
- ReferenceFrameType* const reference_frame_row_start =
- current_frame_.motion_field_reference_frame(row_index,
- DivideBy2(column_start));
- static_assert(sizeof(reference_frame_to_store) == sizeof(int8_t), "");
- memset(reference_frame_row_start, reference_frame_to_store,
- DivideBy2(column_limit - column_start + 1));
- if (reference_frame_to_store <= kReferenceFrameIntra) continue;
- for (int column = column_start; column < column_limit; column += 2) {
+ const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i];
+ // Must make a local copy so that StoreMotionFieldMvs() knows there is no
+ // overlap between load and store.
+ const MotionVector mv_to_store = bp.mv.mv[i];
+ const int mv_row = std::abs(mv_to_store.mv[MotionVector::kRow]);
+ const int mv_column = std::abs(mv_to_store.mv[MotionVector::kColumn]);
+ if (reference_frame_to_store > kReferenceFrameIntra &&
+ // kRefMvsLimit equals 0x0FFF ((1 << 12) - 1), so we can first bitwise OR
+ // the two absolute values and then compare with kRefMvsLimit to save a
+ // branch. The next line is equivalent to:
+ //   mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
+ (mv_row | mv_column) <= kRefMvsLimit &&
+ reference_info->relative_distance_from[reference_frame_to_store] < 0) {
+ const int row_start8x8 = DivideBy2(row_start4x4);
+ const int row_limit8x8 = DivideBy2(row_limit4x4);
+ const int column_start8x8 = DivideBy2(column_start4x4);
+ const int column_limit8x8 = DivideBy2(column_limit4x4);
+ const int rows = row_limit8x8 - row_start8x8;
+ const int columns = column_limit8x8 - column_start8x8;
+ const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4());
+ ReferenceFrameType* const reference_frame_row_start =
+ &reference_info
+ ->motion_field_reference_frame[row_start8x8][column_start8x8];
MotionVector* const mv =
- current_frame_.motion_field_mv(row_index, DivideBy2(column));
- *mv = mv_to_store;
+ &reference_info->motion_field_mv[row_start8x8][column_start8x8];
+
+ // Specialize the cases where columns is 1, 2, 4, 8 or 16. This lets
+ // memset() be inlined and simplifies std::fill() for these cases.
+ if (columns <= 1) {
+ // Don't change the above condition to (columns == 1).
+ // Condition (columns <= 1) may help the compiler simplify the inlining
+ // of the general case of StoreMotionFieldMvs() by eliminating the
+ // (columns == 0) case.
+ assert(columns == 1);
+ StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
+ 1, reference_frame_row_start, mv);
+ } else if (columns == 2) {
+ StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
+ 2, reference_frame_row_start, mv);
+ } else if (columns == 4) {
+ StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
+ 4, reference_frame_row_start, mv);
+ } else if (columns == 8) {
+ StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
+ 8, reference_frame_row_start, mv);
+ } else if (columns == 16) {
+ StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
+ 16, reference_frame_row_start, mv);
+ } else if (columns < 16) {
+ // This always-true condition (columns < 16) may help the compiler
+ // simplify the inlining of the call below.
+ // This general case is rare and usually only happens for blocks that
+ // contain the right boundary of the frame.
+ StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
+ columns, reference_frame_row_start, mv);
+ } else {
+ assert(false);
+ }
+ return;
}
}
}
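The bitwise-OR shortcut used above when deciding whether an MV can be stored works because kRefMvsLimit has all of its low bits set; a small self-contained sketch of the equivalence, with all names illustrative rather than taken from the source:

#include <cstdlib>

constexpr int kLimit = (1 << 12) - 1;  // 0x0FFF, the same form as kRefMvsLimit.

// For non-negative values, (x | y) has a bit set above bit 11 exactly when
// x or y does, so the packed comparison matches the per-component one.
bool FitsPacked(int mv_row, int mv_column) {
  return (std::abs(mv_row) | std::abs(mv_column)) <= kLimit;
}
bool FitsPerComponent(int mv_row, int mv_column) {
  return std::abs(mv_row) <= kLimit && std::abs(mv_column) <= kLimit;
}
// e.g. FitsPacked(4095, 7) == FitsPerComponent(4095, 7) == true, and
//      FitsPacked(4096, 7) == FitsPerComponent(4096, 7) == false.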