about | summary | refs | log | tree | commit | diff
path: root/libgav1/src/dsp/convolve.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libgav1/src/dsp/convolve.cc')
-rw-r--r-- libgav1/src/dsp/convolve.cc 312
1 files changed, 152 insertions, 160 deletions
diff --git a/libgav1/src/dsp/convolve.cc b/libgav1/src/dsp/convolve.cc
index 5358473..c8df357 100644
--- a/libgav1/src/dsp/convolve.cc
+++ b/libgav1/src/dsp/convolve.cc
@@ -29,33 +29,44 @@ namespace libgav1 {
namespace dsp {
namespace {
-constexpr int kSubPixelMask = (1 << kSubPixelBits) - 1;
constexpr int kHorizontalOffset = 3;
constexpr int kVerticalOffset = 3;
-int GetFilterIndex(const int filter_index, const int length) {
- if (length <= 4) {
- if (filter_index == kInterpolationFilterEightTap ||
- filter_index == kInterpolationFilterEightTapSharp) {
- return 4;
- }
- if (filter_index == kInterpolationFilterEightTapSmooth) {
- return 5;
- }
- }
- return filter_index;
-}
+// Compound prediction output ranges from ConvolveTest.ShowRange.
+// Bitdepth: 8 Input range: [ 0, 255]
+// intermediate range: [ -7140, 23460]
+// first pass output range: [ -1785, 5865]
+// intermediate range: [ -328440, 589560]
+// second pass output range: [ 0, 255]
+// compound second pass output range: [ -5132, 9212]
+//
+// Bitdepth: 10 Input range: [ 0, 1023]
+// intermediate range: [ -28644, 94116]
+// first pass output range: [ -7161, 23529]
+// intermediate range: [-1317624, 2365176]
+// second pass output range: [ 0, 1023]
+// compound second pass output range: [ 3988, 61532]
+//
+// Bitdepth: 12 Input range: [ 0, 4095]
+// intermediate range: [ -114660, 376740]
+// first pass output range: [ -7166, 23546]
+// intermediate range: [-1318560, 2366880]
+// second pass output range: [ 0, 4095]
+// compound second pass output range: [ 3974, 61559]
template <int bitdepth, typename Pixel>
-void ConvolveScale2D_C(
- const void* const reference, const ptrdiff_t reference_stride,
- const int horizontal_filter_index, const int vertical_filter_index,
- const int inter_round_bits_vertical, const int subpixel_x,
- const int subpixel_y, const int step_x, const int step_y, const int width,
- const int height, void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveScale2D_C(const void* const reference,
+ const ptrdiff_t reference_stride,
+ const int horizontal_filter_index,
+ const int vertical_filter_index, const int subpixel_x,
+ const int subpixel_y, const int step_x, const int step_y,
+ const int width, const int height, void* prediction,
+ const ptrdiff_t pred_stride) {
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
: kInterRoundBitsHorizontal;
+ constexpr int kRoundBitsVertical =
+ (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
const int intermediate_height =
(((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
kScaleSubPixelBits) +
@@ -65,7 +76,6 @@ void ConvolveScale2D_C(
int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
(2 * kMaxSuperBlockSizeInPixels + 8)];
const int intermediate_stride = kMaxSuperBlockSizeInPixels;
- const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
const int max_pixel_value = (1 << bitdepth) - 1;
// Horizontal filter.
@@ -87,16 +97,13 @@ void ConvolveScale2D_C(
int p = subpixel_x;
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << (bitdepth + kFilterBits - 1);
+ int sum = 0;
const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
const int filter_id = (p >> 6) & kSubPixelMask;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] * src_x[k];
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
}
- assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
- intermediate[x] = static_cast<int16_t>(
- RightShiftWithRounding(sum, kRoundBitsHorizontal));
+ intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
p += step_x;
} while (++x < width);
@@ -107,26 +114,21 @@ void ConvolveScale2D_C(
// Vertical filter.
filter_index = GetFilterIndex(vertical_filter_index, height);
intermediate = intermediate_result;
- const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal;
int p = subpixel_y & 1023;
y = 0;
do {
const int filter_id = (p >> 6) & kSubPixelMask;
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << offset_bits;
+ int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
sum +=
- kSubPixelFilters[filter_index][filter_id][k] *
+ kHalfSubPixelFilters[filter_index][filter_id][k] *
intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
x];
}
- assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
- dest[x] = static_cast<Pixel>(
- Clip3(RightShiftWithRounding(sum, inter_round_bits_vertical) -
- single_round_offset,
- 0, max_pixel_value));
+ dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
+ max_pixel_value);
} while (++x < width);
dest += dest_stride;
@@ -135,15 +137,23 @@ void ConvolveScale2D_C(
}
template <int bitdepth, typename Pixel>
-void ConvolveCompoundScale2D_C(
- const void* const reference, const ptrdiff_t reference_stride,
- const int horizontal_filter_index, const int vertical_filter_index,
- const int inter_round_bits_vertical, const int subpixel_x,
- const int subpixel_y, const int step_x, const int step_y, const int width,
- const int height, void* prediction, const ptrdiff_t pred_stride) {
+void ConvolveCompoundScale2D_C(const void* const reference,
+ const ptrdiff_t reference_stride,
+ const int horizontal_filter_index,
+ const int vertical_filter_index,
+ const int subpixel_x, const int subpixel_y,
+ const int step_x, const int step_y,
+ const int width, const int height,
+ void* prediction, const ptrdiff_t pred_stride) {
+ // All compound functions output to the predictor buffer with |pred_stride|
+ // equal to |width|.
+ assert(pred_stride == width);
+ // Compound functions start at 4x4.
+ assert(width >= 4 && height >= 4);
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
: kInterRoundBitsHorizontal;
+ constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
const int intermediate_height =
(((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
kScaleSubPixelBits) +
@@ -172,16 +182,13 @@ void ConvolveCompoundScale2D_C(
int p = subpixel_x;
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << (bitdepth + kFilterBits - 1);
+ int sum = 0;
const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
const int filter_id = (p >> 6) & kSubPixelMask;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] * src_x[k];
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
}
- assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
- intermediate[x] = static_cast<int16_t>(
- RightShiftWithRounding(sum, kRoundBitsHorizontal));
+ intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
p += step_x;
} while (++x < width);
@@ -192,24 +199,22 @@ void ConvolveCompoundScale2D_C(
// Vertical filter.
filter_index = GetFilterIndex(vertical_filter_index, height);
intermediate = intermediate_result;
- const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal;
int p = subpixel_y & 1023;
y = 0;
do {
const int filter_id = (p >> 6) & kSubPixelMask;
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << offset_bits;
+ int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
sum +=
- kSubPixelFilters[filter_index][filter_id][k] *
+ kHalfSubPixelFilters[filter_index][filter_id][k] *
intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
x];
}
- assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
- dest[x] = static_cast<uint16_t>(
- RightShiftWithRounding(sum, inter_round_bits_vertical));
+ sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
+ sum += (bitdepth == 8) ? 0 : kCompoundOffset;
+ dest[x] = sum;
} while (++x < width);
dest += pred_stride;
@@ -221,15 +226,19 @@ template <int bitdepth, typename Pixel>
void ConvolveCompound2D_C(const void* const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
- const int vertical_filter_index,
- const int inter_round_bits_vertical,
- const int subpixel_x, const int subpixel_y,
- const int /*step_x*/, const int /*step_y*/,
- const int width, const int height, void* prediction,
+ const int vertical_filter_index, const int subpixel_x,
+ const int subpixel_y, const int width,
+ const int height, void* prediction,
const ptrdiff_t pred_stride) {
+ // All compound functions output to the predictor buffer with |pred_stride|
+ // equal to |width|.
+ assert(pred_stride == width);
+ // Compound functions start at 4x4.
+ assert(width >= 4 && height >= 4);
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
: kInterRoundBitsHorizontal;
+ constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
const int intermediate_height = height + kSubPixelTaps - 1;
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
@@ -249,18 +258,17 @@ void ConvolveCompound2D_C(const void* const reference,
kVerticalOffset * src_stride - kHorizontalOffset;
auto* dest = static_cast<uint16_t*>(prediction);
int filter_id = (subpixel_x >> 6) & kSubPixelMask;
+ // If |filter_id| == 0 then ConvolveVertical() should be called.
+ assert(filter_id != 0);
int y = 0;
do {
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << (bitdepth + kFilterBits - 1);
+ int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
}
- assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
- intermediate[x] = static_cast<int16_t>(
- RightShiftWithRounding(sum, kRoundBitsHorizontal));
+ intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
} while (++x < width);
src += src_stride;
@@ -271,20 +279,20 @@ void ConvolveCompound2D_C(const void* const reference,
filter_index = GetFilterIndex(vertical_filter_index, height);
intermediate = intermediate_result;
filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
- const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal;
+ // If |filter_id| == 0 then ConvolveHorizontal() should be called.
+ assert(filter_id != 0);
y = 0;
do {
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << offset_bits;
+ int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] *
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
intermediate[k * intermediate_stride + x];
}
- assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
- dest[x] = static_cast<uint16_t>(
- RightShiftWithRounding(sum, inter_round_bits_vertical));
+ sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
+ sum += (bitdepth == 8) ? 0 : kCompoundOffset;
+ dest[x] = sum;
} while (++x < width);
dest += pred_stride;
@@ -300,21 +308,20 @@ void ConvolveCompound2D_C(const void* const reference,
template <int bitdepth, typename Pixel>
void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
const int horizontal_filter_index,
- const int vertical_filter_index,
- const int inter_round_bits_vertical, const int subpixel_x,
- const int subpixel_y, const int /*step_x*/,
- const int /*step_y*/, const int width, const int height,
+ const int vertical_filter_index, const int subpixel_x,
+ const int subpixel_y, const int width, const int height,
void* prediction, const ptrdiff_t pred_stride) {
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
: kInterRoundBitsHorizontal;
+ constexpr int kRoundBitsVertical =
+ (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
const int intermediate_height = height + kSubPixelTaps - 1;
// The output of the horizontal filter, i.e. the intermediate_result, is
// guaranteed to fit in int16_t.
int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
(kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
const int intermediate_stride = kMaxSuperBlockSizeInPixels;
- const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
const int max_pixel_value = (1 << bitdepth) - 1;
// Horizontal filter.
@@ -330,18 +337,17 @@ void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
auto* dest = static_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
int filter_id = (subpixel_x >> 6) & kSubPixelMask;
+ // If |filter_id| == 0 then ConvolveVertical() should be called.
+ assert(filter_id != 0);
int y = 0;
do {
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << (bitdepth + kFilterBits - 1);
+ int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
}
- assert(sum >= 0 && sum < (1 << (bitdepth + kFilterBits + 1)));
- intermediate[x] = static_cast<int16_t>(
- RightShiftWithRounding(sum, kRoundBitsHorizontal));
+ intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
} while (++x < width);
src += src_stride;
@@ -352,22 +358,19 @@ void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
filter_index = GetFilterIndex(vertical_filter_index, height);
intermediate = intermediate_result;
filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
- const int offset_bits = bitdepth + 2 * kFilterBits - kRoundBitsHorizontal;
+ // If |filter_id| == 0 then ConvolveHorizontal() should be called.
+ assert(filter_id != 0);
y = 0;
do {
int x = 0;
do {
- // An offset to guarantee the sum is non negative.
- int sum = 1 << offset_bits;
+ int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] *
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
intermediate[k * intermediate_stride + x];
}
- assert(sum >= 0 && sum < (1 << (offset_bits + 2)));
- dest[x] = static_cast<Pixel>(
- Clip3(RightShiftWithRounding(sum, inter_round_bits_vertical) -
- single_round_offset,
- 0, max_pixel_value));
+ dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
+ max_pixel_value);
} while (++x < width);
dest += dest_stride;
@@ -385,9 +388,7 @@ void ConvolveHorizontal_C(const void* const reference,
const ptrdiff_t reference_stride,
const int horizontal_filter_index,
const int /*vertical_filter_index*/,
- const int /*inter_round_bits_vertical*/,
const int subpixel_x, const int /*subpixel_y*/,
- const int /*step_x*/, const int /*step_y*/,
const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
@@ -407,11 +408,10 @@ void ConvolveHorizontal_C(const void* const reference,
do {
int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
}
- sum = RightShiftWithRounding(sum, kRoundBitsHorizontal);
- dest[x] = static_cast<Pixel>(
- Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value));
+ sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
+ dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
} while (++x < width);
src += src_stride;
@@ -429,9 +429,7 @@ void ConvolveVertical_C(const void* const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int vertical_filter_index,
- const int /*inter_round_bits_vertical*/,
const int /*subpixel_x*/, const int subpixel_y,
- const int /*step_x*/, const int /*step_y*/,
const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
const int filter_index = GetFilterIndex(vertical_filter_index, height);
@@ -441,18 +439,9 @@ void ConvolveVertical_C(const void* const reference,
auto* dest = static_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
- // First filter is always a copy.
- if (filter_id == 0) {
- // Move |src| down the actual values and not the start of the context.
- src = static_cast<const Pixel*>(reference);
- int y = 0;
- do {
- memcpy(dest, src, width * sizeof(src[0]));
- src += src_stride;
- dest += dest_stride;
- } while (++y < height);
- return;
- }
+ // Copy filters must call ConvolveCopy().
+ assert(filter_id != 0);
+
const int max_pixel_value = (1 << bitdepth) - 1;
int y = 0;
do {
@@ -460,11 +449,11 @@ void ConvolveVertical_C(const void* const reference,
do {
int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] *
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
src[k * src_stride + x];
}
- dest[x] = static_cast<Pixel>(
- Clip3(RightShiftWithRounding(sum, kFilterBits), 0, max_pixel_value));
+ dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
+ max_pixel_value);
} while (++x < width);
src += src_stride;
@@ -477,10 +466,8 @@ void ConvolveCopy_C(const void* const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int /*vertical_filter_index*/,
- const int /*inter_round_bits_vertical*/,
const int /*subpixel_x*/, const int /*subpixel_y*/,
- const int /*step_x*/, const int /*step_y*/, const int width,
- const int height, void* prediction,
+ const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
const auto* src = static_cast<const uint8_t*>(reference);
auto* dest = static_cast<uint8_t*>(prediction);
@@ -497,23 +484,29 @@ void ConvolveCompoundCopy_C(const void* const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int /*vertical_filter_index*/,
- const int /*inter_round_bits_vertical*/,
const int /*subpixel_x*/, const int /*subpixel_y*/,
- const int /*step_x*/, const int /*step_y*/,
const int width, const int height, void* prediction,
const ptrdiff_t pred_stride) {
+ // All compound functions output to the predictor buffer with |pred_stride|
+ // equal to |width|.
+ assert(pred_stride == width);
+ // Compound functions start at 4x4.
+ assert(width >= 4 && height >= 4);
+ constexpr int kRoundBitsVertical =
+ ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
+ : kInterRoundBitsVertical) -
+ kInterRoundBitsCompoundVertical;
const auto* src = static_cast<const Pixel*>(reference);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
auto* dest = static_cast<uint16_t*>(prediction);
- const int compound_round_offset =
- (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
int y = 0;
do {
int x = 0;
do {
- dest[x] = (src[x] << 4) + compound_round_offset;
+ int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
+ sum += src[x];
+ dest[x] = sum << kRoundBitsVertical;
} while (++x < width);
-
src += src_stride;
dest += pred_stride;
} while (++y < height);
@@ -528,10 +521,13 @@ template <int bitdepth, typename Pixel>
void ConvolveCompoundHorizontal_C(
const void* const reference, const ptrdiff_t reference_stride,
const int horizontal_filter_index, const int /*vertical_filter_index*/,
- const int inter_round_bits_vertical, const int subpixel_x,
- const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
- const int width, const int height, void* prediction,
- const ptrdiff_t pred_stride) {
+ const int subpixel_x, const int /*subpixel_y*/, const int width,
+ const int height, void* prediction, const ptrdiff_t pred_stride) {
+ // All compound functions output to the predictor buffer with |pred_stride|
+ // equal to |width|.
+ assert(pred_stride == width);
+ // Compound functions start at 4x4.
+ assert(width >= 4 && height >= 4);
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
: kInterRoundBitsHorizontal;
@@ -540,19 +536,19 @@ void ConvolveCompoundHorizontal_C(
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
auto* dest = static_cast<uint16_t*>(prediction);
const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
- const int bits_shift = kFilterBits - inter_round_bits_vertical;
- const int compound_round_offset =
- (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
+ // Copy filters must call ConvolveCopy().
+ assert(filter_id != 0);
int y = 0;
do {
int x = 0;
do {
int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] * src[x + k];
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
}
- sum = RightShiftWithRounding(sum, kRoundBitsHorizontal) << bits_shift;
- dest[x] = sum + compound_round_offset;
+ sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
+ sum += (bitdepth == 8) ? 0 : kCompoundOffset;
+ dest[x] = sum;
} while (++x < width);
src += src_stride;
@@ -570,11 +566,14 @@ void ConvolveCompoundVertical_C(const void* const reference,
const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/,
const int vertical_filter_index,
- const int inter_round_bits_vertical,
const int /*subpixel_x*/, const int subpixel_y,
- const int /*step_x*/, const int /*step_y*/,
const int width, const int height,
void* prediction, const ptrdiff_t pred_stride) {
+ // All compound functions output to the predictor buffer with |pred_stride|
+ // equal to |width|.
+ assert(pred_stride == width);
+ // Compound functions start at 4x4.
+ assert(width >= 4 && height >= 4);
constexpr int kRoundBitsHorizontal = (bitdepth == 12)
? kInterRoundBitsHorizontal12bpp
: kInterRoundBitsHorizontal;
@@ -584,23 +583,21 @@ void ConvolveCompoundVertical_C(const void* const reference,
static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
auto* dest = static_cast<uint16_t*>(prediction);
const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
- const int bits_shift = kFilterBits - kRoundBitsHorizontal;
- const int compound_round_offset =
- (1 << (bitdepth + 4)) + (1 << (bitdepth + 3));
+ // Copy filters must call ConvolveCopy().
+ assert(filter_id != 0);
int y = 0;
do {
int x = 0;
do {
int sum = 0;
for (int k = 0; k < kSubPixelTaps; ++k) {
- sum += kSubPixelFilters[filter_index][filter_id][k] *
+ sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
src[k * src_stride + x];
}
- dest[x] = RightShiftWithRounding(LeftShift(sum, bits_shift),
- inter_round_bits_vertical) +
- compound_round_offset;
+ sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
+ sum += (bitdepth == 8) ? 0 : kCompoundOffset;
+ dest[x] = sum;
} while (++x < width);
-
src += src_stride;
dest += pred_stride;
} while (++y < height);
@@ -616,13 +613,11 @@ template <int bitdepth, typename Pixel>
void ConvolveIntraBlockCopy2D_C(
const void* const reference, const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*inter_round_bits_vertical*/, const int /*subpixel_x*/,
- const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
- const int width, const int height, void* prediction,
- const ptrdiff_t pred_stride) {
- const auto* src = reinterpret_cast<const Pixel*>(reference);
+ const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
+ const int height, void* prediction, const ptrdiff_t pred_stride) {
+ const auto* src = static_cast<const Pixel*>(reference);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
- auto* dest = reinterpret_cast<Pixel*>(prediction);
+ auto* dest = static_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
const int intermediate_height = height + 1;
uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
@@ -647,8 +642,8 @@ void ConvolveIntraBlockCopy2D_C(
do {
int x = 0;
do {
- dest[x] = static_cast<Pixel>(
- RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2));
+ dest[x] =
+ RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
} while (++x < width);
intermediate += width;
@@ -668,21 +663,18 @@ template <int bitdepth, typename Pixel, bool is_horizontal>
void ConvolveIntraBlockCopy1D_C(
const void* const reference, const ptrdiff_t reference_stride,
const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
- const int /*inter_round_bits_vertical*/, const int /*subpixel_x*/,
- const int /*subpixel_y*/, const int /*step_x*/, const int /*step_y*/,
- const int width, const int height, void* prediction,
- const ptrdiff_t pred_stride) {
- const auto* src = reinterpret_cast<const Pixel*>(reference);
+ const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
+ const int height, void* prediction, const ptrdiff_t pred_stride) {
+ const auto* src = static_cast<const Pixel*>(reference);
const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
- auto* dest = reinterpret_cast<Pixel*>(prediction);
+ auto* dest = static_cast<Pixel*>(prediction);
const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
const ptrdiff_t offset = is_horizontal ? 1 : src_stride;
int y = 0;
do {
int x = 0;
do {
- dest[x] = static_cast<Pixel>(
- RightShiftWithRounding(src[x] + src[x + offset], 1));
+ dest[x] = RightShiftWithRounding(src[x] + src[x + offset], 1);
} while (++x < width);
src += src_stride;