about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Haines Sy <hainesy@google.com> 2024-04-23 20:25:28 -0700
committer Haines Sy <hainesy@google.com> 2024-04-24 07:28:26 +0000
commit 9c08eee2df1ce5ece28c25568f571fd1c98fc40d (patch)
tree bc5b0fd746b8e33e1c9cc711275fe571b8386755
parent f76d5f0595fa6c550bea6516c3f43fa9d84bc2b3 (diff)
download binary_translation-9c08eee2df1ce5ece28c25568f571fd1c98fc40d.tar.gz
Implement Rem(u) vv and vx as vector intrinsics
Test: m berberis_all
Change-Id: I3c4c0cdf6febafaa022b136d7cbbb964c1011dcc
-rw-r--r-- decoder/include/berberis/decoder/riscv64/decoder.h 4
-rw-r--r-- interpreter/riscv64/interpreter.h 12
-rw-r--r-- interpreter/riscv64/interpreter_test.cc 146
-rw-r--r-- intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h 6
4 files changed, 168 insertions, 0 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index 6a04cf3d..c2eb4a65 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -466,6 +466,8 @@ class Decoder {
kVmxnormm = 0b011111,
kVdivuvv = 0b100000,
kVdivvv = 0b100001,
+ kVremuvv = 0b100010,
+ kVremvv = 0b100011,
kVmulhuvv = 0b100100,
kVmulvv = 0b100101,
kVmulhsuvv = 0b100110,
@@ -500,6 +502,8 @@ class Decoder {
kVRXUnary0 = 0b010000,
kVdivuvx = 0b100000,
kVdivvx = 0b100001,
+ kVremuvx = 0b100010,
+ kVremvx = 0b100011,
kVmulhuvx = 0b100100,
kVmulvx = 0b100101,
kVmulhsuvx = 0b100110,
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index 3a43bc68..469b4728 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -2335,6 +2335,12 @@ class Interpreter {
case Decoder::VOpMVvOpcode::kVdivvv:
return OpVectorvv<intrinsics::Vdivvv<SignedType>, SignedType, vlmul, vta, vma>(
args.dst, args.src1, args.src2);
+ case Decoder::VOpMVvOpcode::kVremuvv:
+ return OpVectorvv<intrinsics::Vremvv<UnsignedType>, UnsignedType, vlmul, vta, vma>(
+ args.dst, args.src1, args.src2);
+ case Decoder::VOpMVvOpcode::kVremvv:
+ return OpVectorvv<intrinsics::Vremvv<SignedType>, SignedType, vlmul, vta, vma>(
+ args.dst, args.src1, args.src2);
case Decoder::VOpMVvOpcode::kVmulhuvv:
return OpVectorvv<intrinsics::Vmulhvv<UnsignedType>, UnsignedType, vlmul, vta, vma>(
args.dst, args.src1, args.src2);
@@ -2452,6 +2458,12 @@ class Interpreter {
case Decoder::VOpMVxOpcode::kVdivvx:
return OpVectorvx<intrinsics::Vdivvx<SignedType>, SignedType, vlmul, vta, vma>(
args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2));
+ case Decoder::VOpMVxOpcode::kVremuvx:
+ return OpVectorvx<intrinsics::Vremvx<UnsignedType>, UnsignedType, vlmul, vta, vma>(
+ args.dst, args.src1, MaybeTruncateTo<UnsignedType>(arg2));
+ case Decoder::VOpMVxOpcode::kVremvx:
+ return OpVectorvx<intrinsics::Vremvx<SignedType>, SignedType, vlmul, vta, vma>(
+ args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2));
case Decoder::VOpMVxOpcode::kVmulhsuvx:
return OpVectorvx<intrinsics::Vmulhsuvx<SignedType>, SignedType, vlmul, vta, vma>(
args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2));
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 3042d413..2b108445 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -10922,6 +10922,152 @@ TEST_F(Riscv64InterpreterTest, TestVdiv) {
kVectorComparisonSource);
}
+TEST_F(Riscv64InterpreterTest, TestVrem) {
+ TestVectorInstruction(
+ 0x890c2457, // vremu.vv v8, v16, v24, v0.t
+ {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {10, 13, 10, 13, 0, 0, 0, 0, 0, 0, 10, 13, 0, 0, 0, 0},
+ {22, 64, 22, 64, 22, 64, 22, 64, 0, 0, 0, 0, 0, 128, 22, 64},
+ {74, 72, 70, 68, 65, 64, 62, 60, 17, 17, 17, 17, 17, 17, 17, 17},
+ {116, 125, 112, 121, 107, 117, 104, 113, 101, 109, 98, 105, 94, 101, 90, 97},
+ {84, 93, 80, 89, 79, 85, 76, 81, 68, 77, 65, 73, 61, 69, 57, 65},
+ {187, 187, 187, 169, 187, 187, 187, 169, 187, 187, 187, 169, 187, 187, 187, 169},
+ {169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169}},
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0d81, 0x0d81, 0x0000, 0x0000, 0x0000, 0x0d81, 0x0000, 0x0000},
+ {0x4016, 0x4016, 0x4016, 0x4016, 0x0000, 0x0000, 0x8000, 0x4016},
+ {0x484a, 0x4446, 0x4041, 0x3c3e, 0x1111, 0x1111, 0x1111, 0x1111},
+ {0x7d74, 0x7970, 0x756b, 0x7168, 0x6d65, 0x6962, 0x655e, 0x615a},
+ {0x5d54, 0x5950, 0x554f, 0x514c, 0x4d44, 0x4941, 0x453d, 0x4139},
+ {0xbbbb, 0xa9bb, 0xbbbb, 0xa9bb, 0xbbbb, 0xa9bb, 0xbbbb, 0xa9bb},
+ {0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0d81'0d81, 0x0000'0000, 0x0d80'cccd, 0x0000'0000},
+ {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000},
+ {0x4446'484a, 0x3c3e'4041, 0x1111'1111, 0x1111'1111},
+ {0x7970'7d74, 0x7168'756b, 0x6962'6d65, 0x615a'655e},
+ {0x5950'5d54, 0x514c'554f, 0x4941'4d44, 0x4139'453d},
+ {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb},
+ {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}},
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x1111'1110'51c5'51c6, 0x1111'1110'51c5'1112},
+ {0x4016'4016'4016'4016, 0x4016'8000'0000'0000},
+ {0x3c3e'4041'4446'484a, 0x1111'1111'1111'1111},
+ {0x7168'756b'7970'7d74, 0x615a'655e'6962'6d65},
+ {0x514c'554f'5950'5d54, 0x4139'453d'4941'4d44},
+ {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb},
+ {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}},
+ kVectorComparisonSource);
+
+ TestVectorInstruction(
+ 0x8900e457, // vremu.vx v8, v16, x1, v0.t
+ {{5, 70, 5, 70, 5, 70, 5, 70, 64, 64, 64, 64, 85, 85, 85, 85},
+ {10, 64, 10, 64, 85, 85, 85, 85, 0, 0, 10, 64, 85, 85, 85, 85},
+ {22, 64, 22, 64, 22, 64, 22, 64, 0, 0, 0, 0, 0, 128, 22, 64},
+ {0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17},
+ {74, 85, 74, 85, 74, 85, 74, 85, 76, 85, 76, 85, 76, 85, 76, 85},
+ {74, 85, 74, 85, 78, 85, 78, 85, 75, 85, 75, 85, 75, 85, 75, 85},
+ {17, 17, 17, 169, 17, 17, 17, 169, 17, 17, 17, 169, 17, 17, 17, 169},
+ {169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169}},
+ {{0x455b, 0x455b, 0x455b, 0x455b, 0x4040, 0x4040, 0x5555, 0x5555},
+ {0x40b4, 0x40b4, 0x5555, 0x5555, 0x0000, 0x40b4, 0x5555, 0x5555},
+ {0x4016, 0x4016, 0x4016, 0x4016, 0x0000, 0x0000, 0x8000, 0x4016},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x1111, 0x1111, 0x1111, 0x1111},
+ {0x554a, 0x554a, 0x554a, 0x554a, 0x554c, 0x554c, 0x554c, 0x554c},
+ {0x554a, 0x554a, 0x554e, 0x554e, 0x554b, 0x554b, 0x554b, 0x554b},
+ {0x1111, 0xa9bb, 0x1111, 0xa9bb, 0x1111, 0xa9bb, 0x1111, 0xa9bb},
+ {0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9}},
+ {{0x455b'455b, 0x455b'455b, 0x4040'4040, 0x5555'5555},
+ {0x40b4'40b4, 0x5555'5555, 0x40b4'0000, 0x5555'5555},
+ {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000},
+ {0x0000'0000, 0x0000'0000, 0x1111'1111, 0x1111'1111},
+ {0x554a'554a, 0x554a'554a, 0x554c'554c, 0x554c'554c},
+ {0x554a'554a, 0x554e'554e, 0x554b'554b, 0x554b'554b},
+ {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb},
+ {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}},
+ {{0x455b'455b'455b'455b, 0x5555'5554'9595'9596},
+ {0x5555'5554'9609'960a, 0x5555'5554'9609'5556},
+ {0x4016'4016'4016'4016, 0x4016'8000'0000'0000},
+ {0x0000'0000'0000'0000, 0x1111'1111'1111'1111},
+ {0x554a'554a'554a'554a, 0x554c'554c'554c'554c},
+ {0x554e'554e'554a'554a, 0x554b'554b'554b'554b},
+ {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb},
+ {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}},
+ kVectorComparisonSource);
+
+ TestVectorInstruction(
+ 0x8d0c2457, // vrem.vv v8, v16, v24, v0.t
+ {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {248, 13, 248, 13, 255, 255, 255, 255, 0, 0, 248, 13, 255, 255, 255, 255},
+ {7, 0, 7, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0},
+ {170, 170, 170, 170, 170, 170, 170, 170, 17, 17, 17, 17, 17, 17, 17, 17},
+ {244, 255, 244, 255, 244, 255, 244, 255, 246, 255, 246, 255, 246, 255, 246, 255},
+ {244, 255, 244, 255, 248, 255, 248, 255, 245, 255, 245, 255, 245, 255, 245, 255},
+ {251, 249, 247, 227, 242, 241, 239, 219, 234, 233, 231, 253, 227, 225, 223, 237},
+ {233, 229, 253, 247, 238, 235, 249, 241, 244, 253, 253, 249, 249, 253, 253, 255}},
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0d81, 0x0d81, 0xffff, 0xffff, 0x0000, 0x0d81, 0xffff, 0xffff},
+ {0x000b, 0x000b, 0x000b, 0x000b, 0x0000, 0x0000, 0xfff8, 0x000b},
+ {0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0x1111, 0x1111, 0x1111, 0x1111},
+ {0xfff4, 0xfff4, 0xfff4, 0xfff4, 0xfff6, 0xfff6, 0xfff6, 0xfff6},
+ {0xfff4, 0xfff4, 0xfff8, 0xfff8, 0xfff5, 0xfff5, 0xfff5, 0xfff5},
+ {0xf8fb, 0xe2f7, 0xf0f2, 0xdaef, 0xe8ea, 0xfc13, 0xe0e3, 0xec03},
+ {0xe3e9, 0xf4fd, 0xfe05, 0xff0d, 0xf803, 0xfb15, 0xff31, 0xfffd}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0d81'0d81, 0xffff'ffff, 0x0d80'cccd, 0xffff'ffff},
+ {0x000b'fb79, 0x000b'fb79, 0x0000'0000, 0x000c'3b63},
+ {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111},
+ {0xfff4'fff4, 0xfff4'fff4, 0xfff6'fff6, 0xfff6'fff6},
+ {0xfff4'fff4, 0xfff8'fff8, 0xfff5'fff5, 0xfff5'fff5},
+ {0xe2f6'f8fb, 0xdaee'f0f2, 0xfc12'1619, 0xec02'060b},
+ {0xf4fb'0109, 0xff09'131c, 0xfb0d'1f30, 0xffaa'5551}},
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0xffff'ffff'40b4'40b4, 0xffff'ffff'40b4'0000},
+ {0x000c'000c'000b'fb79, 0x000c'3ff5'bff5'bb63},
+ {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111},
+ {0xfff4'fff4'fff4'fff4, 0xfff6'fff6'fff6'fff6},
+ {0xfff8'fff8'fff4'fff4, 0xfff5'fff5'fff5'fff5},
+ {0xdaee'f0f1'e2f6'f8fb, 0xec02'0609'fc12'1619},
+ {0xff09'1318'2731'3b49, 0xffaa'54ff'aa54'ffa4}},
+ kVectorComparisonSource);
+
+ TestVectorInstruction(
+ 0x8d00e457, // vrem.vx v8, v16, x1, v0.t
+ {{5, 240, 5, 240, 5, 240, 5, 240, 64, 64, 64, 64, 255, 255, 255, 255},
+ {180, 64, 180, 64, 255, 255, 255, 255, 0, 0, 180, 64, 255, 255, 255, 255},
+ {22, 64, 22, 64, 22, 64, 22, 64, 0, 0, 0, 0, 0, 214, 22, 64},
+ {0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17},
+ {244, 255, 244, 255, 244, 255, 244, 255, 246, 255, 246, 255, 246, 255, 246, 255},
+ {244, 255, 244, 255, 248, 255, 248, 255, 245, 255, 245, 255, 245, 255, 245, 255},
+ {187, 187, 187, 255, 187, 187, 187, 255, 187, 187, 187, 255, 187, 187, 187, 255},
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+ {{0xf005, 0xf005, 0xf005, 0xf005, 0x4040, 0x4040, 0xffff, 0xffff},
+ {0x40b4, 0x40b4, 0xffff, 0xffff, 0x0000, 0x40b4, 0xffff, 0xffff},
+ {0x4016, 0x4016, 0x4016, 0x4016, 0x0000, 0x0000, 0xd556, 0x4016},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x1111, 0x1111, 0x1111, 0x1111},
+ {0xfff4, 0xfff4, 0xfff4, 0xfff4, 0xfff6, 0xfff6, 0xfff6, 0xfff6},
+ {0xfff4, 0xfff4, 0xfff8, 0xfff8, 0xfff5, 0xfff5, 0xfff5, 0xfff5},
+ {0xbbbb, 0xff11, 0xbbbb, 0xff11, 0xbbbb, 0xff11, 0xbbbb, 0xff11},
+ {0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff}},
+ {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0xffff'ffff},
+ {0x40b4'40b4, 0xffff'ffff, 0x40b4'0000, 0xffff'ffff},
+ {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000},
+ {0x0000'0000, 0x0000'0000, 0x1111'1111, 0x1111'1111},
+ {0xfff4'fff4, 0xfff4'fff4, 0xfff6'fff6, 0xfff6'fff6},
+ {0xfff4'fff4, 0xfff8'fff8, 0xfff5'fff5, 0xfff5'fff5},
+ {0xff11'1111, 0xff11'1111, 0xff11'1111, 0xff11'1111},
+ {0xfefe'feff, 0xfefe'feff, 0xfefe'feff, 0xfefe'feff}},
+ {{0xf005'f005'f005'f005, 0xffff'ffff'4040'4040},
+ {0xffff'ffff'40b4'40b4, 0xffff'ffff'40b4'0000},
+ {0x4016'4016'4016'4016, 0x4016'8000'0000'0000},
+ {0x0000'0000'0000'0000, 0x1111'1111'1111'1111},
+ {0xfff4'fff4'fff4'fff4, 0xfff6'fff6'fff6'fff6},
+ {0xfff8'fff8'fff4'fff4, 0xfff5'fff5'fff5'fff5},
+ {0xff11'1110'ff11'1111, 0xff11'1110'ff11'1111},
+ {0xfefe'fefe'fefe'feff, 0xfefe'fefe'fefe'feff}},
+ kVectorComparisonSource);
+}
+
TEST_F(Riscv64InterpreterTest, TestVslideup) {
// With slide offset equal zero, this is equivalent to Vmv.
TestVectorInstruction(
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index cd717917..df061cc2 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -1034,6 +1034,12 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(
DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(
div,
ElementType{std::get<0>(Div(static_cast<typename ElementType::BaseType>(args)...))})
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(
+ rem,
+ ElementType{std::get<0>(Rem(static_cast<typename ElementType::BaseType>(args)...))})
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(
+ rem,
+ ElementType{std::get<0>(Rem(static_cast<typename ElementType::BaseType>(args)...))})
DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV(add, (args + ...))
DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VX(add, (args + ...))