diff options
author | Haines Sy <hainesy@google.com> | 2024-04-23 20:25:28 -0700 |
---|---|---|
committer | Haines Sy <hainesy@google.com> | 2024-04-24 07:28:26 +0000 |
commit | 9c08eee2df1ce5ece28c25568f571fd1c98fc40d (patch) | |
tree | bc5b0fd746b8e33e1c9cc711275fe571b8386755 | |
parent | f76d5f0595fa6c550bea6516c3f43fa9d84bc2b3 (diff) | |
download | binary_translation-9c08eee2df1ce5ece28c25568f571fd1c98fc40d.tar.gz |
Implement Rem(u) vv and vx as vector intrinsics
Test: m berberis_all
Change-Id: I3c4c0cdf6febafaa022b136d7cbbb964c1011dcc
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 4 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.h | 12 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 146 | ||||
-rw-r--r-- | intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h | 6 |
4 files changed, 168 insertions, 0 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index 6a04cf3d..c2eb4a65 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -466,6 +466,8 @@ class Decoder { kVmxnormm = 0b011111, kVdivuvv = 0b100000, kVdivvv = 0b100001, + kVremuvv = 0b100010, + kVremvv = 0b100011, kVmulhuvv = 0b100100, kVmulvv = 0b100101, kVmulhsuvv = 0b100110, @@ -500,6 +502,8 @@ class Decoder { kVRXUnary0 = 0b010000, kVdivuvx = 0b100000, kVdivvx = 0b100001, + kVremuvx = 0b100010, + kVremvx = 0b100011, kVmulhuvx = 0b100100, kVmulvx = 0b100101, kVmulhsuvx = 0b100110, diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h index 3a43bc68..469b4728 100644 --- a/interpreter/riscv64/interpreter.h +++ b/interpreter/riscv64/interpreter.h @@ -2335,6 +2335,12 @@ class Interpreter { case Decoder::VOpMVvOpcode::kVdivvv: return OpVectorvv<intrinsics::Vdivvv<SignedType>, SignedType, vlmul, vta, vma>( args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVremuvv: + return OpVectorvv<intrinsics::Vremvv<UnsignedType>, UnsignedType, vlmul, vta, vma>( + args.dst, args.src1, args.src2); + case Decoder::VOpMVvOpcode::kVremvv: + return OpVectorvv<intrinsics::Vremvv<SignedType>, SignedType, vlmul, vta, vma>( + args.dst, args.src1, args.src2); case Decoder::VOpMVvOpcode::kVmulhuvv: return OpVectorvv<intrinsics::Vmulhvv<UnsignedType>, UnsignedType, vlmul, vta, vma>( args.dst, args.src1, args.src2); @@ -2452,6 +2458,12 @@ class Interpreter { case Decoder::VOpMVxOpcode::kVdivvx: return OpVectorvx<intrinsics::Vdivvx<SignedType>, SignedType, vlmul, vta, vma>( args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2)); + case Decoder::VOpMVxOpcode::kVremuvx: + return OpVectorvx<intrinsics::Vremvx<UnsignedType>, UnsignedType, vlmul, vta, vma>( + args.dst, args.src1, MaybeTruncateTo<UnsignedType>(arg2)); + case Decoder::VOpMVxOpcode::kVremvx: + return OpVectorvx<intrinsics::Vremvx<SignedType>, SignedType, vlmul, vta, vma>( + args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2)); case Decoder::VOpMVxOpcode::kVmulhsuvx: return OpVectorvx<intrinsics::Vmulhsuvx<SignedType>, SignedType, vlmul, vta, vma>( args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2)); diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 3042d413..2b108445 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -10922,6 +10922,152 @@ TEST_F(Riscv64InterpreterTest, TestVdiv) { kVectorComparisonSource); } +TEST_F(Riscv64InterpreterTest, TestVrem) { + TestVectorInstruction( + 0x890c2457, // vremu.vv v8, v16, v24, v0.t + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {10, 13, 10, 13, 0, 0, 0, 0, 0, 0, 10, 13, 0, 0, 0, 0}, + {22, 64, 22, 64, 22, 64, 22, 64, 0, 0, 0, 0, 0, 128, 22, 64}, + {74, 72, 70, 68, 65, 64, 62, 60, 17, 17, 17, 17, 17, 17, 17, 17}, + {116, 125, 112, 121, 107, 117, 104, 113, 101, 109, 98, 105, 94, 101, 90, 97}, + {84, 93, 80, 89, 79, 85, 76, 81, 68, 77, 65, 73, 61, 69, 57, 65}, + {187, 187, 187, 169, 187, 187, 187, 169, 187, 187, 187, 169, 187, 187, 187, 169}, + {169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169}}, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0d81, 0x0d81, 0x0000, 0x0000, 0x0000, 0x0d81, 0x0000, 0x0000}, + {0x4016, 0x4016, 0x4016, 0x4016, 0x0000, 0x0000, 0x8000, 0x4016}, + {0x484a, 0x4446, 0x4041, 0x3c3e, 0x1111, 0x1111, 0x1111, 0x1111}, + {0x7d74, 0x7970, 0x756b, 0x7168, 0x6d65, 0x6962, 0x655e, 0x615a}, + {0x5d54, 0x5950, 0x554f, 0x514c, 0x4d44, 0x4941, 0x453d, 0x4139}, + {0xbbbb, 0xa9bb, 0xbbbb, 0xa9bb, 0xbbbb, 0xa9bb, 0xbbbb, 0xa9bb}, + {0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0d81'0d81, 0x0000'0000, 0x0d80'cccd, 0x0000'0000}, + {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, + {0x4446'484a, 0x3c3e'4041, 0x1111'1111, 0x1111'1111}, + {0x7970'7d74, 0x7168'756b, 0x6962'6d65, 0x615a'655e}, + {0x5950'5d54, 0x514c'554f, 0x4941'4d44, 0x4139'453d}, + {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb}, + {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x1111'1110'51c5'51c6, 0x1111'1110'51c5'1112}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, + {0x3c3e'4041'4446'484a, 0x1111'1111'1111'1111}, + {0x7168'756b'7970'7d74, 0x615a'655e'6962'6d65}, + {0x514c'554f'5950'5d54, 0x4139'453d'4941'4d44}, + {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb}, + {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}}, + kVectorComparisonSource); + + TestVectorInstruction( + 0x8900e457, // vremu.vx v8, v16, x1, v0.t + {{5, 70, 5, 70, 5, 70, 5, 70, 64, 64, 64, 64, 85, 85, 85, 85}, + {10, 64, 10, 64, 85, 85, 85, 85, 0, 0, 10, 64, 85, 85, 85, 85}, + {22, 64, 22, 64, 22, 64, 22, 64, 0, 0, 0, 0, 0, 128, 22, 64}, + {0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17}, + {74, 85, 74, 85, 74, 85, 74, 85, 76, 85, 76, 85, 76, 85, 76, 85}, + {74, 85, 74, 85, 78, 85, 78, 85, 75, 85, 75, 85, 75, 85, 75, 85}, + {17, 17, 17, 169, 17, 17, 17, 169, 17, 17, 17, 169, 17, 17, 17, 169}, + {169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169}}, + {{0x455b, 0x455b, 0x455b, 0x455b, 0x4040, 0x4040, 0x5555, 0x5555}, + {0x40b4, 0x40b4, 0x5555, 0x5555, 0x0000, 0x40b4, 0x5555, 0x5555}, + {0x4016, 0x4016, 0x4016, 0x4016, 0x0000, 0x0000, 0x8000, 0x4016}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x1111, 0x1111, 0x1111, 0x1111}, + {0x554a, 0x554a, 0x554a, 0x554a, 0x554c, 0x554c, 0x554c, 0x554c}, + {0x554a, 0x554a, 0x554e, 0x554e, 0x554b, 0x554b, 0x554b, 0x554b}, + {0x1111, 0xa9bb, 0x1111, 0xa9bb, 0x1111, 0xa9bb, 0x1111, 0xa9bb}, + {0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9, 0xa9a9}}, + {{0x455b'455b, 0x455b'455b, 0x4040'4040, 0x5555'5555}, + {0x40b4'40b4, 0x5555'5555, 0x40b4'0000, 0x5555'5555}, + {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, + {0x0000'0000, 0x0000'0000, 0x1111'1111, 0x1111'1111}, + {0x554a'554a, 0x554a'554a, 0x554c'554c, 0x554c'554c}, + {0x554a'554a, 0x554e'554e, 0x554b'554b, 0x554b'554b}, + {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb}, + {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}}, + {{0x455b'455b'455b'455b, 0x5555'5554'9595'9596}, + {0x5555'5554'9609'960a, 0x5555'5554'9609'5556}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, + {0x0000'0000'0000'0000, 0x1111'1111'1111'1111}, + {0x554a'554a'554a'554a, 0x554c'554c'554c'554c}, + {0x554e'554e'554a'554a, 0x554b'554b'554b'554b}, + {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb}, + {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}}, + kVectorComparisonSource); + + TestVectorInstruction( + 0x8d0c2457, // vrem.vv v8, v16, v24, v0.t + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {248, 13, 248, 13, 255, 255, 255, 255, 0, 0, 248, 13, 255, 255, 255, 255}, + {7, 0, 7, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0}, + {170, 170, 170, 170, 170, 170, 170, 170, 17, 17, 17, 17, 17, 17, 17, 17}, + {244, 255, 244, 255, 244, 255, 244, 255, 246, 255, 246, 255, 246, 255, 246, 255}, + {244, 255, 244, 255, 248, 255, 248, 255, 245, 255, 245, 255, 245, 255, 245, 255}, + {251, 249, 247, 227, 242, 241, 239, 219, 234, 233, 231, 253, 227, 225, 223, 237}, + {233, 229, 253, 247, 238, 235, 249, 241, 244, 253, 253, 249, 249, 253, 253, 255}}, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0d81, 0x0d81, 0xffff, 0xffff, 0x0000, 0x0d81, 0xffff, 0xffff}, + {0x000b, 0x000b, 0x000b, 0x000b, 0x0000, 0x0000, 0xfff8, 0x000b}, + {0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0x1111, 0x1111, 0x1111, 0x1111}, + {0xfff4, 0xfff4, 0xfff4, 0xfff4, 0xfff6, 0xfff6, 0xfff6, 0xfff6}, + {0xfff4, 0xfff4, 0xfff8, 0xfff8, 0xfff5, 0xfff5, 0xfff5, 0xfff5}, + {0xf8fb, 0xe2f7, 0xf0f2, 0xdaef, 0xe8ea, 0xfc13, 0xe0e3, 0xec03}, + {0xe3e9, 0xf4fd, 0xfe05, 0xff0d, 0xf803, 0xfb15, 0xff31, 0xfffd}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0d81'0d81, 0xffff'ffff, 0x0d80'cccd, 0xffff'ffff}, + {0x000b'fb79, 0x000b'fb79, 0x0000'0000, 0x000c'3b63}, + {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111}, + {0xfff4'fff4, 0xfff4'fff4, 0xfff6'fff6, 0xfff6'fff6}, + {0xfff4'fff4, 0xfff8'fff8, 0xfff5'fff5, 0xfff5'fff5}, + {0xe2f6'f8fb, 0xdaee'f0f2, 0xfc12'1619, 0xec02'060b}, + {0xf4fb'0109, 0xff09'131c, 0xfb0d'1f30, 0xffaa'5551}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0xffff'ffff'40b4'40b4, 0xffff'ffff'40b4'0000}, + {0x000c'000c'000b'fb79, 0x000c'3ff5'bff5'bb63}, + {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111}, + {0xfff4'fff4'fff4'fff4, 0xfff6'fff6'fff6'fff6}, + {0xfff8'fff8'fff4'fff4, 0xfff5'fff5'fff5'fff5}, + {0xdaee'f0f1'e2f6'f8fb, 0xec02'0609'fc12'1619}, + {0xff09'1318'2731'3b49, 0xffaa'54ff'aa54'ffa4}}, + kVectorComparisonSource); + + TestVectorInstruction( + 0x8d00e457, // vrem.vx v8, v16, x1, v0.t + {{5, 240, 5, 240, 5, 240, 5, 240, 64, 64, 64, 64, 255, 255, 255, 255}, + {180, 64, 180, 64, 255, 255, 255, 255, 0, 0, 180, 64, 255, 255, 255, 255}, + {22, 64, 22, 64, 22, 64, 22, 64, 0, 0, 0, 0, 0, 214, 22, 64}, + {0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17}, + {244, 255, 244, 255, 244, 255, 244, 255, 246, 255, 246, 255, 246, 255, 246, 255}, + {244, 255, 244, 255, 248, 255, 248, 255, 245, 255, 245, 255, 245, 255, 245, 255}, + {187, 187, 187, 255, 187, 187, 187, 255, 187, 187, 187, 255, 187, 187, 187, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, + {{0xf005, 0xf005, 0xf005, 0xf005, 0x4040, 0x4040, 0xffff, 0xffff}, + {0x40b4, 0x40b4, 0xffff, 0xffff, 0x0000, 0x40b4, 0xffff, 0xffff}, + {0x4016, 0x4016, 0x4016, 0x4016, 0x0000, 0x0000, 0xd556, 0x4016}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x1111, 0x1111, 0x1111, 0x1111}, + {0xfff4, 0xfff4, 0xfff4, 0xfff4, 0xfff6, 0xfff6, 0xfff6, 0xfff6}, + {0xfff4, 0xfff4, 0xfff8, 0xfff8, 0xfff5, 0xfff5, 0xfff5, 0xfff5}, + {0xbbbb, 0xff11, 0xbbbb, 0xff11, 0xbbbb, 0xff11, 0xbbbb, 0xff11}, + {0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff, 0xfeff}}, + {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0xffff'ffff}, + {0x40b4'40b4, 0xffff'ffff, 0x40b4'0000, 0xffff'ffff}, + {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, + {0x0000'0000, 0x0000'0000, 0x1111'1111, 0x1111'1111}, + {0xfff4'fff4, 0xfff4'fff4, 0xfff6'fff6, 0xfff6'fff6}, + {0xfff4'fff4, 0xfff8'fff8, 0xfff5'fff5, 0xfff5'fff5}, + {0xff11'1111, 0xff11'1111, 0xff11'1111, 0xff11'1111}, + {0xfefe'feff, 0xfefe'feff, 0xfefe'feff, 0xfefe'feff}}, + {{0xf005'f005'f005'f005, 0xffff'ffff'4040'4040}, + {0xffff'ffff'40b4'40b4, 0xffff'ffff'40b4'0000}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, + {0x0000'0000'0000'0000, 0x1111'1111'1111'1111}, + {0xfff4'fff4'fff4'fff4, 0xfff6'fff6'fff6'fff6}, + {0xfff8'fff8'fff4'fff4, 0xfff5'fff5'fff5'fff5}, + {0xff11'1110'ff11'1111, 0xff11'1110'ff11'1111}, + {0xfefe'fefe'fefe'feff, 0xfefe'fefe'fefe'feff}}, + kVectorComparisonSource); +} + TEST_F(Riscv64InterpreterTest, TestVslideup) { // With slide offset equal zero, this is equivalent to Vmv. TestVectorInstruction( diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index cd717917..df061cc2 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -1034,6 +1034,12 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV( DEFINE_2OP_ARITHMETIC_INTRINSIC_VX( div, ElementType{std::get<0>(Div(static_cast<typename ElementType::BaseType>(args)...))}) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV( + rem, + ElementType{std::get<0>(Rem(static_cast<typename ElementType::BaseType>(args)...))}) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX( + rem, + ElementType{std::get<0>(Rem(static_cast<typename ElementType::BaseType>(args)...))}) DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV(add, (args + ...)) DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VX(add, (args + ...)) |