diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-14 00:17:45 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-14 00:17:45 +0000 |
commit | 2e3405345ffa06e045abd38aede28b36ad572647 (patch) | |
tree | 0c88d001dfa6e9ece772f5ac2c75244947524655 | |
parent | 3d8930de5ce5d7c973977788ba593d236fa74145 (diff) | |
parent | b582b0dc3ca4ee8daa1ffa92732c169605dd6a71 (diff) | |
download | binary_translation-2e3405345ffa06e045abd38aede28b36ad572647.tar.gz |
Snap for 11216811 from b582b0dc3ca4ee8daa1ffa92732c169605dd6a71 to 24Q1-release
Change-Id: I84f8c2644ff420b942255d6fecbc3964d02a95d9
-rw-r--r-- | berberis_config.mk | 2 | ||||
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 46 | ||||
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/semantics_player.h | 13 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.cc | 56 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 68 | ||||
-rw-r--r-- | intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h | 18 |
6 files changed, 196 insertions, 7 deletions
diff --git a/berberis_config.mk b/berberis_config.mk index 58f2558c..dd4a178c 100644 --- a/berberis_config.mk +++ b/berberis_config.mk @@ -37,9 +37,9 @@ BERBERIS_PRODUCT_PACKAGES_RISCV64_TO_X86_64 := \ libberberis_proxy_libbinder_ndk \ libberberis_proxy_libc \ libberberis_proxy_libcamera2ndk \ + libberberis_proxy_libjnigraphics \ libberberis_proxy_libmediandk \ libberberis_proxy_libnativehelper \ - libberberis_proxy_libjnigraphics \ libberberis_proxy_libnativewindow \ libberberis_proxy_libneuralnetworks \ libberberis_proxy_libwebviewchromium_plat_support \ diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index f4c6021f..7ad556c1 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -340,6 +340,11 @@ class Decoder { kMaxValue = 0b111111 }; + enum class VOpMVvOpcode : uint8_t { + kVmaccvv = 0b101101, + kMaxValue = 0b111111 + }; + enum class VOpIVxOpcode : uint8_t { kVaddvx = 0b000000, kVsubvx = 0b000010, @@ -384,6 +389,11 @@ class Decoder { kMaxValue = 0b111111 }; + enum class VOpMVxOpcode : uint8_t { + kVmaccvx = 0b101101, + kMaxValue = 0b111111 + }; + // Load/Store instruction include 3bit “width” field while all other floating-point instructions // include 2bit “fmt” field. // @@ -636,6 +646,14 @@ class Decoder { uint8_t src2; }; + struct VOpMVvArgs { + VOpMVvOpcode opcode; + bool vm; + uint8_t dst; + uint8_t src1; + uint8_t src2; + }; + struct VOpIVxArgs { VOpIVxOpcode opcode; bool vm; @@ -644,6 +662,14 @@ class Decoder { uint8_t src2; }; + struct VOpMVxArgs { + VOpMVxOpcode opcode; + bool vm; + uint8_t dst; + uint8_t src1; + uint8_t src2; + }; + struct VsetivliArgs { uint8_t dst; uint8_t avl; @@ -1721,6 +1747,16 @@ class Decoder { }; return insn_consumer_->OpVector(args); } + case 0b010: { + const VOpMVvArgs args = { + .opcode = VOpMVvOpcode(opcode), + .vm = vm, + .dst = dst, + .src1 = src1, + .src2 = src2, + }; + return insn_consumer_->OpVector(args); + } case 0b011: { const VOpIViArgs args = { .opcode = VOpIViOpcode(opcode), @@ -1741,6 +1777,16 @@ class Decoder { }; return insn_consumer_->OpVector(args); } + case 0b110: { + const VOpMVxArgs args = { + .opcode = VOpMVxOpcode(opcode), + .vm = vm, + .dst = dst, + .src1 = src1, + .src2 = src2, + }; + return insn_consumer_->OpVector(args); + } case 0b111: if (GetBits<uint8_t, 31, 1>() == 0) { const VsetvliArgs args = { diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h index cfa707c7..faddb48c 100644 --- a/decoder/include/berberis/decoder/riscv64/semantics_player.h +++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h @@ -801,6 +801,12 @@ class SemanticsPlayer { listener_->OpVector(args); } + void OpVector(const typename Decoder::VOpMVvArgs& args) { + // TODO(300690740): develop and implement strategy which would allow us to support vector + // intrinsics not just in the interpreter. + listener_->OpVector(args); + } + void OpVector(const typename Decoder::VOpIVxArgs& args) { // TODO(300690740): develop and implement strategy which would allow us to support vector // intrinsics not just in the interpreter. @@ -808,6 +814,13 @@ class SemanticsPlayer { listener_->OpVector(args, arg2); } + void OpVector(const typename Decoder::VOpMVxArgs& args) { + // TODO(300690740): develop and implement strategy which would allow us to support vector + // intrinsics not just in the interpreter. + Register arg2 = GetRegOrZero(args.src2); + listener_->OpVector(args, arg2); + } + void Vsetivli(const typename Decoder::VsetivliArgs& args) { // Note: it's unclear whether args.avl should be treated similarly to x0 in Vsetvli or not. // Keep implementation separate from Vsetvli to make it easier to adjust that code. diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index 6785b4fa..b48d9349 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -652,6 +652,17 @@ class Interpreter { } template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> + void OpVector(const Decoder::VOpMVvArgs& args) { + switch (args.opcode) { + case Decoder::VOpMVvOpcode::kVmaccvv: + return OpVectorvv<intrinsics::Vmaccvv<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, args.src2); + default: + Unimplemented(); + } + } + + template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> void OpVector(const Decoder::VOpIVxArgs& args, Register arg2) { switch (args.opcode) { case Decoder::VOpIVxOpcode::kVaddvx: @@ -716,6 +727,17 @@ class Interpreter { } } + template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta> + void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) { + switch (args.opcode) { + case Decoder::VOpMVxOpcode::kVmaccvx: + return OpVectorvx<intrinsics::Vmaccvx<ElementType, vta>, ElementType, vlmul, vta>( + args.dst, args.src1, arg2); + default: + Unimplemented(); + } + } + template <auto Intrinsic, typename ElementType, VectorRegisterGroupMultiplier vlmul, @@ -902,6 +924,23 @@ class Interpreter { VectorRegisterGroupMultiplier vlmul, TailProcessing vta, InactiveProcessing vma> + void OpVector(const Decoder::VOpMVvArgs& args) { + switch (args.opcode) { + case Decoder::VOpMVvOpcode::kVmaccvv: + return OpVectorvv<intrinsics::Vmaccvvm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, args.src2); + default: + Unimplemented(); + } + } + + template <typename ElementType, + VectorRegisterGroupMultiplier vlmul, + TailProcessing vta, + InactiveProcessing vma> void OpVector(const Decoder::VOpIVxArgs& args, Register arg2) { switch (args.opcode) { case Decoder::VOpIVxOpcode::kVaddvx: @@ -978,6 +1017,23 @@ class Interpreter { } } + template <typename ElementType, + VectorRegisterGroupMultiplier vlmul, + TailProcessing vta, + InactiveProcessing vma> + void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) { + switch (args.opcode) { + case Decoder::VOpMVxOpcode::kVmaccvx: + return OpVectorvx<intrinsics::Vmaccvxm<ElementType, vta, vma>, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1, arg2); + default: + Unimplemented(); + } + } + template <auto Intrinsic, typename ElementType, VectorRegisterGroupMultiplier vlmul, diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 49ed2776..724c4892 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -1785,6 +1785,74 @@ TEST_F(Riscv64InterpreterTest, TestVsll) { {0xb3ab'a39b'938b'8000, 0xf3eb'e3db'd3cb'c000}}); } +TEST_F(Riscv64InterpreterTest, TestVmacc) { + TestVectorInstruction(0xb5882457, // vmacc.vv v8, v16, v24, v0.t + {{85, 87, 93, 103, 121, 135, 157, 183, 221, 247, 29, 71, 117, 167, 221, 23}, + {85, 151, 221, 39, 137, 199, 29, 119, 237, 55, 157, 7, 117, 231, 93, 215}, + {85, 215, 93, 231, 153, 7, 157, 55, 253, 119, 29, 199, 117, 39, 221, 151}, + {85, 23, 221, 167, 169, 71, 29, 247, 13, 183, 157, 135, 117, 103, 93, 87}, + {85, 87, 93, 103, 185, 135, 157, 183, 29, 247, 29, 71, 117, 167, 221, 23}, + {85, 151, 221, 39, 201, 199, 29, 119, 45, 55, 157, 7, 117, 231, 93, 215}, + {85, 215, 93, 231, 217, 7, 157, 55, 61, 119, 29, 199, 117, 39, 221, 151}, + {85, 23, 221, 167, 233, 71, 29, 247, 77, 183, 157, 135, 117, 103, 93, 87}}, + {{0x5555, 0x6d5d, 0xaa79, 0xfd9d, 0x7edd, 0x0e1d, 0xc675, 0x9edd}, + {0x9755, 0xafdd, 0xfd89, 0x411d, 0xd2ed, 0x529d, 0x0b75, 0xe45d}, + {0xdd55, 0xf65d, 0x5499, 0x889d, 0x2afd, 0x9b1d, 0x5475, 0x2ddd}, + {0x2755, 0x40dd, 0xafa9, 0xd41d, 0x870d, 0xe79d, 0xa175, 0x7b5d}, + {0x7555, 0x8f5d, 0x0eb9, 0x239d, 0xe71d, 0x381d, 0xf275, 0xccdd}, + {0xc755, 0xe1dd, 0x71c9, 0x771d, 0x4b2d, 0x8c9d, 0x4775, 0x225d}, + {0x1d55, 0x385d, 0xd8d9, 0xce9d, 0xb33d, 0xe51d, 0xa075, 0x7bdd}, + {0x7755, 0x92dd, 0x43e9, 0x2a1d, 0x1f4d, 0x419d, 0xfd75, 0xd95d}}, + {{0x5d57'5555, 0x44ed'aa79, 0x2a42'7edd, 0x0149'c675}, + {0xe41b'9755, 0xdec3'fd89, 0xc71a'd2ed, 0x9114'0b75}, + {0x76e7'dd55, 0x84a2'5499, 0x6ffb'2afd, 0x2ce6'5475}, + {0x15bc'2755, 0x3688'afa9, 0x24e3'870d, 0xd4c0'a175}, + {0xc098'7555, 0xf477'0eb9, 0xe5d3'e71d, 0x88a2'f275}, + {0x777c'c755, 0xbe6d'71c9, 0xb2cc'4b2d, 0x488d'4775}, + {0x3a69'1d55, 0x946b'd8d9, 0x8bcc'b33d, 0x147f'a075}, + {0x095d'7755, 0x7672'43e9, 0x70d5'1f4d, 0xec79'fd75}}, + {{0xc89d'7e69'5d57'5555, 0x5ace'6e38'2a42'7edd}, + {0xebfd'5b02'e41b'9755, 0x8c3a'54d9'c71a'd2ed}, + {0x2b75'4bac'76e7'dd55, 0xd9be'4f8b'6ffb'2afd}, + {0x8705'5066'15bc'2755, 0x435a'5e4d'24e3'870d}, + {0xfead'692f'c098'7555, 0xc90e'811e'e5d3'e71d}, + {0x926d'9609'777c'c755, 0x6ada'b800'b2cc'4b2d}, + {0x4245'd6f3'3a69'1d55, 0x28bf'02f2'8bcc'b33d}, + {0x0e36'2bed'095d'7755, 0x02bb'61f4'70d5'1f4d}}); + TestVectorInstruction(0xb500e457, // vmacc.vx v8, x1, v16, v0.t + {{85, 255, 169, 83, 253, 167, 81, 251, 165, 79, 249, 163, 77, 247, 161, 75}, + {245, 159, 73, 243, 157, 71, 241, 155, 69, 239, 153, 67, 237, 151, 65, 235}, + {149, 63, 233, 147, 61, 231, 145, 59, 229, 143, 57, 227, 141, 55, 225, 139}, + {53, 223, 137, 51, 221, 135, 49, 219, 133, 47, 217, 131, 45, 215, 129, 43}, + {213, 127, 41, 211, 125, 39, 209, 123, 37, 207, 121, 35, 205, 119, 33, 203}, + {117, 31, 201, 115, 29, 199, 113, 27, 197, 111, 25, 195, 109, 23, 193, 107}, + {21, 191, 105, 19, 189, 103, 17, 187, 101, 15, 185, 99, 13, 183, 97, 11}, + {181, 95, 9, 179, 93, 7, 177, 91, 5, 175, 89, 3, 173, 87, 1, 171}}, + {{0xff55, 0xa8a9, 0x51fd, 0xfb51, 0xa4a5, 0x4df9, 0xf74d, 0xa0a1}, + {0x49f5, 0xf349, 0x9c9d, 0x45f1, 0xef45, 0x9899, 0x41ed, 0xeb41}, + {0x9495, 0x3de9, 0xe73d, 0x9091, 0x39e5, 0xe339, 0x8c8d, 0x35e1}, + {0xdf35, 0x8889, 0x31dd, 0xdb31, 0x8485, 0x2dd9, 0xd72d, 0x8081}, + {0x29d5, 0xd329, 0x7c7d, 0x25d1, 0xcf25, 0x7879, 0x21cd, 0xcb21}, + {0x7475, 0x1dc9, 0xc71d, 0x7071, 0x19c5, 0xc319, 0x6c6d, 0x15c1}, + {0xbf15, 0x6869, 0x11bd, 0xbb11, 0x6465, 0x0db9, 0xb70d, 0x6061}, + {0x09b5, 0xb309, 0x5c5d, 0x05b1, 0xaf05, 0x5859, 0x01ad, 0xab01}}, + {{0x5353'ff55, 0xfb51'51fd, 0xa34e'a4a5, 0x4b4b'f74d}, + {0xf349'49f5, 0x9b46'9c9d, 0x4343'ef45, 0xeb41'41ed}, + {0x933e'9495, 0x3b3b'e73d, 0xe339'39e5, 0x8b36'8c8d}, + {0x3333'df35, 0xdb31'31dd, 0x832e'8485, 0x2b2b'd72d}, + {0xd329'29d5, 0x7b26'7c7d, 0x2323'cf25, 0xcb21'21cd}, + {0x731e'7475, 0x1b1b'c71d, 0xc319'19c5, 0x6b16'6c6d}, + {0x1313'bf15, 0xbb11'11bd, 0x630e'6465, 0x0b0b'b70d}, + {0xb309'09b5, 0x5b06'5c5d, 0x0303'af05, 0xab01'01ad}}, + {{0xfb51'51fd'5353'ff55, 0xa0a1'4ca2'a34e'a4a5}, + {0x45f1'4747'f349'49f5, 0xeb41'41ed'4343'ef45}, + {0x9091'3c92'933e'9495, 0x35e1'3737'e339'39e5}, + {0xdb31'31dd'3333'df35, 0x8081'2c82'832e'8485}, + {0x25d1'2727'd329'29d5, 0xcb21'21cd'2323'cf25}, + {0x7071'1c72'731e'7475, 0x15c1'1717'c319'19c5}, + {0xbb11'11bd'1313'bf15, 0x6061'0c62'630e'6465}, + {0x05b1'0707'b309'09b5, 0xab01'01ad'0303'af05}}); +} } // namespace } // namespace berberis diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index 48c32d1a..bb3b19d9 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -90,12 +90,14 @@ inline std::tuple<SIMD128Register> VectorArithmetic(Lambda lambda, } if (vstart == 0 && vl == static_cast<int>(16 / sizeof(ElementType))) { for (int index = vstart; index < vl; ++index) { - result.Set<ElementType>(lambda(VectorElement<ElementType>(source, index)...), index); + result.Set<ElementType>(lambda(VectorElement<ElementType>(result, index), + VectorElement<ElementType>(source, index)...), index); } } else { #pragma clang loop unroll(disable) for (int index = vstart; index < vl; ++index) { - result.Set<ElementType>(lambda(VectorElement<ElementType>(source, index)...), index); + result.Set<ElementType>(lambda(VectorElement<ElementType>(result, index), + VectorElement<ElementType>(source, index)...), index); } if constexpr (vta == TailProcessing::kAgnostic) { if (vl < static_cast<int>(16 / sizeof(ElementType))) { @@ -140,7 +142,8 @@ inline std::tuple<SIMD128Register> VectorArithmetic(Lambda lambda, #pragma clang loop unroll(disable) for (int index = vstart; index < vl; ++index) { if (mask & (1 << index)) { - result.Set<ElementType>(lambda(VectorElement<ElementType>(source, index)...), index); + result.Set<ElementType>(lambda(VectorElement<ElementType>(result, index), + VectorElement<ElementType>(source, index)...), index); } else if constexpr (vma == InactiveProcessing::kAgnostic) { result.Set<ElementType>(fill_value, index); } @@ -183,7 +186,7 @@ inline ElementType mask_bits(ElementType val) { SIMD128Register result, \ DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS parameters) { \ return VectorArithmetic<ElementType, vta>( \ - [](auto... args) { \ + []([[maybe_unused]] auto vd, auto... args) { \ static_assert((std::is_same_v<decltype(args), ElementType> && ...)); \ arithmetic; \ }, \ @@ -204,7 +207,7 @@ inline ElementType mask_bits(ElementType val) { SIMD128Register result, \ DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS parameters) { \ return VectorArithmetic<ElementType, vta, vma>( \ - [](auto... args) { \ + []([[maybe_unused]] auto vd, auto... args) { \ static_assert((std::is_same_v<decltype(args), ElementType> && ...)); \ arithmetic; \ }, \ @@ -246,7 +249,10 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(sll, auto [arg1, arg2] = std::tuple{args...}; (arg1 << mask_bits(arg2))) DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(sll, auto [arg1, arg2] = std::tuple{args...}; (arg1 << mask_bits(arg2))) - +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(macc, auto [arg1, arg2] = std::tuple{args...}; + ((arg1 * arg2) + vd)); +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(macc, auto [arg1, arg2] = std::tuple{args...}; + ((arg1 * arg2) + vd)); #undef DEFINE_ARITHMETIC_INTRINSIC #undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS |