diff options
author | Victor Khimenko <khim@google.com> | 2024-03-04 17:15:06 +0000 |
---|---|---|
committer | Victor Khimenko <khim@google.com> | 2024-03-04 19:11:36 +0000 |
commit | c44a48ec868518cc6947476cc69cb368e1b9b594 (patch) | |
tree | 3bce53cd9c8a5d969d4feecee79c597663702a70 | |
parent | 9a2a3ccce7931524c4dd0772caac3599ec2f2a51 (diff) | |
download | binary_translation-c44a48ec868518cc6947476cc69cb368e1b9b594.tar.gz |
Merge OpVectorv and OpVectorvr functions.
And make it possible to optionally specify other CSR registers (vxrm can
be handled that way, too, although vxsat would need special treatment
since it's not intrinsic input, but output, instead).
Test: m berberis_all
Change-Id: Ibacfa43037d2170519a331a4b4e6976718e4680f
-rw-r--r-- | interpreter/riscv64/interpreter.h | 124 |
1 files changed, 52 insertions, 72 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h index 28245f54..6680fc0e 100644 --- a/interpreter/riscv64/interpreter.h +++ b/interpreter/riscv64/interpreter.h @@ -1385,53 +1385,59 @@ class Interpreter { case Decoder::VOpFVvOpcode::kVFUnary0: switch (args.vfunary0_opcode) { case Decoder::VFUnary0Opcode::kVfcvtxufv: - return OpVectorvr<[](int8_t frm, SIMD128Register src) { + return OpVectorv<[](int8_t frm, SIMD128Register src) { return intrinsics::Vfcvtv<UnsignedType, ElementType>(FPFlags::DYN, frm, src); }, - ElementType, - vlmul, - vta, - vma>(args.dst, args.src1); + ElementType, + vlmul, + vta, + vma, + CsrName::kFrm>(args.dst, args.src1); case Decoder::VFUnary0Opcode::kVfcvtxfv: - return OpVectorvr<[](int8_t frm, SIMD128Register src) { + return OpVectorv<[](int8_t frm, SIMD128Register src) { return intrinsics::Vfcvtv<SignedType, ElementType>(FPFlags::DYN, frm, src); }, - ElementType, - vlmul, - vta, - vma>(args.dst, args.src1); + ElementType, + vlmul, + vta, + vma, + CsrName::kFrm>(args.dst, args.src1); case Decoder::VFUnary0Opcode::kVfcvtfxuv: - return OpVectorvr<[](int8_t frm, SIMD128Register src) { + return OpVectorv<[](int8_t frm, SIMD128Register src) { return intrinsics::Vfcvtv<ElementType, UnsignedType>(FPFlags::DYN, frm, src); }, - UnsignedType, - vlmul, - vta, - vma>(args.dst, args.src1); + UnsignedType, + vlmul, + vta, + vma, + CsrName::kFrm>(args.dst, args.src1); case Decoder::VFUnary0Opcode::kVfcvtfxv: - return OpVectorvr<[](int8_t frm, SIMD128Register src) { + return OpVectorv<[](int8_t frm, SIMD128Register src) { return intrinsics::Vfcvtv<ElementType, SignedType>(FPFlags::DYN, frm, src); }, - SignedType, - vlmul, - vta, - vma>(args.dst, args.src1); + SignedType, + vlmul, + vta, + vma, + CsrName::kFrm>(args.dst, args.src1); case Decoder::VFUnary0Opcode::kVfcvtrtzxufv: - return OpVectorvr<[](int8_t frm, SIMD128Register src) { + return OpVectorv<[](int8_t frm, SIMD128Register src) { return intrinsics::Vfcvtv<UnsignedType, ElementType>(FPFlags::RTZ, frm, src); }, - ElementType, - vlmul, - vta, - vma>(args.dst, args.src1); + ElementType, + vlmul, + vta, + vma, + CsrName::kFrm>(args.dst, args.src1); case Decoder::VFUnary0Opcode::kVfcvtrtzxfv: - return OpVectorvr<[](int8_t frm, SIMD128Register src) { + return OpVectorv<[](int8_t frm, SIMD128Register src) { return intrinsics::Vfcvtv<SignedType, ElementType>(FPFlags::RTZ, frm, src); }, - ElementType, - vlmul, - vta, - vma>(args.dst, args.src1); + ElementType, + vlmul, + vta, + vma, + CsrName::kFrm>(args.dst, args.src1); default: break; // Make compiler happy. } @@ -2412,10 +2418,15 @@ class Interpreter { VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma, + CsrName... kExtraCsrs, typename... DstMaskType> void OpVectorv(uint8_t dst, uint8_t src1, DstMaskType... dst_mask) { - return OpVectorv<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>( - dst, src1, dst_mask...); + return OpVectorv<Intrinsic, + ElementType, + NumberOfRegistersInvolved(vlmul), + vta, + vma, + kExtraCsrs...>(dst, src1, dst_mask...); } template <auto Intrinsic, @@ -2423,6 +2434,7 @@ class Interpreter { size_t kRegistersInvolved, TailProcessing vta, auto vma, + CsrName... kExtraCsrs, typename... DstMaskType> void OpVectorv(uint8_t dst, uint8_t src, DstMaskType... dst_mask) { static_assert(sizeof...(dst_mask) <= 1); @@ -2448,46 +2460,14 @@ class Interpreter { result_mask.Set(state_->cpu.v[dst_mask_unpacked[0] + index]); } SIMD128Register arg{state_->cpu.v[src + index]}; - result = VectorMasking<ElementType, vta, vma>( - result, std::get<0>(Intrinsic(arg)), result_mask, vstart, vl, index, mask); - state_->cpu.v[dst + index] = result.Get<__uint128_t>(); - } - } - - template <auto Intrinsic, - typename ElementType, - VectorRegisterGroupMultiplier vlmul, - TailProcessing vta, - auto vma> - void OpVectorvr(uint8_t dst, uint8_t src1) { - return OpVectorvr<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>(dst, - src1); - } - - template <auto Intrinsic, - typename ElementType, - size_t kRegistersInvolved, - TailProcessing vta, - auto vma> - void OpVectorvr(uint8_t dst, uint8_t src) { - if (!IsAligned<kRegistersInvolved>(dst | src)) { - return Unimplemented(); - } - size_t vstart = GetCsr<CsrName::kVstart>(); - size_t vl = GetCsr<CsrName::kVl>(); - SetCsr<CsrName::kVstart>(0); - // When vstart >= vl, there are no body elements, and no elements are updated in any destination - // vector register group, including that no tail elements are updated with agnostic values. - if (vstart >= vl) [[unlikely]] { - return; - } - int8_t frm = GetCsr<CsrName::kFrm>(); - auto mask = GetMaskForVectorOperations<vma>(); - for (size_t index = 0; index < kRegistersInvolved; ++index) { - SIMD128Register result{state_->cpu.v[dst + index]}; - SIMD128Register arg{state_->cpu.v[src + index]}; - result = VectorMasking<ElementType, vta, vma>( - result, std::get<0>(Intrinsic(frm, arg)), vstart, vl, index, mask); + result = + VectorMasking<ElementType, vta, vma>(result, + std::get<0>(Intrinsic(GetCsr<kExtraCsrs>()..., arg)), + result_mask, + vstart, + vl, + index, + mask); state_->cpu.v[dst + index] = result.Get<__uint128_t>(); } } |