diff options
author | Ahmed Mohamed Mohamed <ahmed200615200@gmail.com> | 2024-03-03 13:31:33 +0000 |
---|---|---|
committer | Victor Khimenko <khim@google.com> | 2024-03-04 14:53:31 +0000 |
commit | ae7726ba95d2daa79febdabd45115c27e9b6f47e (patch) | |
tree | 0e18775beddca79b0ef3b1eb9afed0ba241da033 | |
parent | e22f587ea25b76f2290c34bd770a0c00c8f03f3d (diff) | |
download | binary_translation-ae7726ba95d2daa79febdabd45115c27e9b6f47e.tar.gz |
Implement narrowing floating point conversions.
Note: vfncvt.rod.f.f.w instruction is not implemented. It's rarely used
(we haven't observed it in the wild yet) and, more importantly, it uses a
rounding mode which is not supported by the x86-64 architecture, and our
intrinsics don't support it either.
Test: berberis_all
Change-Id: Ia9caa1fbb33db22a71d9f103b212da5892efee6f
-rw-r--r-- | interpreter/riscv64/interpreter.h | 110 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 91 |
2 files changed, 195 insertions, 6 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h index 66ab030c..28245f54 100644 --- a/interpreter/riscv64/interpreter.h +++ b/interpreter/riscv64/interpreter.h @@ -1243,6 +1243,38 @@ class Interpreter { vlmul, vta, vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtxufw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<UnsignedType, WideElementType>(FPFlags::DYN, frm, src); + }, + UnsignedType, + vlmul, + vta, + vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtxfw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<SignedType, WideElementType>(FPFlags::DYN, frm, src); + }, + SignedType, + vlmul, + vta, + vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtrtzxufw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<UnsignedType, WideElementType>(FPFlags::RTZ, frm, src); + }, + UnsignedType, + vlmul, + vta, + vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtrtzxfw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<SignedType, WideElementType>(FPFlags::RTZ, frm, src); + }, + SignedType, + vlmul, + vta, + vma>(args.dst, args.src1); default: break; // Make compiler happy. 
} @@ -1306,6 +1338,30 @@ class Interpreter { vlmul, vta, vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtfxuw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<ElementType, WideUnsignedType>(FPFlags::DYN, frm, src); + }, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtffw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<ElementType, WideElementType>(FPFlags::DYN, frm, src); + }, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1); + case Decoder::VFUnary0Opcode::kVfncvtfxw: + return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) { + return intrinsics::Vfcvtv<ElementType, WideSignedType>(FPFlags::DYN, frm, src); + }, + ElementType, + vlmul, + vta, + vma>(args.dst, args.src1); default: break; // Make compiler happy. } @@ -2712,6 +2768,60 @@ class Interpreter { } } + template <auto Intrinsic, + typename TargetElementType, + VectorRegisterGroupMultiplier vlmul, + TailProcessing vta, + auto vma> + void OpVectorNarrowwr(uint8_t dst, uint8_t src) { + return OpVectorNarrowwr<Intrinsic, + TargetElementType, + NumberOfRegistersInvolved(vlmul), + NumRegistersInvolvedForWideOperand(vlmul), + vta, + vma>(dst, src); + } + + template <auto Intrinsic, + typename TargetElementType, + size_t kDestRegistersInvolved, + size_t kSrcRegistersInvolved, + TailProcessing vta, + auto vma> + void OpVectorNarrowwr(uint8_t dst, uint8_t src) { + if constexpr (kDestRegistersInvolved == kSrcRegistersInvolved) { + if (!IsAligned<kDestRegistersInvolved>(dst | src)) { + return Unimplemented(); + } + } else if (!IsAligned<kDestRegistersInvolved>(dst) || !IsAligned<kSrcRegistersInvolved>(src)) { + return Unimplemented(); + } + size_t vstart = GetCsr<CsrName::kVstart>(); + size_t vl = GetCsr<CsrName::kVl>(); + SetCsr<CsrName::kVstart>(0); + // When vstart >= vl, there are no body elements, and no elements are updated in any destination + 
// vector register group, including that no tail elements are updated with agnostic values. + if (vstart >= vl) [[unlikely]] { + return; + } + int8_t frm = GetCsr<CsrName::kFrm>(); + auto mask = GetMaskForVectorOperations<vma>(); + for (size_t index = 0; index < kDestRegistersInvolved; index++) { + SIMD128Register orig_result(state_->cpu.v[dst + index]); + SIMD128Register arg_low(state_->cpu.v[src + 2 * index]); + SIMD128Register intrinsic_result = std::get<0>(Intrinsic(frm, arg_low)); + if constexpr (kSrcRegistersInvolved > 1) { + SIMD128Register arg_high(state_->cpu.v[src + 2 * index + 1]); + SIMD128Register result_high = std::get<0>(Intrinsic(frm, arg_high)); + intrinsic_result = std::get<0>( + intrinsics::VMergeBottomHalfToTop<TargetElementType>(intrinsic_result, result_high)); + } + auto result = VectorMasking<TargetElementType, vta, vma>( + orig_result, intrinsic_result, vstart, vl, index, mask); + state_->cpu.v[dst + index] = result.template Get<__uint128_t>(); + } + } + // SEW = 2*SEW op SEW template <auto Intrinsic, typename ElementType, diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index ec753c17..ce991806 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -977,10 +977,25 @@ class Riscv64InterpreterTest : public ::testing::Test { expected_result_int64); } + void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes, + const uint32_t (&expected_result_int32)[4][4], + const __v2du (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>( + insn_bytes, source, expected_result_int32); + } + + void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes, + const uint16_t (&expected_result_int16)[4][8], + const uint32_t (&expected_result_int32)[4][4], + const __v2du (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>( + insn_bytes, source, 
expected_result_int16, expected_result_int32); + } + void TestNarrowingVectorInstruction(uint32_t insn_bytes, - const uint8_t (&expected_result_int8)[8][16], - const uint16_t (&expected_result_int16)[8][8], - const uint32_t (&expected_result_int32)[8][4], + const uint8_t (&expected_result_int8)[4][16], + const uint16_t (&expected_result_int16)[4][8], + const uint32_t (&expected_result_int32)[4][4], const __v2du (&source)[16]) { TestVectorInstruction<TestVectorInstructionKind::kInteger, TestVectorInstructionMode::kNarrowing>( @@ -1018,10 +1033,12 @@ class Riscv64InterpreterTest : public ::testing::Test { template <TestVectorInstructionKind kTestVectorInstructionKind, TestVectorInstructionMode kTestVectorInstructionMode, typename... ElementType, + size_t... kResultsCount, size_t... kElementCount> - void TestVectorInstruction(uint32_t insn_bytes, - const __v2du (&source)[16], - const ElementType (&... expected_result)[8][kElementCount]) { + void TestVectorInstruction( + uint32_t insn_bytes, + const __v2du (&source)[16], + const ElementType (&... 
expected_result)[kResultsCount][kElementCount]) { auto Verify = [this, &source](uint32_t insn_bytes, uint8_t vsew, uint8_t vlmul_max, @@ -2115,6 +2132,68 @@ TEST_F(Riscv64InterpreterTest, TestVfcvtxfv) { {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}}, kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + 0x49881457, // Vfncvt.xu.f.w v8, v24, v0.t + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + 0x49889457, // Vfncvt.x.f.w v8, v24, v0.t + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x8000, 0x8000, 0xcacf, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction(0x498a1457, // Vfncvt.f.f.w v8, v24, v0.t + {{0x8000'0000, 0x8000'0000, 0xb165'd14e, 0x8000'0000}, + {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}, + {0x0000'0000, 0x0000'0000, 0x3561'd54a, 0x0000'0000}, + {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction(0x49891457, // Vfncvt.f.xu.w v8, v24, v0.t + {{0x5f1e'0c9a, 
0x5f0e'1c8a, 0x5f3e'2cba, 0x5f2e'3caa}, + {0x5f5e'4cda, 0x5f4e'5cca, 0x5f7e'6cfa, 0x5f6e'7cea}, + {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab}, + {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction(0x49899457, // Vfncvt.f.x.w v8, v24, v0.t + {{0xdec3'e6cc, 0xdee3'c6ec, 0xde83'a68c, 0xdea3'86ac}, + {0xde06'cc97, 0xde46'8cd7, 0xdbc9'82cb, 0xdd8c'18ac}, + {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab}, + {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + 0x498b1457, // Vfncvt.rtz.xu.f.w v8, v24, v0.t + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + 0x498b9457, // Vfncvt.rtz.x.f.w v8, v24, v0.t + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x8000, 0x8000, 0xcad0, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, + kVectorCalculationsSource); } TEST_F(Riscv64InterpreterTest, TestVfmvfs) { |