diff options
author | Victor Khimenko <khim@google.com> | 2024-02-27 09:50:34 +0000 |
---|---|---|
committer | Victor Khimenko <khim@google.com> | 2024-02-27 19:41:05 +0000 |
commit | 2088e39f3872e6800bb8686819c64d09785722db (patch) | |
tree | 012b76dfa1c26859928382d55135fd2487dbeaaf | |
parent | 73cd47fce0dad10f0d6b15b15def28f8222bc82a (diff) | |
download | binary_translation-2088e39f3872e6800bb8686819c64d09785722db.tar.gz |
Add vfmv.s.f and vfmv.f.s instructions.
Test: berberis_all
Change-Id: I80d1ab78c6d60090ed75e024f8b0b2ef36bdf4ff
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 2 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.cc | 40 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 60 |
3 files changed, 90 insertions, 12 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index 39c1ad55..c9cabf68 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -296,7 +296,7 @@ class Decoder { kVfsgnjxvv = 0b001010, kVfmvfs = 0b010000, kVfcvtXX = 0b010010, - kVXXXv = 010011, // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v + kVXXXv = 0b010011, // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v kVmfeqvv = 0b011000, kVmflevv = 0b011001, kVmfltvv = 0b011011, diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index 8d86c11c..ca0eb5d4 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -1181,6 +1181,14 @@ class Interpreter { template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma> void OpVector(const Decoder::VOpFVfArgs& args, ElementType arg2) { switch (args.opcode) { + case Decoder::VOpFVfOpcode::kVfmvsf: + if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { + return Unimplemented(); + } + if (args.src1 != 0) { + return Unimplemented(); + } + return OpVectorVmvsx<ElementType, vta>(args.dst, arg2); case Decoder::VOpFVfOpcode::kVfmergevf: if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { return OpVectorvx<intrinsics::Vmergevx<ElementType>, ElementType, vlmul, vta, vma>( @@ -1211,6 +1219,14 @@ class Interpreter { } } else { switch (args.opcode) { + case Decoder::VOpFVvOpcode::kVfmvfs: + if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { + return Unimplemented(); + } + if (args.src2 != 0) { + return Unimplemented(); + } + return OpVectorVmvfs<ElementType>(args.dst, args.src1); default: return Unimplemented(); } @@ -1279,7 +1295,7 @@ class Interpreter { } case Decoder::VOpIViOpcode::kVmvvi: if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { - return OpvectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm)); + return OpVectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm)); } else { return Unimplemented(); } @@ -1447,7 +1463,7 @@ class Interpreter { if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { return Unimplemented(); } - return OpvectorVmvxs<SignedType>(args.dst, args.src1); + return OpVectorVmvxs<SignedType>(args.dst, args.src1); case Decoder::VXmXXsOpcode::kVcpopm: return OpVectorVXmXXs<intrinsics::Vcpopm<Int128>, vma>(args.dst, args.src1); case Decoder::VXmXXsOpcode::kVfirstm: @@ -1687,7 +1703,7 @@ class Interpreter { if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { return Unimplemented(); } - return OpvectorVmvsx<SignedType, vta>(args.dst, args.src2); + return OpVectorVmvsx<SignedType, vta>(args.dst, MaybeTruncateTo<SignedType>(arg2)); default: return Unimplemented(); } @@ -1915,8 +1931,19 @@ class Interpreter { SetCsr<CsrName::kVstart>(0); } + template <typename ElementType> + void OpVectorVmvfs(uint8_t dst, uint8_t src) { + // Note: intrinsics::NanBox always received Float64 argument, even if it processes Float32 value + // to not cause recursion in interinsics handling. + // NanBox in the interpreter takes FpRegister and returns FpRegister which is probably the + // cleanest way of processing that data (at least on x86-64 this produces code that's close to + // optimal). + NanBoxAndSetFpReg<ElementType>(dst, SIMD128Register{state_->cpu.v[src]}.Get<FpRegister>(0)); + SetCsr<CsrName::kVstart>(0); + } + template <typename ElementType, TailProcessing vta> - void OpvectorVmvsx(uint8_t dst, uint8_t src1) { + void OpVectorVmvsx(uint8_t dst, ElementType element) { size_t vstart = GetCsr<CsrName::kVstart>(); size_t vl = GetCsr<CsrName::kVl>(); // Documentation doesn't specify what happenes when vstart is non-zero but less than vl. @@ -1924,7 +1951,6 @@ class Interpreter { // https://github.com/riscv/riscv-v-spec/issues/937 // We are doing the same here. if (vstart == 0 && vl != 0) [[likely]] { - ElementType element = MaybeTruncateTo<ElementType>(GetRegOrZero(src1)); SIMD128Register result; if constexpr (vta == intrinsics::TailProcessing::kAgnostic) { result = ~SIMD128Register{}; @@ -1938,7 +1964,7 @@ class Interpreter { } template <typename ElementType> - void OpvectorVmvxs(uint8_t dst, uint8_t src1) { + void OpVectorVmvxs(uint8_t dst, uint8_t src1) { static_assert(ElementType::kIsSigned); // Conversion to Int64 would perform sign-extension if source element is signed. Register element = Int64{SIMD128Register{state_->cpu.v[src1]}.Get<ElementType>(0)}; @@ -2030,7 +2056,7 @@ class Interpreter { } template <typename ElementType> - void OpvectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) { + void OpVectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) { if (!IsPowerOf2(nf + 1)) { return Unimplemented(); } diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 93250a8c..aa67cfbc 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -831,6 +831,44 @@ class Riscv64InterpreterTest : public ::testing::Test { } template <typename ElementType> + void TestVfmvfs(uint32_t insn_bytes, uint64_t expected_result) { + state_.cpu.vtype = BitUtilLog2(sizeof(ElementType)) << 3; + state_.cpu.vstart = 0; + state_.cpu.vl = 0; + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>(); + SetFReg<1>(state_.cpu, 0x5555'5555'5555'5555); + EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); + } + + template <typename ElementType> + void TestVfmvsf(uint32_t insn_bytes, uint64_t boxed_value, ElementType unboxed_value) { + for (uint8_t vstart = 0; vstart < 2; ++vstart) { + for (uint8_t vl = 0; vl < 2; ++vl) { + for (uint8_t vta = 0; vta < 2; ++vta) { + state_.cpu.vtype = (vta << 6) | (BitUtilLog2(sizeof(ElementType)) << 3); + state_.cpu.vstart = vstart; + state_.cpu.vl = vl; + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>(); + SetFReg<1>(state_.cpu, boxed_value); + EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + if (vstart == 0 && vl != 0) { + SIMD128Register expected_result = + vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSource[0]}; + expected_result.Set<ElementType>(unboxed_value, 0); + EXPECT_EQ(state_.cpu.v[8], expected_result); + } else { + EXPECT_EQ(state_.cpu.v[8], + SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>()); + } + } + } + } + } + + template <typename ElementType> void TestVmvsx(uint32_t insn_bytes) { for (uint8_t vstart = 0; vstart < 2; ++vstart) { for (uint8_t vl = 0; vl < 2; ++vl) { @@ -839,18 +877,18 @@ class Riscv64InterpreterTest : public ::testing::Test { state_.cpu.vstart = vstart; state_.cpu.vl = vl; state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); - state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>(); + state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>(); SetXReg<1>(state_.cpu, 0x5555'5555'5555'5555); EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); if (vstart == 0 && vl != 0) { SIMD128Register expected_result = - vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSourceLegacy[0]}; + vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSource[0]}; expected_result.Set<ElementType>(MaybeTruncateTo<ElementType>(0x5555'5555'5555'5555), 0); EXPECT_EQ(state_.cpu.v[8], expected_result); } else { EXPECT_EQ(state_.cpu.v[8], - SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>()); + SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>()); } } } @@ -863,7 +901,7 @@ class Riscv64InterpreterTest : public ::testing::Test { state_.cpu.vstart = 0; state_.cpu.vl = 0; state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); - state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>(); + state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>(); SetXReg<1>(state_.cpu, 0x5555'5555'5555'5555); EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); @@ -1857,6 +1895,20 @@ TEST_F(Riscv64InterpreterTest, TestVmXr) { TestVmvXr<8>(0x9f03b457); // Vmv8r.v v8, v16 } +TEST_F(Riscv64InterpreterTest, TestVfmvfs) { + TestVfmvfs<intrinsics::Float32>(0x428010d7, 0xffff'ffff'8302'8100); // Vfmv.f.s f1, v8 + TestVfmvfs<intrinsics::Float64>(0x428010d7, 0x8706'8504'8302'8100); // Vfmv.f.s f1, v8 +} + +TEST_F(Riscv64InterpreterTest, TestVfmvsf) { + TestVfmvsf<intrinsics::Float32>(0x4200d457, // Vfmv.s.f v8, f1 + 0xffff'ffff'40b4'0000, + intrinsics::Float32{5.625f}); + TestVfmvsf<intrinsics::Float64>(0x4200d457, // Vfmv.s.f v8, f1 + 0x4016'8000'0000'0000, + intrinsics::Float64{5.625}); +} + TEST_F(Riscv64InterpreterTest, TestVmvsx) { TestVmvsx<Int8>(0x4200e457); // Vmv.s.x v8, x1 TestVmvsx<Int16>(0x4200e457); // Vmv.s.x v8, x1 |