Add vfmv.s.f and vfmv.f.s instructions.

Test: berberis_all
Change-Id: I80d1ab78c6d60090ed75e024f8b0b2ef36bdf4ff
author: Victor Khimenko <khim@google.com> 2024-02-27 09:50:34 +0000
committer: Victor Khimenko <khim@google.com> 2024-02-27 19:41:05 +0000
commit: 2088e39f3872e6800bb8686819c64d09785722db (patch)
tree: 012b76dfa1c26859928382d55135fd2487dbeaaf
parent: 73cd47fce0dad10f0d6b15b15def28f8222bc82a (diff)
download: binary_translation-2088e39f3872e6800bb8686819c64d09785722db.tar.gz
3 files changed, 90 insertions, 12 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index 39c1ad55..c9cabf68 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -296,7 +296,7 @@ class Decoder {
     kVfsgnjxvv = 0b001010,
     kVfmvfs = 0b010000,
     kVfcvtXX = 0b010010,
-    kVXXXv = 010011,  // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v
+    kVXXXv = 0b010011,  // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v
     kVmfeqvv = 0b011000,
     kVmflevv = 0b011001,
     kVmfltvv = 0b011011,
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index 8d86c11c..ca0eb5d4 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -1181,6 +1181,14 @@ class Interpreter {
   template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma>
   void OpVector(const Decoder::VOpFVfArgs& args, ElementType arg2) {
     switch (args.opcode) {
+      case Decoder::VOpFVfOpcode::kVfmvsf:
+        if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
+          return Unimplemented();
+        }
+        if (args.src1 != 0) {
+          return Unimplemented();
+        }
+        return OpVectorVmvsx<ElementType, vta>(args.dst, arg2);
       case Decoder::VOpFVfOpcode::kVfmergevf:
         if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
           return OpVectorvx<intrinsics::Vmergevx<ElementType>, ElementType, vlmul, vta, vma>(
@@ -1211,6 +1219,14 @@ class Interpreter {
       }
     } else {
       switch (args.opcode) {
+        case Decoder::VOpFVvOpcode::kVfmvfs:
+          if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
+            return Unimplemented();
+          }
+          if (args.src2 != 0) {
+            return Unimplemented();
+          }
+          return OpVectorVmvfs<ElementType>(args.dst, args.src1);
         default:
           return Unimplemented();
       }
@@ -1279,7 +1295,7 @@ class Interpreter {
         }
       case Decoder::VOpIViOpcode::kVmvvi:
         if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
-          return OpvectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm));
+          return OpVectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm));
         } else {
           return Unimplemented();
         }
@@ -1447,7 +1463,7 @@ class Interpreter {
             if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
               return Unimplemented();
             }
-            return OpvectorVmvxs<SignedType>(args.dst, args.src1);
+            return OpVectorVmvxs<SignedType>(args.dst, args.src1);
           case Decoder::VXmXXsOpcode::kVcpopm:
               return OpVectorVXmXXs<intrinsics::Vcpopm<Int128>, vma>(args.dst, args.src1);
           case Decoder::VXmXXsOpcode::kVfirstm:
@@ -1687,7 +1703,7 @@ class Interpreter {
               if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
                 return Unimplemented();
               }
-              return OpvectorVmvsx<SignedType, vta>(args.dst, args.src2);
+              return OpVectorVmvsx<SignedType, vta>(args.dst, MaybeTruncateTo<SignedType>(arg2));
           default:
               return Unimplemented();
         }
@@ -1915,8 +1931,19 @@ class Interpreter {
     SetCsr<CsrName::kVstart>(0);
   }
 
+  template <typename ElementType>
+  void OpVectorVmvfs(uint8_t dst, uint8_t src) {
+    // Note: intrinsics::NanBox always receives Float64 argument, even if it processes Float32 value
+    // to not cause recursion in intrinsics handling.
+    // NanBox in the interpreter takes FpRegister and returns FpRegister which is probably the
+    // cleanest way of processing that data (at least on x86-64 this produces code that's close to
+    // optimal).
+    NanBoxAndSetFpReg<ElementType>(dst, SIMD128Register{state_->cpu.v[src]}.Get<FpRegister>(0));
+    SetCsr<CsrName::kVstart>(0);
+  }
+
   template <typename ElementType, TailProcessing vta>
-  void OpvectorVmvsx(uint8_t dst, uint8_t src1) {
+  void OpVectorVmvsx(uint8_t dst, ElementType element) {
     size_t vstart = GetCsr<CsrName::kVstart>();
     size_t vl = GetCsr<CsrName::kVl>();
     // Documentation doesn't specify what happenes when vstart is non-zero but less than vl.
@@ -1924,7 +1951,6 @@ class Interpreter {
     //   https://github.com/riscv/riscv-v-spec/issues/937
     // We are doing the same here.
     if (vstart == 0 && vl != 0) [[likely]] {
-      ElementType element = MaybeTruncateTo<ElementType>(GetRegOrZero(src1));
       SIMD128Register result;
       if constexpr (vta == intrinsics::TailProcessing::kAgnostic) {
         result = ~SIMD128Register{};
@@ -1938,7 +1964,7 @@ class Interpreter {
   }
 
   template <typename ElementType>
-  void OpvectorVmvxs(uint8_t dst, uint8_t src1) {
+  void OpVectorVmvxs(uint8_t dst, uint8_t src1) {
     static_assert(ElementType::kIsSigned);
     // Conversion to Int64 would perform sign-extension if source element is signed.
     Register element = Int64{SIMD128Register{state_->cpu.v[src1]}.Get<ElementType>(0)};
@@ -2030,7 +2056,7 @@ class Interpreter {
   }
 
   template <typename ElementType>
-  void OpvectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) {
+  void OpVectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) {
     if (!IsPowerOf2(nf + 1)) {
       return Unimplemented();
     }
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 93250a8c..aa67cfbc 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -831,6 +831,44 @@ class Riscv64InterpreterTest : public ::testing::Test {
   }
 
   template <typename ElementType>
+  void TestVfmvfs(uint32_t insn_bytes, uint64_t expected_result) {
+    state_.cpu.vtype = BitUtilLog2(sizeof(ElementType)) << 3;
+    state_.cpu.vstart = 0;
+    state_.cpu.vl = 0;
+    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+    state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
+    SetFReg<1>(state_.cpu, 0x5555'5555'5555'5555);
+    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+    EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
+  }
+
+  template <typename ElementType>
+  void TestVfmvsf(uint32_t insn_bytes, uint64_t boxed_value, ElementType unboxed_value) {
+    for (uint8_t vstart = 0; vstart < 2; ++vstart) {
+      for (uint8_t vl = 0; vl < 2; ++vl) {
+        for (uint8_t vta = 0; vta < 2; ++vta) {
+          state_.cpu.vtype = (vta << 6) | (BitUtilLog2(sizeof(ElementType)) << 3);
+          state_.cpu.vstart = vstart;
+          state_.cpu.vl = vl;
+          state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+          state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
+          SetFReg<1>(state_.cpu, boxed_value);
+          EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+          if (vstart == 0 && vl != 0) {
+            SIMD128Register expected_result =
+                vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSource[0]};
+            expected_result.Set<ElementType>(unboxed_value, 0);
+            EXPECT_EQ(state_.cpu.v[8], expected_result);
+          } else {
+            EXPECT_EQ(state_.cpu.v[8],
+                      SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>());
+          }
+        }
+      }
+    }
+  }
+
+  template <typename ElementType>
   void TestVmvsx(uint32_t insn_bytes) {
     for (uint8_t vstart = 0; vstart < 2; ++vstart) {
       for (uint8_t vl = 0; vl < 2; ++vl) {
@@ -839,18 +877,18 @@ class Riscv64InterpreterTest : public ::testing::Test {
           state_.cpu.vstart = vstart;
           state_.cpu.vl = vl;
           state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
-          state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>();
+          state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
           SetXReg<1>(state_.cpu, 0x5555'5555'5555'5555);
           EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
           if (vstart == 0 && vl != 0) {
             SIMD128Register expected_result =
-                vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSourceLegacy[0]};
+                vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSource[0]};
             expected_result.Set<ElementType>(MaybeTruncateTo<ElementType>(0x5555'5555'5555'5555),
                                              0);
             EXPECT_EQ(state_.cpu.v[8], expected_result);
           } else {
             EXPECT_EQ(state_.cpu.v[8],
-                      SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>());
+                      SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>());
           }
         }
       }
@@ -863,7 +901,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
     state_.cpu.vstart = 0;
     state_.cpu.vl = 0;
     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
-    state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>();
+    state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
     SetXReg<1>(state_.cpu, 0x5555'5555'5555'5555);
     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
@@ -1857,6 +1895,20 @@ TEST_F(Riscv64InterpreterTest, TestVmXr) {
   TestVmvXr<8>(0x9f03b457);  // Vmv8r.v v8, v16
 }
 
+TEST_F(Riscv64InterpreterTest, TestVfmvfs) {
+  TestVfmvfs<intrinsics::Float32>(0x428010d7, 0xffff'ffff'8302'8100);  // Vfmv.f.s f1, v8
+  TestVfmvfs<intrinsics::Float64>(0x428010d7, 0x8706'8504'8302'8100);  // Vfmv.f.s f1, v8
+}
+
+TEST_F(Riscv64InterpreterTest, TestVfmvsf) {
+  TestVfmvsf<intrinsics::Float32>(0x4200d457,  // Vfmv.s.f v8, f1
+                                  0xffff'ffff'40b4'0000,
+                                  intrinsics::Float32{5.625f});
+  TestVfmvsf<intrinsics::Float64>(0x4200d457,  // Vfmv.s.f v8, f1
+                                  0x4016'8000'0000'0000,
+                                  intrinsics::Float64{5.625});
+}
+
 TEST_F(Riscv64InterpreterTest, TestVmvsx) {
   TestVmvsx<Int8>(0x4200e457);   // Vmv.s.x v8, x1
   TestVmvsx<Int16>(0x4200e457);  // Vmv.s.x v8, x1
author	Victor Khimenko <khim@google.com>	2024-02-27 09:50:34 +0000
committer	Victor Khimenko <khim@google.com>	2024-02-27 19:41:05 +0000
commit	2088e39f3872e6800bb8686819c64d09785722db (patch)
tree	012b76dfa1c26859928382d55135fd2487dbeaaf
parent	73cd47fce0dad10f0d6b15b15def28f8222bc82a (diff)
download	binary_translation-2088e39f3872e6800bb8686819c64d09785722db.tar.gz