aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Victor Khimenko <khim@google.com> 2024-02-27 09:50:34 +0000
committer Victor Khimenko <khim@google.com> 2024-02-27 19:41:05 +0000
commit 2088e39f3872e6800bb8686819c64d09785722db (patch)
tree 012b76dfa1c26859928382d55135fd2487dbeaaf
parent 73cd47fce0dad10f0d6b15b15def28f8222bc82a (diff)
download binary_translation-2088e39f3872e6800bb8686819c64d09785722db.tar.gz
Add vfmv.s.f and vfmv.f.s instructions.
Test: berberis_all
Change-Id: I80d1ab78c6d60090ed75e024f8b0b2ef36bdf4ff
-rw-r--r-- decoder/include/berberis/decoder/riscv64/decoder.h 2
-rw-r--r-- interpreter/riscv64/interpreter.cc 40
-rw-r--r-- interpreter/riscv64/interpreter_test.cc 60
3 files changed, 90 insertions, 12 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index 39c1ad55..c9cabf68 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -296,7 +296,7 @@ class Decoder {
kVfsgnjxvv = 0b001010,
kVfmvfs = 0b010000,
kVfcvtXX = 0b010010,
- kVXXXv = 010011, // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v
+ kVXXXv = 0b010011, // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v
kVmfeqvv = 0b011000,
kVmflevv = 0b011001,
kVmfltvv = 0b011011,
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index 8d86c11c..ca0eb5d4 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -1181,6 +1181,14 @@ class Interpreter {
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma>
void OpVector(const Decoder::VOpFVfArgs& args, ElementType arg2) {
switch (args.opcode) {
+ case Decoder::VOpFVfOpcode::kVfmvsf:
+ if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
+ return Unimplemented();
+ }
+ if (args.src1 != 0) {
+ return Unimplemented();
+ }
+ return OpVectorVmvsx<ElementType, vta>(args.dst, arg2);
case Decoder::VOpFVfOpcode::kVfmergevf:
if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return OpVectorvx<intrinsics::Vmergevx<ElementType>, ElementType, vlmul, vta, vma>(
@@ -1211,6 +1219,14 @@ class Interpreter {
}
} else {
switch (args.opcode) {
+ case Decoder::VOpFVvOpcode::kVfmvfs:
+ if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
+ return Unimplemented();
+ }
+ if (args.src2 != 0) {
+ return Unimplemented();
+ }
+ return OpVectorVmvfs<ElementType>(args.dst, args.src1);
default:
return Unimplemented();
}
@@ -1279,7 +1295,7 @@ class Interpreter {
}
case Decoder::VOpIViOpcode::kVmvvi:
if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
- return OpvectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm));
+ return OpVectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm));
} else {
return Unimplemented();
}
@@ -1447,7 +1463,7 @@ class Interpreter {
if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return Unimplemented();
}
- return OpvectorVmvxs<SignedType>(args.dst, args.src1);
+ return OpVectorVmvxs<SignedType>(args.dst, args.src1);
case Decoder::VXmXXsOpcode::kVcpopm:
return OpVectorVXmXXs<intrinsics::Vcpopm<Int128>, vma>(args.dst, args.src1);
case Decoder::VXmXXsOpcode::kVfirstm:
@@ -1687,7 +1703,7 @@ class Interpreter {
if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return Unimplemented();
}
- return OpvectorVmvsx<SignedType, vta>(args.dst, args.src2);
+ return OpVectorVmvsx<SignedType, vta>(args.dst, MaybeTruncateTo<SignedType>(arg2));
default:
return Unimplemented();
}
@@ -1915,8 +1931,19 @@ class Interpreter {
SetCsr<CsrName::kVstart>(0);
}
+ template <typename ElementType>
+ void OpVectorVmvfs(uint8_t dst, uint8_t src) {
+ // Note: intrinsics::NanBox always receives Float64 argument, even if it processes Float32 value
+ // to not cause recursion in intrinsics handling.
+ // NanBox in the interpreter takes FpRegister and returns FpRegister which is probably the
+ // cleanest way of processing that data (at least on x86-64 this produces code that's close to
+ // optimal).
+ NanBoxAndSetFpReg<ElementType>(dst, SIMD128Register{state_->cpu.v[src]}.Get<FpRegister>(0));
+ SetCsr<CsrName::kVstart>(0);
+ }
+
template <typename ElementType, TailProcessing vta>
- void OpvectorVmvsx(uint8_t dst, uint8_t src1) {
+ void OpVectorVmvsx(uint8_t dst, ElementType element) {
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
// Documentation doesn't specify what happenes when vstart is non-zero but less than vl.
@@ -1924,7 +1951,6 @@ class Interpreter {
// https://github.com/riscv/riscv-v-spec/issues/937
// We are doing the same here.
if (vstart == 0 && vl != 0) [[likely]] {
- ElementType element = MaybeTruncateTo<ElementType>(GetRegOrZero(src1));
SIMD128Register result;
if constexpr (vta == intrinsics::TailProcessing::kAgnostic) {
result = ~SIMD128Register{};
@@ -1938,7 +1964,7 @@ class Interpreter {
}
template <typename ElementType>
- void OpvectorVmvxs(uint8_t dst, uint8_t src1) {
+ void OpVectorVmvxs(uint8_t dst, uint8_t src1) {
static_assert(ElementType::kIsSigned);
// Conversion to Int64 would perform sign-extension if source element is signed.
Register element = Int64{SIMD128Register{state_->cpu.v[src1]}.Get<ElementType>(0)};
@@ -2030,7 +2056,7 @@ class Interpreter {
}
template <typename ElementType>
- void OpvectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) {
+ void OpVectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) {
if (!IsPowerOf2(nf + 1)) {
return Unimplemented();
}
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 93250a8c..aa67cfbc 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -831,6 +831,44 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
template <typename ElementType>
+ void TestVfmvfs(uint32_t insn_bytes, uint64_t expected_result) {
+ state_.cpu.vtype = BitUtilLog2(sizeof(ElementType)) << 3;
+ state_.cpu.vstart = 0;
+ state_.cpu.vl = 0;
+ state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+ state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
+ SetFReg<1>(state_.cpu, 0x5555'5555'5555'5555);
+ EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+ EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
+ }
+
+ template <typename ElementType>
+ void TestVfmvsf(uint32_t insn_bytes, uint64_t boxed_value, ElementType unboxed_value) {
+ for (uint8_t vstart = 0; vstart < 2; ++vstart) {
+ for (uint8_t vl = 0; vl < 2; ++vl) {
+ for (uint8_t vta = 0; vta < 2; ++vta) {
+ state_.cpu.vtype = (vta << 6) | (BitUtilLog2(sizeof(ElementType)) << 3);
+ state_.cpu.vstart = vstart;
+ state_.cpu.vl = vl;
+ state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+ state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
+ SetFReg<1>(state_.cpu, boxed_value);
+ EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+ if (vstart == 0 && vl != 0) {
+ SIMD128Register expected_result =
+ vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSource[0]};
+ expected_result.Set<ElementType>(unboxed_value, 0);
+ EXPECT_EQ(state_.cpu.v[8], expected_result);
+ } else {
+ EXPECT_EQ(state_.cpu.v[8],
+ SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>());
+ }
+ }
+ }
+ }
+ }
+
+ template <typename ElementType>
void TestVmvsx(uint32_t insn_bytes) {
for (uint8_t vstart = 0; vstart < 2; ++vstart) {
for (uint8_t vl = 0; vl < 2; ++vl) {
@@ -839,18 +877,18 @@ class Riscv64InterpreterTest : public ::testing::Test {
state_.cpu.vstart = vstart;
state_.cpu.vl = vl;
state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
- state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>();
+ state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
SetXReg<1>(state_.cpu, 0x5555'5555'5555'5555);
EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
if (vstart == 0 && vl != 0) {
SIMD128Register expected_result =
- vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSourceLegacy[0]};
+ vta ? ~SIMD128Register{} : SIMD128Register{kVectorCalculationsSource[0]};
expected_result.Set<ElementType>(MaybeTruncateTo<ElementType>(0x5555'5555'5555'5555),
0);
EXPECT_EQ(state_.cpu.v[8], expected_result);
} else {
EXPECT_EQ(state_.cpu.v[8],
- SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>());
+ SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>());
}
}
}
@@ -863,7 +901,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
state_.cpu.vstart = 0;
state_.cpu.vl = 0;
state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
- state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSourceLegacy[0]}.Get<__uint128_t>();
+ state_.cpu.v[8] = SIMD128Register{kVectorCalculationsSource[0]}.Get<__uint128_t>();
SetXReg<1>(state_.cpu, 0x5555'5555'5555'5555);
EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
@@ -1857,6 +1895,20 @@ TEST_F(Riscv64InterpreterTest, TestVmXr) {
TestVmvXr<8>(0x9f03b457); // Vmv8r.v v8, v16
}
+TEST_F(Riscv64InterpreterTest, TestVfmvfs) {
+ TestVfmvfs<intrinsics::Float32>(0x428010d7, 0xffff'ffff'8302'8100); // Vfmv.f.s f1, v8
+ TestVfmvfs<intrinsics::Float64>(0x428010d7, 0x8706'8504'8302'8100); // Vfmv.f.s f1, v8
+}
+
+TEST_F(Riscv64InterpreterTest, TestVfmvsf) {
+ TestVfmvsf<intrinsics::Float32>(0x4200d457, // Vfmv.s.f v8, f1
+ 0xffff'ffff'40b4'0000,
+ intrinsics::Float32{5.625f});
+ TestVfmvsf<intrinsics::Float64>(0x4200d457, // Vfmv.s.f v8, f1
+ 0x4016'8000'0000'0000,
+ intrinsics::Float64{5.625});
+}
+
TEST_F(Riscv64InterpreterTest, TestVmvsx) {
TestVmvsx<Int8>(0x4200e457); // Vmv.s.x v8, x1
TestVmvsx<Int16>(0x4200e457); // Vmv.s.x v8, x1