about summary refs log tree commit diff
diff options
context:
space:
mode:
author Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-04-22 01:27:17 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-04-22 01:27:17 +0000
commit 65790b034e7d97022c7c8ea9a6e930333d3c1fd9 (patch)
tree a076320347f30694dd7f815ea3c7ef0519645811
parent 722006fdeb16a49976325a0d74ab662992adba1a (diff)
parent 1bf3e03bcb66e3a87a0bc3c69affd8ee56a45a00 (diff)
download binary_translation-android14-release.tar.gz
Change-Id: I28d0860d6f3122d1425a55e2224d9e7cbc5dd0e7
-rw-r--r-- base/include/berberis/base/dependent_false.h 32
-rw-r--r-- decoder/include/berberis/decoder/riscv64/decoder.h 124
-rw-r--r-- decoder/include/berberis/decoder/riscv64/semantics_player.h 7
-rw-r--r-- guest_state/include/berberis/guest_state/guest_state_riscv64.h 28
-rw-r--r-- interpreter/riscv64/fp_regs.h 64
-rw-r--r-- interpreter/riscv64/interpreter.cc 37
-rw-r--r-- interpreter/riscv64/interpreter_test.cc 328
-rw-r--r-- intrinsics/include/berberis/intrinsics/intrinsics_float.h 14
-rw-r--r-- intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h 101
-rw-r--r-- intrinsics/include/berberis/intrinsics/type_traits.h 6
10 files changed, 633 insertions, 108 deletions
diff --git a/base/include/berberis/base/dependent_false.h b/base/include/berberis/base/dependent_false.h
new file mode 100644
index 00000000..f01a48e0
--- /dev/null
+++ b/base/include/berberis/base/dependent_false.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_BASE_DEPENDENT_FALSE_H_
+#define BERBERIS_BASE_DEPENDENT_FALSE_H_
+
+#include <type_traits>
+
+namespace berberis {
+
+template <typename T>
+inline constexpr bool kDependentTypeFalse = false;
+
+template <auto T>
+inline constexpr bool kDependentValueFalse = false;
+
+} // namespace berberis
+
+#endif // BERBERIS_BASE_DEPENDENT_FALSE_H_
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index dc8303d1..9f785732 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -194,6 +194,17 @@ class Decoder {
kMaxAmoOpcode = 0b11111'111,
};
+ enum class OpFpOpcode {
+ // Bit #2 = 1 means rm is an opcode extension.
+ // Bit #3 = 1 means rs2 is an opcode extension.
+ // Bits #4, #1, and #0 are the actual opcode.
+ kFAdd = 0b0'0'0'00,
+ kFSub = 0b0'0'0'01,
+ kFMul = 0b0'0'0'10,
+ kFDiv = 0b0'0'0'11,
+ kMaxOpFpOpcode = 0b1'1'1'11,
+ };
+
enum class LoadOpcode {
kLb = 0b000,
kLh = 0b001,
@@ -277,6 +288,14 @@ class Decoder {
kMaxCsrRegister = 0b11'11'1111'1111,
};
+ enum class FloatSize {
+ kFloat = 0b00,
+ kDouble = 0b01,
+ kHalf = 0b10,
+ kQuad = 0b11,
+ kMaxFloatSize = 0b11,
+ };
+
struct AmoArgs {
AmoOpcode opcode;
uint8_t dst;
@@ -379,6 +398,15 @@ class Decoder {
using StoreArgs = StoreArgsTemplate<StoreOpcode>;
using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>;
+ struct OpFpArgs {
+ OpFpOpcode opcode;
+ FloatSize float_size;
+ uint8_t dst;
+ uint8_t src1;
+ uint8_t src2;
+ uint8_t rm;
+ };
+
struct BranchArgs {
BranchOpcode opcode;
uint8_t src1;
@@ -430,13 +458,19 @@ class Decoder {
DecodeCAddi();
break;
case CompressedOpcode::kFld:
- DecodeCFld();
+ DecodeCompressedLoadStore<LoadFpOpcode::kFld>();
break;
case CompressedOpcode::kLw:
- DecodeCLw();
+ DecodeCompressedLoadStore<LoadOpcode::kLw>();
break;
case CompressedOpcode::kLd:
- DecodeCLd();
+ DecodeCompressedLoadStore<LoadOpcode::kLd>();
+ break;
+ case CompressedOpcode::kFsd:
+ DecodeCompressedLoadStore<StoreFpOpcode::kFsd>();
+ break;
+ case CompressedOpcode::kSd:
+ DecodeCompressedLoadStore<StoreOpcode::kSd>();
break;
default:
insn_consumer_->Unimplemented();
@@ -444,50 +478,37 @@ class Decoder {
return 2;
}
- void DecodeCLd() {
+ template <auto opcode>
+ void DecodeCompressedLoadStore() {
uint8_t low_imm = GetBits<uint8_t, 5, 2>();
uint8_t high_imm = GetBits<uint8_t, 10, 3>();
- uint8_t imm = (low_imm << 6 | high_imm << 3);
- uint8_t rd = GetBits<uint8_t, 2, 3>();
- uint8_t rs = GetBits<uint8_t, 7, 3>();
- const LoadArgs args = {
- .opcode = LoadOpcode::kLd,
- .dst = uint8_t(8 + rd),
- .src = uint8_t(8 + rs),
- .offset = imm,
- };
- insn_consumer_->Load(args);
- }
-
- void DecodeCLw() {
- constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
- uint8_t low_imm = GetBits<uint8_t, 5, 2>();
- uint8_t high_imm = GetBits<uint8_t, 10, 3>();
- uint8_t imm = (kLwLow[low_imm] | high_imm << 3);
- uint8_t rd = GetBits<uint8_t, 2, 3>();
- uint8_t rs = GetBits<uint8_t, 7, 3>();
- const LoadArgs args = {
- .opcode = LoadOpcode::kLw,
- .dst = uint8_t(8 + rd),
- .src = uint8_t(8 + rs),
- .offset = imm,
- };
- insn_consumer_->Load(args);
- }
-
- void DecodeCFld() {
- uint8_t low_imm = GetBits<uint8_t, 5, 2>();
- uint8_t high_imm = GetBits<uint8_t, 10, 3>();
- uint8_t imm = (low_imm << 6 | high_imm << 3);
+ uint8_t imm;
+ if constexpr ((uint8_t(opcode) & 1) == 0) {
+ constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
+ imm = (kLwLow[low_imm] | high_imm << 3);
+ } else {
+ imm = (low_imm << 6 | high_imm << 3);
+ }
uint8_t rd = GetBits<uint8_t, 2, 3>();
uint8_t rs = GetBits<uint8_t, 7, 3>();
- const LoadFpArgs args = {
- .opcode = LoadFpOpcode::kFld,
- .dst = uint8_t(8 + rd),
- .src = uint8_t(8 + rs),
- .offset = imm,
- };
- insn_consumer_->Load(args);
+ if constexpr (std::is_same_v<decltype(opcode), StoreOpcode> ||
+ std::is_same_v<decltype(opcode), StoreFpOpcode>) {
+ const StoreArgsTemplate<decltype(opcode)> args = {
+ .opcode = opcode,
+ .src = uint8_t(8 + rs),
+ .offset = imm,
+ .data = uint8_t(8 + rd),
+ };
+ insn_consumer_->Store(args);
+ } else {
+ const LoadArgsTemplate<decltype(opcode)> args = {
+ .opcode = opcode,
+ .dst = uint8_t(8 + rd),
+ .src = uint8_t(8 + rs),
+ .offset = imm,
+ };
+ insn_consumer_->Load(args);
+ }
}
void DecodeCAddi() {
@@ -578,6 +599,9 @@ class Decoder {
case BaseOpcode::kOpImm32:
DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>();
break;
+ case BaseOpcode::kOpFp:
+ DecodeOpFp();
+ break;
case BaseOpcode::kStore:
DecodeStore<StoreOpcode>();
break;
@@ -822,6 +846,20 @@ class Decoder {
insn_consumer_->JumpAndLink(args);
}
+ void DecodeOpFp() {
+ uint8_t float_size = GetBits<uint8_t, 25, 2>();
+ uint8_t opcode_bits = GetBits<uint8_t, 27, 5>();
+ const OpFpArgs args = {
+ .opcode = OpFpOpcode(opcode_bits),
+ .float_size = FloatSize(float_size),
+ .dst = GetBits<uint8_t, 7, 5>(),
+ .src1 = GetBits<uint8_t, 15, 5>(),
+ .src2 = GetBits<uint8_t, 20, 5>(),
+ .rm = GetBits<uint8_t, 12, 3>(),
+ };
+ insn_consumer_->OpFp(args);
+ }
+
void DecodeSystem() {
uint8_t low_opcode = GetBits<uint8_t, 12, 2>();
if (low_opcode == 0b00) {
diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h
index 40b559d3..e320b7ce 100644
--- a/decoder/include/berberis/decoder/riscv64/semantics_player.h
+++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h
@@ -135,6 +135,13 @@ class SemanticsPlayer {
SetRegOrIgnore(args.dst, result);
};
+ void OpFp(const typename Decoder::OpFpArgs& args) {
+ FpRegister arg1 = GetFpReg(args.src1);
+ FpRegister arg2 = GetFpReg(args.src2);
+ FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2);
+ SetFpReg(args.dst, result);
+ }
+
void Store(const typename Decoder::StoreArgs& args) {
Register arg = GetRegOrZero(args.src);
Register data = GetRegOrZero(args.data);
diff --git a/guest_state/include/berberis/guest_state/guest_state_riscv64.h b/guest_state/include/berberis/guest_state/guest_state_riscv64.h
index 8546311f..82aad665 100644
--- a/guest_state/include/berberis/guest_state/guest_state_riscv64.h
+++ b/guest_state/include/berberis/guest_state/guest_state_riscv64.h
@@ -19,6 +19,7 @@
#include <cstdint>
+#include "berberis/base/dependent_false.h"
#include "berberis/base/macros.h"
#include "berberis/guest_state/guest_addr.h"
@@ -74,6 +75,33 @@ inline void SetFReg(CPUState& state, uint64_t val) {
state.f[kIndex] = val;
}
+enum class RegisterType {
+ kReg,
+ kFpReg,
+};
+
+template <RegisterType register_type, uint8_t kIndex>
+inline auto GetReg(const CPUState& state) {
+ if constexpr (register_type == RegisterType::kReg) {
+ return GetXReg<kIndex>(state);
+ } else if constexpr (register_type == RegisterType::kFpReg) {
+ return GetFReg<kIndex>(state);
+ } else {
+ static_assert(kDependentValueFalse<register_type>, "Unsupported register type");
+ }
+}
+
+template <RegisterType register_type, uint8_t kIndex, typename Register>
+inline auto SetReg(CPUState& state, Register val) {
+ if constexpr (register_type == RegisterType::kReg) {
+ return SetXReg<kIndex>(state, val);
+ } else if constexpr (register_type == RegisterType::kFpReg) {
+ return SetFReg<kIndex>(state, val);
+ } else {
+ static_assert(kDependentValueFalse<register_type>, "Unsupported register type");
+ }
+}
+
struct ThreadState {
CPUState cpu;
};
diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h
new file mode 100644
index 00000000..dd49f19d
--- /dev/null
+++ b/interpreter/riscv64/fp_regs.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_FP_REGS_H_
+#define BERBERIS_FP_REGS_H_
+
+#include <cstring>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+
+namespace berberis {
+
+template <typename FloatType>
+inline FloatType NanUnboxFPRegToFloat(uint64_t arg);
+
+template <>
+inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) {
+ // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on
+ // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e.,
+ // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the
+ // input value, otherwise the input value is treated as an n-bit canonical NaN.
+ if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) {
+ return bit_cast<intrinsics::Float32>(0x7fc00000);
+ }
+ intrinsics::Float32 result;
+ memcpy(&result, &arg, sizeof(intrinsics::Float32));
+ return result;
+}
+
+template <>
+inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) {
+ return bit_cast<intrinsics::Float64>(arg);
+}
+
+template <typename FloatType>
+inline uint64_t NanBoxFloatToFPReg(FloatType arg);
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) {
+ return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000;
+}
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) {
+ return bit_cast<uint64_t>(arg);
+}
+
+} // namespace berberis
+
+#endif // BERBERIS_FP_REGS_H_
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index f4e82fea..a8fcbf61 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -28,10 +28,11 @@
#include "berberis/decoder/riscv64/semantics_player.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_riscv64.h"
-#include "berberis/intrinsics/riscv64/guest_fpstate.h"
+#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h"
#include "berberis/kernel_api/run_guest_syscall.h"
#include "atomics.h"
+#include "fp_regs.h"
namespace berberis {
@@ -42,6 +43,8 @@ class Interpreter {
using Decoder = Decoder<SemanticsPlayer<Interpreter>>;
using Register = uint64_t;
using FpRegister = uint64_t;
+ using Float32 = intrinsics::Float32;
+ using Float64 = intrinsics::Float64;
explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {}
@@ -352,6 +355,38 @@ class Interpreter {
return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
}
+ FpRegister OpFp(Decoder::OpFpOpcode opcode,
+ Decoder::FloatSize float_size,
+ uint8_t rm,
+ FpRegister arg1,
+ FpRegister arg2) {
+ switch (float_size) {
+ case Decoder::FloatSize::kFloat:
+ return NanBoxFloatToFPReg(OpFp<Float32>(
+ opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2)));
+ case Decoder::FloatSize::kDouble:
+ return NanBoxFloatToFPReg(OpFp<Float64>(
+ opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), NanUnboxFPRegToFloat<Float64>(arg2)));
+ default:
+ Unimplemented();
+ return {};
+ }
+ }
+
+ // TODO(b/278812060): switch to intrinsics once they become available and stop using
+ // ExecuteFloatOperation directly.
+ template <typename FloatType>
+ FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) {
+ switch (opcode) {
+ case Decoder::OpFpOpcode::kFAdd:
+ return intrinsics::ExecuteFloatOperation<FloatType>(
+ rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2);
+ default:
+ Unimplemented();
+ return {};
+ }
+ }
+
Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) {
switch (opcode) {
case Decoder::ShiftImmOpcode::kSlli:
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index e35b58f3..b6edfc4f 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -35,28 +35,24 @@ namespace {
class Riscv64InterpreterTest : public ::testing::Test {
public:
- void InterpretCLd(uint16_t insn_bytes, uint64_t offset) {
+ template <RegisterType register_type, uint64_t expected_result>
+ void InterpretCompressedStore(uint16_t insn_bytes, uint64_t offset) {
auto code_start = ToGuestAddr(&insn_bytes);
state_.cpu.insn_addr = code_start;
- SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
- InterpretInsn(&state_);
- EXPECT_EQ(GetXReg<8>(state_.cpu), kDataToLoad);
- }
-
- void InterpretCLw(uint16_t insn_bytes, uint64_t offset) {
- auto code_start = ToGuestAddr(&insn_bytes);
- state_.cpu.insn_addr = code_start;
- SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
+ store_area_ = 0;
+ SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
+ SetReg<register_type, 9>(state_.cpu, kDataToLoad);
InterpretInsn(&state_);
- EXPECT_EQ(GetXReg<8>(state_.cpu), uint64_t(int32_t(kDataToLoad)));
+ EXPECT_EQ(store_area_, expected_result);
}
- void InterpretCFld(uint16_t insn_bytes, uint64_t offset) {
+ template <RegisterType register_type, uint64_t expected_result>
+ void InterpretCompressedLoad(uint16_t insn_bytes, uint64_t offset) {
auto code_start = ToGuestAddr(&insn_bytes);
state_.cpu.insn_addr = code_start;
SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
InterpretInsn(&state_);
- EXPECT_EQ(GetFReg<8>(state_.cpu), kDataToLoad);
+ EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result);
}
void InterpretCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) {
@@ -102,6 +98,17 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
}
+ void InterpretOpFp(uint32_t insn_bytes,
+ std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
+ for (auto [arg1, arg2, expected_result] : args) {
+ state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+ SetFReg<2>(state_.cpu, arg1);
+ SetFReg<3>(state_.cpu, arg2);
+ InterpretInsn(&state_);
+ EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
+ }
+ }
+
void InterpretFence(uint32_t insn_bytes) {
state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
InterpretInsn(&state_);
@@ -224,38 +231,6 @@ class Riscv64InterpreterTest : public ::testing::Test {
ThreadState state_;
};
-TEST_F(Riscv64InterpreterTest, CLd) {
- union {
- uint16_t offset;
- struct {
- uint8_t : 3;
- uint8_t i3_i5 : 3;
- uint8_t i6_i7 : 2;
- } i_bits;
- };
- for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
- union {
- int16_t parcel;
- struct {
- uint8_t low_opcode : 2;
- uint8_t rd : 3;
- uint8_t i6_i7 : 2;
- uint8_t rs : 3;
- uint8_t i3_i5 : 3;
- uint8_t high_opcode : 3;
- } __attribute__((__packed__));
- } o_bits = {
- .low_opcode = 0b00,
- .rd = 0,
- .i6_i7 = i_bits.i6_i7,
- .rs = 0,
- .i3_i5 = i_bits.i3_i5,
- .high_opcode = 0b011,
- };
- InterpretCLd(o_bits.parcel, offset);
- }
-}
-
TEST_F(Riscv64InterpreterTest, CLw) {
union {
uint16_t offset;
@@ -280,18 +255,21 @@ TEST_F(Riscv64InterpreterTest, CLw) {
} __attribute__((__packed__));
} o_bits = {
.low_opcode = 0b00,
- .rd = 0,
+ .rd = 1,
.i6 = i_bits.i6,
.i2 = i_bits.i2,
.rs = 0,
.i3_i5 = i_bits.i3_i5,
.high_opcode = 0b010,
};
- InterpretCLw(o_bits.parcel, offset);
+ InterpretCompressedLoad<RegisterType::kReg,
+ static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad))>(o_bits.parcel,
+ offset);
}
}
-TEST_F(Riscv64InterpreterTest, CFld) {
+template <uint16_t opcode, auto execute_instruction_func>
+void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) {
union {
uint16_t offset;
struct {
@@ -303,26 +281,45 @@ TEST_F(Riscv64InterpreterTest, CFld) {
for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
union {
int16_t parcel;
- struct {
+ struct [[gnu::packed]] {
uint8_t low_opcode : 2;
uint8_t rd : 3;
uint8_t i6_i7 : 2;
uint8_t rs : 3;
uint8_t i3_i5 : 3;
uint8_t high_opcode : 3;
- } __attribute__((__packed__));
+ };
} o_bits = {
.low_opcode = 0b00,
- .rd = 0,
+ .rd = 1,
.i6_i7 = i_bits.i6_i7,
.rs = 0,
.i3_i5 = i_bits.i3_i5,
- .high_opcode = 0b001,
+ .high_opcode = 0b000,
};
- InterpretCFld(o_bits.parcel, offset);
+ (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
}
}
+TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) {
+ // c.Fld
+ TestCompressedLoadOrStore<
+ 0b001'000'000'00'000'00,
+ &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kFpReg, kDataToLoad>>(this);
+ // c.Ld
+ TestCompressedLoadOrStore<
+ 0b011'000'000'00'000'00,
+ &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kReg, kDataToLoad>>(this);
+ // c.Fsd
+ TestCompressedLoadOrStore<
+ 0b101'000'000'00'000'00,
+ &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kFpReg, kDataToLoad>>(this);
+ // c.Sd
+ TestCompressedLoadOrStore<
+ 0b111'000'000'00'000'00,
+ &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kReg, kDataToLoad>>(this);
+}
+
TEST_F(Riscv64InterpreterTest, CAddi) {
union {
int8_t offset;
@@ -633,6 +630,231 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) {
InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
}
+TEST_F(Riscv64InterpreterTest, OpFpInstructions) {
+ // FAdd.S
+ InterpretOpFp(0x003100d3,
+ {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}});
+ // FAdd.D
+ InterpretOpFp(0x023100d3,
+ {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}});
+}
+
+TEST_F(Riscv64InterpreterTest, RoundingModeTest) {
+ // FAdd.S
+ InterpretOpFp(0x003100d3,
+ // Test RNE
+ {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+ // FAdd.S
+ InterpretOpFp(0x003110d3,
+ // Test RTZ
+ {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}});
+ // FAdd.S
+ InterpretOpFp(0x003120d3,
+ // Test RDN
+ {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+ // FAdd.S
+ InterpretOpFp(0x003130d3,
+ // Test RUP
+ {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}});
+ // FAdd.S
+ InterpretOpFp(0x003140d3,
+ // Test RMM
+ {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000},
+ {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}});
+
+ // FAdd.D
+ InterpretOpFp(0x023100d3,
+ // Test RNE
+ {{bit_cast<uint64_t>(1.0000000000000002),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000004)},
+ {bit_cast<uint64_t>(1.0000000000000004),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000004)},
+ {bit_cast<uint64_t>(1.0000000000000007),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000009)},
+ {bit_cast<uint64_t>(-1.0000000000000002),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000004)},
+ {bit_cast<uint64_t>(-1.0000000000000004),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000004)},
+ {bit_cast<uint64_t>(-1.0000000000000007),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000009)}});
+ // FAdd.D
+ InterpretOpFp(0x023110d3,
+ // Test RTZ
+ {{bit_cast<uint64_t>(1.0000000000000002),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000002)},
+ {bit_cast<uint64_t>(1.0000000000000004),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000004)},
+ {bit_cast<uint64_t>(1.0000000000000007),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000007)},
+ {bit_cast<uint64_t>(-1.0000000000000002),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000002)},
+ {bit_cast<uint64_t>(-1.0000000000000004),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000004)},
+ {bit_cast<uint64_t>(-1.0000000000000007),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000007)}});
+ // FAdd.D
+ InterpretOpFp(0x023120d3,
+ // Test RDN
+ {{bit_cast<uint64_t>(1.0000000000000002),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000002)},
+ {bit_cast<uint64_t>(1.0000000000000004),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000004)},
+ {bit_cast<uint64_t>(1.0000000000000007),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000007)},
+ {bit_cast<uint64_t>(-1.0000000000000002),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000004)},
+ {bit_cast<uint64_t>(-1.0000000000000004),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000007)},
+ {bit_cast<uint64_t>(-1.0000000000000007),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000009)}});
+ // FAdd.D
+ InterpretOpFp(0x023130d3,
+ // Test RUP
+ {{bit_cast<uint64_t>(1.0000000000000002),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000004)},
+ {bit_cast<uint64_t>(1.0000000000000004),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000007)},
+ {bit_cast<uint64_t>(1.0000000000000007),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000009)},
+ {bit_cast<uint64_t>(-1.0000000000000002),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000002)},
+ {bit_cast<uint64_t>(-1.0000000000000004),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000004)},
+ {bit_cast<uint64_t>(-1.0000000000000007),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000007)}});
+ // FAdd.D
+ InterpretOpFp(0x023140d3,
+ // Test RMM
+ {{bit_cast<uint64_t>(1.0000000000000002),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000004)},
+ {bit_cast<uint64_t>(1.0000000000000004),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000007)},
+ {bit_cast<uint64_t>(1.0000000000000007),
+ bit_cast<uint64_t>(0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(1.0000000000000009)},
+ {bit_cast<uint64_t>(-1.0000000000000002),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000004)},
+ {bit_cast<uint64_t>(-1.0000000000000004),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000007)},
+ {bit_cast<uint64_t>(-1.0000000000000007),
+ bit_cast<uint64_t>(-0.00000000000000011102230246251565),
+ bit_cast<uint64_t>(-1.0000000000000009)}});
+}
+
TEST_F(Riscv64InterpreterTest, LoadInstructions) {
// Offset is always 8.
// Lbu
diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
index 30f83e55..694afba9 100644
--- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h
+++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
@@ -64,23 +64,15 @@ class WrappedFloatType {
explicit constexpr operator uint32_t() const { return value_; }
explicit constexpr operator int64_t() const { return value_; }
explicit constexpr operator uint64_t() const { return value_; }
-
- auto BitCastToIntOfSameSize() {
- if constexpr (std::is_same_v<BaseType, float>) {
- return bit_cast<int32_t>(value_);
- } else {
- static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported.");
- return bit_cast<int64_t>(value_);
- }
- }
-
- // Only valid for BaseType==double. Returns the bit representation of the fp value.
explicit constexpr operator WrappedFloatType<float>() const {
return WrappedFloatType<float>(value_);
}
explicit constexpr operator WrappedFloatType<double>() const {
return WrappedFloatType<double>(value_);
}
+#if defined(__i386__) || defined(__x86_64__)
+ explicit constexpr operator long double() const { return value_; }
+#endif
// Note: we don't provide unary operator-. That's done on purpose: with floats -x and 0.-x
// produce different results which could be surprising. Use fneg instead of unary operator-.
friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2);
diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
new file mode 100644
index 00000000..db8ff249
--- /dev/null
+++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+
+#include <limits>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+#include "berberis/intrinsics/riscv64/guest_fpstate.h" // ScopedRoundingMode
+#include "berberis/intrinsics/type_traits.h"
+
+namespace berberis::intrinsics {
+
+// The x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since
+// it has support for 80bit floats: if calculations are done with one bit (or more) of extra
+// precision in the FE_TOWARDZERO mode then we can round by adding half a narrow-type ULP and
+// clearing the extra bits; we only need to remember that this addition may overflow.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperationRmm(OperationType operation, Args... args)
+ -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+ using Wide = typename TypeTraits<FloatType>::Wide;
+ Wide wide_result = operation(Wide(args)...);
+ if constexpr (std::is_same_v<FloatType, Float32>) {
+ // In the 32bit->64bit case everything happens almost automatically: a carry out of the fraction
+ // bumps the exponent, and clearing the low 29 bits ensures that we are getting ±∞ and not NaN.
+ auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result);
+ if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) {
+ return FloatType(wide_result);
+ }
+ int_result += 0x0000'0000'1000'0000;
+ int_result &= 0xffff'ffff'e000'0000;
+ wide_result = bit_cast<Wide>(int_result);
+ } else if constexpr (std::is_same_v<FloatType, Float64>) {
+ // In the 64bit->80bit case a significand overflow must be fixed up by hand (see below) to ensure
+ // we are creating ±∞ and not pseudo-infinity (supported on 8087/80287, but not on modern CPUs).
+ struct {
+ uint64_t significand;
+ uint16_t exponent;
+ uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)];
+ } fp80_parts;
+ static_assert(sizeof fp80_parts == sizeof(Wide));
+ memcpy(&fp80_parts, &wide_result, sizeof(wide_result));
+ // Don't try to round ±∞, NaNs and ±0 (NOTE(review): assumes no denormal results - confirm).
+ if ((fp80_parts.exponent & 0x7fff) == 0x7fff ||
+ (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) {
+ return FloatType(wide_result);
+ }
+ fp80_parts.significand += 0x0000'0000'0000'0400;
+ fp80_parts.significand &= 0xffff'ffff'ffff'f800;
+ if (fp80_parts.significand == 0) {
+ fp80_parts.exponent++;
+ fp80_parts.significand = 0x8000'0000'0000'0000;
+ }
+ memcpy(&wide_result, &fp80_parts, sizeof(wide_result));
+ }
+ return FloatType(wide_result);
+}
+
+// Runs operation(args...) with requested_rm in effect (current_rm if requested_rm is DYN). Note:
+// rm/frm verification must happen before this function is called because RISC-V postulates that
+// an invalid rm or frm should trigger an illegal instruction exception; here both are valid.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperation(uint8_t requested_rm,
+ uint8_t current_rm,
+ OperationType operation,
+ Args... args)
+ -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+ int host_requested_rm = ToHostRoundingMode(requested_rm);
+ int host_current_rm = ToHostRoundingMode(current_rm);
+ if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) {
+ uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm;
+ if (rm == FPFlags::RMM) {
+ return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+ }
+ return operation(args...);
+ }
+ ScopedRoundingMode scoped_rounding_mode{host_requested_rm};
+ if (requested_rm == FPFlags::RMM) {
+ return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+ }
+ return operation(args...);
+}
+
+} // namespace berberis::intrinsics
+
+#endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h
index f1f6f75d..2785b92d 100644
--- a/intrinsics/include/berberis/intrinsics/type_traits.h
+++ b/intrinsics/include/berberis/intrinsics/type_traits.h
@@ -93,11 +93,17 @@ struct TypeTraits<int64_t> {
 template <>
 struct TypeTraits<intrinsics::Float32> {
 using Int = int32_t;
+ using Wide = intrinsics::Float64;  // Next wider floating point type.
 };
 template <>
 struct TypeTraits<intrinsics::Float64> {
 using Int = int64_t;
+ using Narrow = intrinsics::Float32;  // Next narrower floating point type.
+#if defined(__i386__) || defined(__x86_64__)
+ static_assert(sizeof(long double) > sizeof(intrinsics::Float64));
+ using Wide = long double;
+#endif  // defined(__i386__) || defined(__x86_64__)
 };
#if defined(__x86_64__)