aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Khimenko <khim@google.com>2023-04-21 12:42:17 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2023-04-21 12:42:17 +0000
commit2f02ab18928254520e361fb82ad36c2aa8b2e502 (patch)
tree29adb3bff935efd7d9a82d4d734c5e135a8fbf83
parentdc72d0b2e6a1bb771e9e0376aec91b539eb0efe3 (diff)
parent5ea1d0ca1e4975100374c8f844cd7f6684f908a4 (diff)
downloadbinary_translation-2f02ab18928254520e361fb82ad36c2aa8b2e502.tar.gz
Merge "interp: added FAdd instruction." am: 88e370a2bd am: 5ea1d0ca1e
Original change: https://android-review.googlesource.com/c/platform/frameworks/libs/binary_translation/+/2543850 Change-Id: I569b2daddedfa639f0e5d00a23004eb842c9821a Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r--decoder/include/berberis/decoder/riscv64/decoder.h45
-rw-r--r--decoder/include/berberis/decoder/riscv64/semantics_player.h7
-rw-r--r--interpreter/riscv64/fp_regs.h64
-rw-r--r--interpreter/riscv64/interpreter.cc37
-rw-r--r--interpreter/riscv64/interpreter_test.cc22
-rw-r--r--intrinsics/include/berberis/intrinsics/intrinsics_float.h14
-rw-r--r--intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h101
-rw-r--r--intrinsics/include/berberis/intrinsics/type_traits.h6
8 files changed, 284 insertions, 12 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index ec51fe37..57e79287 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -194,6 +194,17 @@ class Decoder {
kMaxAmoOpcode = 0b11111'111,
};
+ enum class OpFpOpcode {
+ // Bit #2 = 1 means rm is an opcode extension.
+ // Bit #3 = 1 means rs2 is an opcode extension.
+ // Bits #4, #1, and #0 - actual opcode.
+ kFAdd = 0b0'0'0'00,
+ kFSub = 0b0'0'0'01,
+ kFMul = 0b0'0'0'10,
+ kFDiv = 0b0'0'0'11,
+ kMaxOpFpOpcode = 0b1'1'1'11,
+ };
+
enum class LoadOpcode {
kLb = 0b000,
kLh = 0b001,
@@ -277,6 +288,14 @@ class Decoder {
kMaxCsrRegister = 0b11'11'1111'1111,
};
+ enum class FloatSize {
+ kFloat = 0b00,
+ kDouble = 0b01,
+ kHalf = 0b10,
+ kQuad = 0b11,
+ kMaxFloatSize = 0b11,
+ };
+
struct AmoArgs {
AmoOpcode opcode;
uint8_t dst;
@@ -379,6 +398,15 @@ class Decoder {
using StoreArgs = StoreArgsTemplate<StoreOpcode>;
using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>;
+ struct OpFpArgs {
+ OpFpOpcode opcode;
+ FloatSize float_size;
+ uint8_t dst;
+ uint8_t src1;
+ uint8_t src2;
+ uint8_t rm;
+ };
+
struct BranchArgs {
BranchOpcode opcode;
uint8_t src1;
@@ -580,6 +608,9 @@ class Decoder {
case BaseOpcode::kOpImm32:
DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>();
break;
+ case BaseOpcode::kOpFp:
+ DecodeOpFp();
+ break;
case BaseOpcode::kStore:
DecodeStore<StoreOpcode>();
break;
@@ -824,6 +855,20 @@ class Decoder {
insn_consumer_->JumpAndLink(args);
}
+ void DecodeOpFp() {
+ uint8_t float_size = GetBits<uint8_t, 25, 2>();
+ uint8_t opcode_bits = GetBits<uint8_t, 27, 5>();
+ const OpFpArgs args = {
+ .opcode = OpFpOpcode(opcode_bits),
+ .float_size = FloatSize(float_size),
+ .dst = GetBits<uint8_t, 7, 5>(),
+ .src1 = GetBits<uint8_t, 15, 5>(),
+ .src2 = GetBits<uint8_t, 20, 5>(),
+ .rm = GetBits<uint8_t, 12, 3>(),
+ };
+ insn_consumer_->OpFp(args);
+ }
+
void DecodeSystem() {
uint8_t low_opcode = GetBits<uint8_t, 12, 2>();
if (low_opcode == 0b00) {
diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h
index 40b559d3..e320b7ce 100644
--- a/decoder/include/berberis/decoder/riscv64/semantics_player.h
+++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h
@@ -135,6 +135,13 @@ class SemanticsPlayer {
SetRegOrIgnore(args.dst, result);
};
+ void OpFp(const typename Decoder::OpFpArgs& args) {
+ FpRegister arg1 = GetFpReg(args.src1);
+ FpRegister arg2 = GetFpReg(args.src2);
+ FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2);
+ SetFpReg(args.dst, result);
+ }
+
void Store(const typename Decoder::StoreArgs& args) {
Register arg = GetRegOrZero(args.src);
Register data = GetRegOrZero(args.data);
diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h
new file mode 100644
index 00000000..dd49f19d
--- /dev/null
+++ b/interpreter/riscv64/fp_regs.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_FP_REGS_H_
+#define BERBERIS_FP_REGS_H_
+
+#include <cstring>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+
+namespace berberis {
+
+template <typename FloatType>
+inline FloatType NanUnboxFPRegToFloat(uint64_t arg);
+
+template <>
+inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) {
+ // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on
+ // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e.,
+ // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the
+ // input value, otherwise the input value is treated as an n-bit canonical NaN.
+ if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) {
+ return bit_cast<intrinsics::Float32>(0x7fc00000);
+ }
+ intrinsics::Float32 result;
+ memcpy(&result, &arg, sizeof(intrinsics::Float32));
+ return result;
+}
+
+template <>
+inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) {
+ return bit_cast<intrinsics::Float64>(arg);
+}
+
+template <typename FloatType>
+inline uint64_t NanBoxFloatToFPReg(FloatType arg);
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) {
+ return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000;
+}
+
+template <>
+inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) {
+ return bit_cast<uint64_t>(arg);
+}
+
+} // namespace berberis
+
+#endif // BERBERIS_FP_REGS_H_
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index f4e82fea..a8fcbf61 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -28,10 +28,11 @@
#include "berberis/decoder/riscv64/semantics_player.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_riscv64.h"
-#include "berberis/intrinsics/riscv64/guest_fpstate.h"
+#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h"
#include "berberis/kernel_api/run_guest_syscall.h"
#include "atomics.h"
+#include "fp_regs.h"
namespace berberis {
@@ -42,6 +43,8 @@ class Interpreter {
using Decoder = Decoder<SemanticsPlayer<Interpreter>>;
using Register = uint64_t;
using FpRegister = uint64_t;
+ using Float32 = intrinsics::Float32;
+ using Float64 = intrinsics::Float64;
explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {}
@@ -352,6 +355,38 @@ class Interpreter {
return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
}
+ FpRegister OpFp(Decoder::OpFpOpcode opcode,
+ Decoder::FloatSize float_size,
+ uint8_t rm,
+ FpRegister arg1,
+ FpRegister arg2) {
+ switch (float_size) {
+ case Decoder::FloatSize::kFloat:
+ return NanBoxFloatToFPReg(OpFp<Float32>(
+ opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2)));
+ case Decoder::FloatSize::kDouble:
+ return NanBoxFloatToFPReg(OpFp<Float64>(
+ opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), NanUnboxFPRegToFloat<Float64>(arg2)));
+ default:
+ Unimplemented();
+ return {};
+ }
+ }
+
+ // TODO(b/278812060): switch to intrinsics when they become available and stop using
+ // ExecuteFloatOperation directly.
+ template <typename FloatType>
+ FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) {
+ switch (opcode) {
+ case Decoder::OpFpOpcode::kFAdd:
+ return intrinsics::ExecuteFloatOperation<FloatType>(
+ rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2);
+ default:
+ Unimplemented();
+ return {};
+ }
+ }
+
Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) {
switch (opcode) {
case Decoder::ShiftImmOpcode::kSlli:
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 742cf028..f72437f7 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -122,6 +122,17 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
}
+ void InterpretOpFp(uint32_t insn_bytes,
+ std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
+ for (auto [arg1, arg2, expected_result] : args) {
+ state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+ SetFReg<2>(state_.cpu, arg1);
+ SetFReg<3>(state_.cpu, arg2);
+ InterpretInsn(&state_);
+ EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
+ }
+ }
+
void InterpretFence(uint32_t insn_bytes) {
state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
InterpretInsn(&state_);
@@ -633,6 +644,17 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) {
InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
}
+TEST_F(Riscv64InterpreterTest, OpFpInstructions) {
+ // FAdd.S
+ InterpretOpFp(0x003100d3,
+ {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000,
+ bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}});
+ // FAdd.D
+ InterpretOpFp(0x023100d3,
+ {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}});
+}
+
TEST_F(Riscv64InterpreterTest, LoadInstructions) {
// Offset is always 8.
// Lbu
diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
index 30f83e55..694afba9 100644
--- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h
+++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h
@@ -64,23 +64,15 @@ class WrappedFloatType {
explicit constexpr operator uint32_t() const { return value_; }
explicit constexpr operator int64_t() const { return value_; }
explicit constexpr operator uint64_t() const { return value_; }
-
- auto BitCastToIntOfSameSize() {
- if constexpr (std::is_same_v<BaseType, float>) {
- return bit_cast<int32_t>(value_);
- } else {
- static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported.");
- return bit_cast<int64_t>(value_);
- }
- }
-
- // Only valid for BaseType==double. Returns the bit representation of the fp value.
explicit constexpr operator WrappedFloatType<float>() const {
return WrappedFloatType<float>(value_);
}
explicit constexpr operator WrappedFloatType<double>() const {
return WrappedFloatType<double>(value_);
}
+#if defined(__i386__) || defined(__x86_64__)
+ explicit constexpr operator long double() const { return value_; }
+#endif
// Note: we don't provide unary operator-. That's done on purpose: with floats -x and 0.-x
// produce different results which could be surprising. Use fneg instead of unary operator-.
friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2);
diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
new file mode 100644
index 00000000..db8ff249
--- /dev/null
+++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
+
+#include <limits>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"
+#include "berberis/intrinsics/riscv64/guest_fpstate.h" // ScopedRoundingMode
+#include "berberis/intrinsics/type_traits.h"
+
+namespace berberis::intrinsics {
+
+// x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it
+// has support for 80bit floats: if calculations are done with one bit (or more) of extra precision
+// in the FE_TOWARDZERO mode then we can easily adjust the fraction part and only need to remember
+// that this addition may overflow.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperationRmm(OperationType operation, Args... args)
+ -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+ using Wide = typename TypeTraits<FloatType>::Wide;
+ Wide wide_result = operation(Wide(args)...);
+ if constexpr (std::is_same_v<FloatType, Float32>) {
+ // In the 32bit->64bit case everything happens almost automatically, we just need to clear low
+ // bits to ensure that we are getting ±∞ and not NaN.
+ auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result);
+ if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) {
+ return FloatType(wide_result);
+ }
+ int_result += 0x0000'0000'1000'0000;
+ int_result &= 0xffff'ffff'e000'0000;
+ wide_result = bit_cast<Wide>(int_result);
+ } else if constexpr (std::is_same_v<FloatType, Float64>) {
+ // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not
+ // pseudo-infinity (supported on 8087/80287, but not on modern CPUs).
+ struct {
+ uint64_t significand;
+ uint16_t exponent;
+ uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)];
+ } fp80_parts;
+ static_assert(sizeof fp80_parts == sizeof(Wide));
+ memcpy(&fp80_parts, &wide_result, sizeof(wide_result));
+ // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V).
+ if ((fp80_parts.exponent & 0x7fff) == 0x7fff ||
+ (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) {
+ return FloatType(wide_result);
+ }
+ fp80_parts.significand += 0x0000'0000'0000'0400;
+ fp80_parts.significand &= 0xffff'ffff'ffff'f800;
+ if (fp80_parts.significand == 0) {
+ fp80_parts.exponent++;
+ fp80_parts.significand = 0x8000'0000'0000'0000;
+ }
+ memcpy(&wide_result, &fp80_parts, sizeof(wide_result));
+ }
+ return FloatType(wide_result);
+}
+
+// Note: the first round of rm/frm verification must happen before this function because RISC-V
+// postulates that invalid rm or frm should trigger an illegal instruction exception.
+// Here we can assume both rm and frm fields are valid.
+template <typename FloatType, typename OperationType, typename... Args>
+inline auto ExecuteFloatOperation(uint8_t requested_rm,
+ uint8_t current_rm,
+ OperationType operation,
+ Args... args)
+ -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> {
+ int host_requested_rm = ToHostRoundingMode(requested_rm);
+ int host_current_rm = ToHostRoundingMode(current_rm);
+ if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) {
+ uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm;
+ if (rm == FPFlags::RMM) {
+ return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+ }
+ return operation(args...);
+ }
+ ScopedRoundingMode scoped_rounding_mode{host_requested_rm};
+ if (requested_rm == FPFlags::RMM) {
+ return ExecuteFloatOperationRmm<FloatType>(operation, args...);
+ }
+ return operation(args...);
+}
+
+} // namespace berberis::intrinsics
+
+#endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h
index f1f6f75d..2785b92d 100644
--- a/intrinsics/include/berberis/intrinsics/type_traits.h
+++ b/intrinsics/include/berberis/intrinsics/type_traits.h
@@ -93,11 +93,17 @@ struct TypeTraits<int64_t> {
template <>
struct TypeTraits<intrinsics::Float32> {
using Int = int32_t;
+ using Wide = intrinsics::Float64;
};
template <>
struct TypeTraits<intrinsics::Float64> {
using Int = int64_t;
+ using Narrow = intrinsics::Float32;
+#if defined(__i386__) || defined(__x86_64__)
+ static_assert(sizeof(long double) > sizeof(intrinsics::Float64));
+ using Wide = long double;
+#endif
};
#if defined(__x86_64__)