diff options
author | Victor Khimenko <khim@google.com> | 2023-04-21 13:49:47 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-04-21 13:49:47 +0000 |
commit | 010d846fc2e2e7a56f37d847dd715aed6fd0dc15 (patch) | |
tree | 29adb3bff935efd7d9a82d4d734c5e135a8fbf83 | |
parent | 7d94c252e52a80b873217d6d3ec85ccd6a2a5a39 (diff) | |
parent | 61e4b31bad6107df519d81b8fe9435981215bb86 (diff) | |
download | binary_translation-010d846fc2e2e7a56f37d847dd715aed6fd0dc15.tar.gz |
Merge "interp: added FAdd instruction." am: 88e370a2bd am: 5ea1d0ca1e am: 2f02ab1892 am: 61e4b31bad
Original change: https://android-review.googlesource.com/c/platform/frameworks/libs/binary_translation/+/2543850
Change-Id: I21bce6de7388994b03430825d214d36c7f7616e4
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/decoder.h | 45 | ||||
-rw-r--r-- | decoder/include/berberis/decoder/riscv64/semantics_player.h | 7 | ||||
-rw-r--r-- | interpreter/riscv64/fp_regs.h | 64 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter.cc | 37 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 22 | ||||
-rw-r--r-- | intrinsics/include/berberis/intrinsics/intrinsics_float.h | 14 | ||||
-rw-r--r-- | intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h | 101 | ||||
-rw-r--r-- | intrinsics/include/berberis/intrinsics/type_traits.h | 6 |
8 files changed, 284 insertions, 12 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index ec51fe37..57e79287 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -194,6 +194,17 @@ class Decoder { kMaxAmoOpcode = 0b11111'111, }; + enum class OpFpOpcode { + // Bit #2 = 1 means rm is an opcode extension. + // Bit #3 = 1 means rs2 is an opcode extension + // Bits #4, #1, and #0 - actual opcode. + kFAdd = 0b0'0'0'00, + kFSub = 0b0'0'0'01, + kFMul = 0b0'0'0'10, + kFDiv = 0b0'0'0'11, + kMaxOpFpOpcode = 0b1'1'1'11, + }; + enum class LoadOpcode { kLb = 0b000, kLh = 0b001, @@ -277,6 +288,14 @@ class Decoder { kMaxCsrRegister = 0b11'11'1111'1111, }; + enum class FloatSize { + kFloat = 0b00, + kDouble = 0b01, + kHalf = 0b10, + kQuad = 0b11, + kMaxFloatSize = 0b11, + }; + struct AmoArgs { AmoOpcode opcode; uint8_t dst; @@ -379,6 +398,15 @@ class Decoder { using StoreArgs = StoreArgsTemplate<StoreOpcode>; using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>; + struct OpFpArgs { + OpFpOpcode opcode; + FloatSize float_size; + uint8_t dst; + uint8_t src1; + uint8_t src2; + uint8_t rm; + }; + struct BranchArgs { BranchOpcode opcode; uint8_t src1; @@ -580,6 +608,9 @@ class Decoder { case BaseOpcode::kOpImm32: DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>(); break; + case BaseOpcode::kOpFp: + DecodeOpFp(); + break; case BaseOpcode::kStore: DecodeStore<StoreOpcode>(); break; @@ -824,6 +855,20 @@ class Decoder { insn_consumer_->JumpAndLink(args); } + void DecodeOpFp() { + uint8_t float_size = GetBits<uint8_t, 25, 2>(); + uint8_t opcode_bits = GetBits<uint8_t, 27, 5>(); + const OpFpArgs args = { + .opcode = OpFpOpcode(opcode_bits), + .float_size = FloatSize(float_size), + .dst = GetBits<uint8_t, 7, 5>(), + .src1 = GetBits<uint8_t, 15, 5>(), + .src2 = GetBits<uint8_t, 20, 5>(), + .rm = GetBits<uint8_t, 12, 3>(), + }; + insn_consumer_->OpFp(args); + } + void DecodeSystem() { uint8_t 
low_opcode = GetBits<uint8_t, 12, 2>(); if (low_opcode == 0b00) { diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h index 40b559d3..e320b7ce 100644 --- a/decoder/include/berberis/decoder/riscv64/semantics_player.h +++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h @@ -135,6 +135,13 @@ class SemanticsPlayer { SetRegOrIgnore(args.dst, result); }; + void OpFp(const typename Decoder::OpFpArgs& args) { + FpRegister arg1 = GetFpReg(args.src1); + FpRegister arg2 = GetFpReg(args.src2); + FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2); + SetFpReg(args.dst, result); + } + void Store(const typename Decoder::StoreArgs& args) { Register arg = GetRegOrZero(args.src); Register data = GetRegOrZero(args.data); diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h new file mode 100644 index 00000000..dd49f19d --- /dev/null +++ b/interpreter/riscv64/fp_regs.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef BERBERIS_FP_REGS_H_ +#define BERBERIS_FP_REGS_H_ + +#include <cstring> + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" + +namespace berberis { + +template <typename FloatType> +inline FloatType NanUnboxFPRegToFloat(uint64_t arg); + +template <> +inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) { + // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on + // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e., + // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the + // input value, otherwise the input value is treated as an n-bit canonical NaN. + if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) { + return bit_cast<intrinsics::Float32>(0x7fc00000); + } + intrinsics::Float32 result; + memcpy(&result, &arg, sizeof(intrinsics::Float32)); + return result; +} + +template <> +inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) { + return bit_cast<intrinsics::Float64>(arg); +} + +template <typename FloatType> +inline uint64_t NanBoxFloatToFPReg(FloatType arg); + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) { + return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000; +} + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) { + return bit_cast<uint64_t>(arg); +} + +} // namespace berberis + +#endif // BERBERIS_FP_REGS_H_ diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index f4e82fea..a8fcbf61 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -28,10 +28,11 @@ #include "berberis/decoder/riscv64/semantics_player.h" #include "berberis/guest_state/guest_addr.h" #include "berberis/guest_state/guest_state_riscv64.h" -#include "berberis/intrinsics/riscv64/guest_fpstate.h" +#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h" 
#include "berberis/kernel_api/run_guest_syscall.h" #include "atomics.h" +#include "fp_regs.h" namespace berberis { @@ -42,6 +43,8 @@ class Interpreter { using Decoder = Decoder<SemanticsPlayer<Interpreter>>; using Register = uint64_t; using FpRegister = uint64_t; + using Float32 = intrinsics::Float32; + using Float64 = intrinsics::Float64; explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {} @@ -352,6 +355,38 @@ class Interpreter { return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5); } + FpRegister OpFp(Decoder::OpFpOpcode opcode, + Decoder::FloatSize float_size, + uint8_t rm, + FpRegister arg1, + FpRegister arg2) { + switch (float_size) { + case Decoder::FloatSize::kFloat: + return NanBoxFloatToFPReg(OpFp<Float32>( + opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2))); + case Decoder::FloatSize::kDouble: + return NanBoxFloatToFPReg(OpFp<Float64>( + opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), NanUnboxFPRegToFloat<Float64>(arg2))); + default: + Unimplemented(); + return {}; + } + } + + // TODO(b/278812060): switch to intrinsics when they would become available and stop using + // ExecuteFloatOperation directly. 
+ template <typename FloatType> + FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) { + switch (opcode) { + case Decoder::OpFpOpcode::kFAdd: + return intrinsics::ExecuteFloatOperation<FloatType>( + rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2); + default: + Unimplemented(); + return {}; + } + } + Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) { switch (opcode) { case Decoder::ShiftImmOpcode::kSlli: diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 742cf028..f72437f7 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -122,6 +122,17 @@ class Riscv64InterpreterTest : public ::testing::Test { } } + void InterpretOpFp(uint32_t insn_bytes, + std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { + for (auto [arg1, arg2, expected_result] : args) { + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + SetFReg<2>(state_.cpu, arg1); + SetFReg<3>(state_.cpu, arg2); + InterpretInsn(&state_); + EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); + } + } + void InterpretFence(uint32_t insn_bytes) { state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); InterpretInsn(&state_); @@ -633,6 +644,17 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) { InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}}); } +TEST_F(Riscv64InterpreterTest, OpFpInstructions) { + // FAdd.S + InterpretOpFp(0x003100d3, + {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}}); + // FAdd.D + InterpretOpFp(0x023100d3, + {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}}); +} + TEST_F(Riscv64InterpreterTest, LoadInstructions) { // Offset is always 8. 
// Lbu diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h index 30f83e55..694afba9 100644 --- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h +++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h @@ -64,23 +64,15 @@ class WrappedFloatType { explicit constexpr operator uint32_t() const { return value_; } explicit constexpr operator int64_t() const { return value_; } explicit constexpr operator uint64_t() const { return value_; } - - auto BitCastToIntOfSameSize() { - if constexpr (std::is_same_v<BaseType, float>) { - return bit_cast<int32_t>(value_); - } else { - static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported."); - return bit_cast<int64_t>(value_); - } - } - - // Only valid for BaseType==double. Returns the bit representation of the fp value. explicit constexpr operator WrappedFloatType<float>() const { return WrappedFloatType<float>(value_); } explicit constexpr operator WrappedFloatType<double>() const { return WrappedFloatType<double>(value_); } +#if defined(__i386__) || defined(__x86_64__) + explicit constexpr operator long double() const { return value_; } +#endif // Note: we don't provide unary operator-. That's done on purpose: with floats -x and 0.-x // produce different results which could be surprising. Use fneg instead of unary operator-. 
 friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2); diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h new file mode 100644 index 00000000..db8ff249 --- /dev/null +++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ +#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ + +#include <limits> + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" +#include "berberis/intrinsics/riscv64/guest_fpstate.h" // ScopedRoundingMode +#include "berberis/intrinsics/type_traits.h" + +namespace berberis::intrinsics { + +// x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it +// has support for 80bit floats: if calculations are done with one bit (or more) of extra precision +// in the FE_TOWARDZERO mode then we can easily adjust fraction part and only need to remember that +// this addition may overflow. +template <typename FloatType, typename OperationType, typename... Args> +inline auto ExecuteFloatOperationRmm(OperationType operation, Args... 
args) + -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> { + using Wide = typename TypeTraits<FloatType>::Wide; + Wide wide_result = operation(Wide(args)...); + if constexpr (std::is_same_v<FloatType, Float32>) { + // In the 32bit->64bit case everything happens almost automatically, we just need to clear low + // bits to ensure that we are getting ±∞ and not NaN. + auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result); + if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) { + return FloatType(wide_result); + } + int_result += 0x0000'0000'1000'0000; + int_result &= 0xffff'ffff'e000'0000; + wide_result = bit_cast<Wide>(int_result); + } else if constexpr (std::is_same_v<FloatType, Float64>) { + // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not + // pseudo-infinity (supported on 8087/80287, but not on modern CPUs). + struct { + uint64_t significand; + uint16_t exponent; + uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)]; + } fp80_parts; + static_assert(sizeof fp80_parts == sizeof(Wide)); + memcpy(&fp80_parts, &wide_result, sizeof(wide_result)); + // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V). + if ((fp80_parts.exponent & 0x7fff) == 0x7fff || + (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) { + return FloatType(wide_result); + } + fp80_parts.significand += 0x0000'0000'0000'0400; + fp80_parts.significand &= 0xffff'ffff'ffff'f800; + if (fp80_parts.significand == 0) { + fp80_parts.exponent++; + fp80_parts.significand = 0x8000'0000'0000'0000; + } + memcpy(&wide_result, &fp80_parts, sizeof(wide_result)); + } + return FloatType(wide_result); +} + +// Note: first round of rm/frm verification must happen before that function because RISC-V +// postulates that invalid rm or frm should trigger illegal instruction exception. +// Here we can assume both rm and frm fields are valid. 
+template <typename FloatType, typename OperationType, typename... Args> +inline auto ExecuteFloatOperation(uint8_t requested_rm, + uint8_t current_rm, + OperationType operation, + Args... args) + -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> { + int host_requested_rm = ToHostRoundingMode(requested_rm); + int host_current_rm = ToHostRoundingMode(current_rm); + if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) { + uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm; + if (rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm<FloatType>(operation, args...); + } + return operation(args...); + } + ScopedRoundingMode scoped_rounding_mode{host_requested_rm}; + if (requested_rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm<FloatType>(operation, args...); + } + return operation(args...); +} + +} // namespace berberis::intrinsics + +#endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h index f1f6f75d..2785b92d 100644 --- a/intrinsics/include/berberis/intrinsics/type_traits.h +++ b/intrinsics/include/berberis/intrinsics/type_traits.h @@ -93,11 +93,17 @@ struct TypeTraits<int64_t> { template <> struct TypeTraits<intrinsics::Float32> { using Int = int32_t; + using Wide = intrinsics::Float64; }; template <> struct TypeTraits<intrinsics::Float64> { using Int = int64_t; + using Narrow = intrinsics::Float32; +#if defined(__i386__) || defined(__x86_64__) + static_assert(sizeof(long double) > sizeof(intrinsics::Float64)); + using Wide = long double; +#endif }; #if defined(__x86_64__) |