diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-04-28 01:27:41 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-04-28 01:27:41 +0000 |
commit | ad1b04dca34f075c2d9b180eb4d4cba1242e2140 (patch) | |
tree | a076320347f30694dd7f815ea3c7ef0519645811 | |
parent | 61a7b2a21a273310747fea7bb339242e3042bab8 (diff) | |
parent | d95592f8a30c9de44a18a3e550638e6ecc845e9d (diff) | |
download | binary_translation-android14-qpr1-s2-release.tar.gz |
Snap for 10017868 from d95592f8a30c9de44a18a3e550638e6ecc845e9d to udc-qpr1-release
Refs: android-14.0.0_r27, android-14.0.0_r26, android-14.0.0_r25, android-14.0.0_r24, android-14.0.0_r23, android-14.0.0_r22, android-14.0.0_r21, android-14.0.0_r20, android-14.0.0_r19, android-14.0.0_r18, android-14.0.0_r17, android-14.0.0_r16, android14-qpr1-s2-release, android14-qpr1-release
Change-Id: I1ae7eaabaa22db1ff1cce6828792a52079c666f3
21 files changed, 1253 insertions, 20 deletions
diff --git a/base/Android.bp b/base/Android.bp index ea775e28..b07c5d40 100644 --- a/base/Android.bp +++ b/base/Android.bp @@ -48,6 +48,12 @@ cc_library_static { srcs: ["raw_syscall_x86_64.S"], }, }, + target: { + bionic: { + srcs: ["exec_region_elf_backed.cc"], + }, + }, + header_libs: ["libberberis_base_headers"], export_header_lib_headers: ["libberberis_base_headers"], } @@ -78,5 +84,10 @@ cc_test_library { "mmap_pool_test.cc", "pointer_and_counter_test.cc", ], + target: { + bionic: { + srcs: ["exec_region_elf_backed_test.cc"], + }, + }, header_libs: ["libberberis_base_headers"], } diff --git a/base/exec_region_elf_backed.cc b/base/exec_region_elf_backed.cc new file mode 100644 index 00000000..aa272c28 --- /dev/null +++ b/base/exec_region_elf_backed.cc @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "berberis/base/exec_region_elf_backed.h" + +#include <android/dlext.h> +#include <dlfcn.h> +#include <sys/mman.h> + +#include "berberis/base/bit_util.h" +#include "berberis/base/mmap.h" + +// Note that we have to use absolute path for ANDROID_DLEXT_FORCE_LOAD to work correctly +// otherwise searching by soname will trigger and the flag will have no effect. 
+#if defined(__LP64__) +const constexpr char* kExecRegionLibraryPath = "/system/lib64/libberberis_exec_region.so"; +#else +const constexpr char* kExecRegionLibraryPath = "/system/lib/libberberis_exec_region.so"; +#endif + +const constexpr char* kRegionStartSymbolName = "exec_region_start"; +const constexpr char* kRegionEndSymbolName = "exec_region_end"; + +namespace berberis { + +ExecRegion ExecRegionElfBackedFactory::Create(size_t size) { + size = AlignUpPageSize(size); + + android_dlextinfo dlextinfo{.flags = ANDROID_DLEXT_FORCE_LOAD}; + void* handle = android_dlopen_ext(kExecRegionLibraryPath, RTLD_NOW, &dlextinfo); + if (handle == nullptr) { + FATAL("Couldn't load \"%s\": %s", kExecRegionLibraryPath, dlerror()); + } + void* region_start = dlsym(handle, kRegionStartSymbolName); + CHECK(region_start != nullptr); + auto region_start_addr = bit_cast<uintptr_t>(region_start); + CHECK(region_start_addr % kPageSize == 0); + + void* region_end = dlsym(handle, kRegionEndSymbolName); + CHECK(region_end != nullptr); + auto region_end_addr = bit_cast<uintptr_t>(region_end); + CHECK(region_end_addr % kPageSize == 0); + size_t region_size = region_end_addr - region_start_addr; + CHECK_GE(region_size, size); + + return ExecRegion{ + static_cast<uint8_t*>(MmapImplOrDie({.addr = region_start, + .size = region_size, + .prot = PROT_READ | PROT_WRITE | PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS})), + region_size}; +} + +} // namespace berberis diff --git a/base/exec_region_elf_backed_test.cc b/base/exec_region_elf_backed_test.cc new file mode 100644 index 00000000..3eeb4570 --- /dev/null +++ b/base/exec_region_elf_backed_test.cc @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" + +#include "berberis/base/exec_region_elf_backed.h" + +#include <dlfcn.h> + +namespace berberis { + +namespace { + +TEST(ExecRegionElfBacked, Smoke) { + const char buf[] = "deadbeef"; + + ExecRegion exec = ExecRegionElfBackedFactory::Create(sizeof(buf)); + const uint8_t* code = exec.begin(); + ASSERT_NE(nullptr, code); + + exec.Write(code, buf, sizeof(buf)); + ASSERT_EQ('f', code[7]); + + exec.Detach(); + ASSERT_EQ('f', code[7]); + + exec.Free(); +} + +TEST(ExecRegionElfBacked, PltIsExecutable_b_254823538) { + // DlClose calls .plt section for __cxa_finalize + // This test makes sure it is called without incidents + // http://b/254823538 + void* handle = dlopen("libberberis_exec_region.so", RTLD_NOW); + EXPECT_NE(handle, nullptr) << dlerror(); + dlclose(handle); +} + +TEST(ExecRegionElfBacked, TwoRegionsHaveDifferentAddresses) { + auto region1 = ExecRegionElfBackedFactory::Create(1); + auto region2 = ExecRegionElfBackedFactory::Create(1); + EXPECT_NE(region1.begin(), region2.begin()); + region1.Free(); + region2.Free(); +} + +TEST(ExecRegionElfBacked, RegionOfDifferentSizes) { + auto region = ExecRegionElfBackedFactory::Create(ExecRegionElfBackedFactory::kExecRegionSize); + region.Free(); + // Anything bigger should result in a CHECK failure. 
+ EXPECT_DEATH( + (void)ExecRegionElfBackedFactory::Create(ExecRegionElfBackedFactory::kExecRegionSize + 1), + ""); +} + +} // namespace + +} // namespace berberis diff --git a/base/include/berberis/base/dependent_false.h b/base/include/berberis/base/dependent_false.h new file mode 100644 index 00000000..f01a48e0 --- /dev/null +++ b/base/include/berberis/base/dependent_false.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef BERBERIS_BASE_DEPENDENT_FALSE_H_ +#define BERBERIS_BASE_DEPENDENT_FALSE_H_ + +#include <type_traits> + +namespace berberis { + +template <typename T> +inline constexpr bool kDependentTypeFalse = false; + +template <auto T> +inline constexpr bool kDependentValueFalse = false; + +} // namespace berberis + +#endif // BERBERIS_BASE_DEPENDENT_FALSE_H_ diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index de0b9d44..9f785732 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -112,6 +112,20 @@ class Decoder { kMaxCompressedOpcode = 0b111'11, }; + enum class CsrOpcode { + kCsrrw = 0b01, + kCsrrs = 0b10, + kCsrrc = 0b11, + kMaxCsrOpcode = 0b11, + }; + + enum class CsrImmOpcode { + kCsrrwi = 0b01, + kCsrrsi = 0b10, + kCsrrci = 0b11, + kMaxCsrOpcode = 0b11, + }; + enum class FenceOpcode { kFence = 0b0000, kFenceTso = 0b1000, @@ -180,6 +194,17 @@ class Decoder { kMaxAmoOpcode = 0b11111'111, }; + enum class OpFpOpcode { + // Bit #2 = 1 means rm is an opcode extension. + // Bit #3 = 1 means rs2 is an opcode extension + // Bits #4, #1, and #0 - actual opcode. 
+ kFAdd = 0b0'0'0'00, + kFSub = 0b0'0'0'01, + kFMul = 0b0'0'0'10, + kFDiv = 0b0'0'0'11, + kMaxOpFpOpcode = 0b1'1'1'11, + }; + enum class LoadOpcode { kLb = 0b000, kLh = 0b001, @@ -256,6 +281,21 @@ class Decoder { kMaxBranchOpcode = 0b111, }; + enum class CsrRegister { + kFFlags = 0b00'00'0000'0001, + kFrm = 0b00'00'0000'0010, + kFCsr = 0b00'00'0000'0011, + kMaxCsrRegister = 0b11'11'1111'1111, + }; + + enum class FloatSize { + kFloat = 0b00, + kDouble = 0b01, + kHalf = 0b10, + kQuad = 0b11, + kMaxFloatSize = 0b11, + }; + struct AmoArgs { AmoOpcode opcode; uint8_t dst; @@ -265,6 +305,20 @@ class Decoder { bool aq : 1; }; + struct CsrArgs { + CsrOpcode opcode; + uint8_t dst; + uint8_t src; + CsrRegister csr; + }; + + struct CsrImmArgs { + CsrImmOpcode opcode; + uint8_t dst; + uint8_t imm; + CsrRegister csr; + }; + struct FenceArgs { FenceOpcode opcode; uint8_t dst; @@ -344,6 +398,15 @@ class Decoder { using StoreArgs = StoreArgsTemplate<StoreOpcode>; using StoreFpArgs = StoreArgsTemplate<StoreFpOpcode>; + struct OpFpArgs { + OpFpOpcode opcode; + FloatSize float_size; + uint8_t dst; + uint8_t src1; + uint8_t src2; + uint8_t rm; + }; + struct BranchArgs { BranchOpcode opcode; uint8_t src1; @@ -394,12 +457,60 @@ class Decoder { case CompressedOpcode::kAddi: DecodeCAddi(); break; + case CompressedOpcode::kFld: + DecodeCompressedLoadStore<LoadFpOpcode::kFld>(); + break; + case CompressedOpcode::kLw: + DecodeCompressedLoadStore<LoadOpcode::kLw>(); + break; + case CompressedOpcode::kLd: + DecodeCompressedLoadStore<LoadOpcode::kLd>(); + break; + case CompressedOpcode::kFsd: + DecodeCompressedLoadStore<StoreFpOpcode::kFsd>(); + break; + case CompressedOpcode::kSd: + DecodeCompressedLoadStore<StoreOpcode::kSd>(); + break; default: insn_consumer_->Unimplemented(); } return 2; } + template <auto opcode> + void DecodeCompressedLoadStore() { + uint8_t low_imm = GetBits<uint8_t, 5, 2>(); + uint8_t high_imm = GetBits<uint8_t, 10, 3>(); + uint8_t imm; + if constexpr ((uint8_t(opcode) 
& 1) == 0) { + constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44}; + imm = (kLwLow[low_imm] | high_imm << 3); + } else { + imm = (low_imm << 6 | high_imm << 3); + } + uint8_t rd = GetBits<uint8_t, 2, 3>(); + uint8_t rs = GetBits<uint8_t, 7, 3>(); + if constexpr (std::is_same_v<decltype(opcode), StoreOpcode> || + std::is_same_v<decltype(opcode), StoreFpOpcode>) { + const StoreArgsTemplate<decltype(opcode)> args = { + .opcode = opcode, + .src = uint8_t(8 + rs), + .offset = imm, + .data = uint8_t(8 + rd), + }; + insn_consumer_->Store(args); + } else { + const LoadArgsTemplate<decltype(opcode)> args = { + .opcode = opcode, + .dst = uint8_t(8 + rd), + .src = uint8_t(8 + rs), + .offset = imm, + }; + insn_consumer_->Load(args); + } + } + void DecodeCAddi() { uint8_t low_imm = GetBits<uint8_t, 2, 5>(); uint8_t high_imm = GetBits<uint8_t, 12, 1>(); @@ -488,6 +599,9 @@ class Decoder { case BaseOpcode::kOpImm32: DecodeOp<OpImm32Opcode, ShiftImm32Opcode, 5>(); break; + case BaseOpcode::kOpFp: + DecodeOpFp(); + break; case BaseOpcode::kStore: DecodeStore<StoreOpcode>(); break; @@ -732,12 +846,47 @@ class Decoder { insn_consumer_->JumpAndLink(args); } + void DecodeOpFp() { + uint8_t float_size = GetBits<uint8_t, 25, 2>(); + uint8_t opcode_bits = GetBits<uint8_t, 27, 5>(); + const OpFpArgs args = { + .opcode = OpFpOpcode(opcode_bits), + .float_size = FloatSize(float_size), + .dst = GetBits<uint8_t, 7, 5>(), + .src1 = GetBits<uint8_t, 15, 5>(), + .src2 = GetBits<uint8_t, 20, 5>(), + .rm = GetBits<uint8_t, 12, 3>(), + }; + insn_consumer_->OpFp(args); + } + void DecodeSystem() { - int32_t opcode = GetBits<uint32_t, 7, 25>(); - const SystemArgs args = { - .opcode = SystemOpcode(opcode), + uint8_t low_opcode = GetBits<uint8_t, 12, 2>(); + if (low_opcode == 0b00) { + int32_t opcode = GetBits<uint32_t, 7, 25>(); + const SystemArgs args = { + .opcode = SystemOpcode(opcode), + }; + return insn_consumer_->System(args); + } + if (GetBits<uint8_t, 14, 1>()) { + CsrImmOpcode opcode = 
CsrImmOpcode(low_opcode); + const CsrImmArgs args = { + .opcode = opcode, + .dst = GetBits<uint8_t, 7, 5>(), + .imm = GetBits<uint8_t, 15, 5>(), + .csr = CsrRegister(GetBits<uint16_t, 20, 12>()), + }; + return insn_consumer_->Csr(args); + } + CsrOpcode opcode = CsrOpcode(low_opcode); + const CsrArgs args = { + .opcode = opcode, + .dst = GetBits<uint8_t, 7, 5>(), + .src = GetBits<uint8_t, 15, 5>(), + .csr = CsrRegister(GetBits<uint16_t, 20, 12>()), }; - insn_consumer_->System(args); + return insn_consumer_->Csr(args); } void DecodeJumpAndLinkRegister() { diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h index 683b6dea..e320b7ce 100644 --- a/decoder/include/berberis/decoder/riscv64/semantics_player.h +++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h @@ -35,6 +35,19 @@ class SemanticsPlayer { // Decoder's InsnConsumer implementation. + void Csr(const typename Decoder::CsrArgs& args) { + Register result; + Register arg = GetRegOrZero(args.src); + result = listener_->Csr(args.opcode, arg, args.csr); + SetRegOrIgnore(args.dst, result); + } + + void Csr(const typename Decoder::CsrImmArgs& args) { + Register result; + result = listener_->Csr(args.opcode, args.imm, args.csr); + SetRegOrIgnore(args.dst, result); + } + void Fence(const typename Decoder::FenceArgs& args) { listener_->Fence(args.opcode, args.src, @@ -122,6 +135,13 @@ class SemanticsPlayer { SetRegOrIgnore(args.dst, result); }; + void OpFp(const typename Decoder::OpFpArgs& args) { + FpRegister arg1 = GetFpReg(args.src1); + FpRegister arg2 = GetFpReg(args.src2); + FpRegister result = listener_->OpFp(args.opcode, args.float_size, args.rm, arg1, arg2); + SetFpReg(args.dst, result); + } + void Store(const typename Decoder::StoreArgs& args) { Register arg = GetRegOrZero(args.src); Register data = GetRegOrZero(args.data); diff --git a/exec_region/Android.bp b/exec_region/Android.bp new file mode 100644 index 
00000000..d88326e0 --- /dev/null +++ b/exec_region/Android.bp @@ -0,0 +1,27 @@ +// Copyright (C) 2023 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package { + default_applicable_licenses: [ + "Android-Apache-2.0", + ], +} + +cc_library_shared { + name: "libberberis_exec_region", + defaults: ["berberis_defaults"], + srcs: ["exec_region.cc"], + linker_scripts: ["sections.ld"], +} diff --git a/exec_region/exec_region.cc b/exec_region/exec_region.cc new file mode 100644 index 00000000..2c874b48 --- /dev/null +++ b/exec_region/exec_region.cc @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ diff --git a/exec_region/sections.ld b/exec_region/sections.ld new file mode 100644 index 00000000..097003c2 --- /dev/null +++ b/exec_region/sections.ld @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +SECTIONS { + . = SIZEOF_HEADERS; + .text : { + *(.text.*) + . = ALIGN(4096); + exec_region_start = .; + . += (512 * 1024); + . = ALIGN(4096); + exec_region_end = .; + } + .plt : { + *(.plt) + } + /* Align following segments on the page boundary to prevent + * next PT_LOAD segment from mapping over .plt section removing + * executable flag from .plt. See also http://b/254823538. + */ + . = ALIGN(4096); + .fini_array : { + *(.fini_array.*) + } + .init_array : { + *(.init_array.*) + } + .dynamic : { + *(.dynamic) + } + .got : { + *(.got) + } + .got.plt : { + *(.got.plt) + } + /* Align the rest of segments on the page boundary to prevent + * GNU_RELRO segment from mprotecting writable flag away + * from them. See also http://b/261807330. + */ + . 
= ALIGN(4096); +} diff --git a/guest_state/include/berberis/guest_state/guest_state_riscv64.h b/guest_state/include/berberis/guest_state/guest_state_riscv64.h index ff6a1b40..82aad665 100644 --- a/guest_state/include/berberis/guest_state/guest_state_riscv64.h +++ b/guest_state/include/berberis/guest_state/guest_state_riscv64.h @@ -19,6 +19,7 @@ #include <cstdint> +#include "berberis/base/dependent_false.h" #include "berberis/base/macros.h" #include "berberis/guest_state/guest_addr.h" @@ -30,6 +31,21 @@ struct CPUState { // f0 to f31. We are using uint64_t because C++ may change values of NaN when they are passed from // or to function and RISC-V uses NaN-boxing which would make things problematic. uint64_t f[32]; + // RISC-V has five rounding modes, while x86-64 has only four. + // + // Extra rounding mode (RMM in RISC-V documentation) is emulated but requires the use of + // FE_TOWARDZERO mode for correct work. + // + // Additionally RISC-V implementation is supposed to support three “illegal” rounding modes and + // when they are selected all instructions which use rounding mode trigger “undefined instruction” + // exception. + // + // For simplicity we always keep full rounding mode (3 bits) in the frm field and set host + // rounding mode to appropriate one. + // + // Exceptions, on the other hand, couldn't be stored here efficiently, instead we rely on the fact + // that x86-64 implements all five exceptions that RISC-V needs (and more). 
+ uint8_t frm : 3; GuestAddr insn_addr; }; @@ -59,6 +75,33 @@ inline void SetFReg(CPUState& state, uint64_t val) { state.f[kIndex] = val; } +enum class RegisterType { + kReg, + kFpReg, +}; + +template <RegisterType register_type, uint8_t kIndex> +inline auto GetReg(const CPUState& state) { + if constexpr (register_type == RegisterType::kReg) { + return GetXReg<kIndex>(state); + } else if constexpr (register_type == RegisterType::kFpReg) { + return GetFReg<kIndex>(state); + } else { + static_assert(kDependentValueFalse<register_type>, "Unsupported register type"); + } +} + +template <RegisterType register_type, uint8_t kIndex, typename Register> +inline auto SetReg(CPUState& state, Register val) { + if constexpr (register_type == RegisterType::kReg) { + return SetXReg<kIndex>(state, val); + } else if constexpr (register_type == RegisterType::kFpReg) { + return SetFReg<kIndex>(state, val); + } else { + static_assert(kDependentValueFalse<register_type>, "Unsupported register type"); + } +} + struct ThreadState { CPUState cpu; }; diff --git a/interpreter/Android.bp b/interpreter/Android.bp index 32ac6435..37a87df3 100644 --- a/interpreter/Android.bp +++ b/interpreter/Android.bp @@ -33,6 +33,7 @@ cc_library_static { "libberberis_decoder_riscv64_headers", "libberberis_guest_state_headers", "libberberis_interpreter_riscv64_headers", + "libberberis_intrinsics_headers", "libberberis_kernel_api_headers", ], export_header_lib_headers: ["libberberis_interpreter_riscv64_headers"], @@ -47,6 +48,7 @@ cc_test_library { "libberberis_base_headers", "libberberis_guest_state_headers", "libberberis_interpreter_riscv64_headers", + "libberberis_intrinsics_headers", "libberberis_kernel_api_headers", ], } diff --git a/interpreter/riscv64/fp_regs.h b/interpreter/riscv64/fp_regs.h new file mode 100644 index 00000000..dd49f19d --- /dev/null +++ b/interpreter/riscv64/fp_regs.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_FP_REGS_H_ +#define BERBERIS_FP_REGS_H_ + +#include <cstring> + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" + +namespace berberis { + +template <typename FloatType> +inline FloatType NanUnboxFPRegToFloat(uint64_t arg); + +template <> +inline intrinsics::Float32 NanUnboxFPRegToFloat(uint64_t arg) { + // Apart from transfer operations (e.g. loads and stores), all other floating-point operations on + // narrower n-bit operations, n < FLEN, check if the input operands are correctly NaN-boxed, i.e., + // all upper FLEN−n bits are 1. If so, the n least-significant bits of the input are used as the + // input value, otherwise the input value is treated as an n-bit canonical NaN. 
+ if ((arg & 0xffff'ffff'0000'0000) != 0xffff'ffff'0000'0000) { + return bit_cast<intrinsics::Float32>(0x7fc00000); + } + intrinsics::Float32 result; + memcpy(&result, &arg, sizeof(intrinsics::Float32)); + return result; +} + +template <> +inline intrinsics::Float64 NanUnboxFPRegToFloat(uint64_t arg) { + return bit_cast<intrinsics::Float64>(arg); +} + +template <typename FloatType> +inline uint64_t NanBoxFloatToFPReg(FloatType arg); + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float32 arg) { + return bit_cast<uint32_t>(arg) | 0xffff'ffff'0000'0000; +} + +template <> +inline uint64_t NanBoxFloatToFPReg(intrinsics::Float64 arg) { + return bit_cast<uint64_t>(arg); +} + +} // namespace berberis + +#endif // BERBERIS_FP_REGS_H_ diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc index 63b55f15..a8fcbf61 100644 --- a/interpreter/riscv64/interpreter.cc +++ b/interpreter/riscv64/interpreter.cc @@ -16,6 +16,7 @@ #include "berberis/interpreter/riscv64/interpreter.h" +#include <cfenv> #include <cstdint> #include <cstring> @@ -27,9 +28,11 @@ #include "berberis/decoder/riscv64/semantics_player.h" #include "berberis/guest_state/guest_addr.h" #include "berberis/guest_state/guest_state_riscv64.h" +#include "berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h" #include "berberis/kernel_api/run_guest_syscall.h" #include "atomics.h" +#include "fp_regs.h" namespace berberis { @@ -40,6 +43,8 @@ class Interpreter { using Decoder = Decoder<SemanticsPlayer<Interpreter>>; using Register = uint64_t; using FpRegister = uint64_t; + using Float32 = intrinsics::Float32; + using Float64 = intrinsics::Float64; explicit Interpreter(ThreadState* state) : state_(state), branch_taken_(false) {} @@ -47,6 +52,47 @@ class Interpreter { // Instruction implementations. 
// + // Executes a CSR read-modify-write instruction: computes the new CSR value from |arg| and the + // previous value according to |opcode|, stores it, and returns the previous value. Only the frm + // CSR is handled here; any other CSR or opcode ends up in Unimplemented(). + Register Csr(Decoder::CsrOpcode opcode, Register arg, Decoder::CsrRegister csr) { + Register (*UpdateStatus)(Register arg, Register original_csr_value); + switch (opcode) { + case Decoder::CsrOpcode::kCsrrw: + UpdateStatus = [](Register arg, Register /*original_csr_value*/) { return arg; }; + break; + case Decoder::CsrOpcode::kCsrrs: + UpdateStatus = [](Register arg, Register original_csr_value) { + return arg | original_csr_value; + }; + break; + case Decoder::CsrOpcode::kCsrrc: + UpdateStatus = [](Register arg, Register original_csr_value) { + return ~arg & original_csr_value; + }; + break; + default: + Unimplemented(); + return {}; + } + Register result; + switch (csr) { + case Decoder::CsrRegister::kFrm: + result = state_->cpu.frm; + // frm is a 3-bit field: drop everything above it so that a guest write of a wide register + // value can never leave an out-of-range rounding mode in the guest state. + arg = UpdateStatus(arg, result) & FPFlags::RM_MASK; + state_->cpu.frm = arg; + // Reserved encodings (above RM_MAX) have no host rounding mode, leave the host mode alone. + if (arg <= FPFlags::RM_MAX) { + std::fesetround(intrinsics::ToHostRoundingMode(arg)); + } + break; + default: + Unimplemented(); + return {}; + } + return result; + } + + // Immediate form: the 5-bit immediate is used as the source operand of the register form. + Register Csr(Decoder::CsrImmOpcode opcode, uint8_t imm, Decoder::CsrRegister csr) { + return Csr(Decoder::CsrOpcode(opcode), imm, csr); + } + // Note: we prefer not to use C11/C++ atomic_thread_fence or even gcc/clang builtin // __atomic_thread_fence because all these function rely on the fact that compiler never uses // non-temporal loads and stores and only issue “mfence” when sequentially consistent ordering is @@ -309,6 +355,38 @@ class Interpreter { return RunGuestSyscall(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5); } + FpRegister OpFp(Decoder::OpFpOpcode opcode, + Decoder::FloatSize float_size, + uint8_t rm, + FpRegister arg1, + FpRegister arg2) { + switch (float_size) { + case Decoder::FloatSize::kFloat: + return NanBoxFloatToFPReg(OpFp<Float32>( + opcode, rm, NanUnboxFPRegToFloat<Float32>(arg1), NanUnboxFPRegToFloat<Float32>(arg2))); + case Decoder::FloatSize::kDouble: + return NanBoxFloatToFPReg(OpFp<Float64>( + opcode, rm, NanUnboxFPRegToFloat<Float64>(arg1), 
NanUnboxFPRegToFloat<Float64>(arg2))); + default: + Unimplemented(); + return {}; + } + } + + // TODO(b/278812060): switch to intrinsics when they would become available and stop using + // ExecuteFloatOperation directly. + template <typename FloatType> + FloatType OpFp(Decoder::OpFpOpcode opcode, uint8_t rm, FloatType arg1, FloatType arg2) { + switch (opcode) { + case Decoder::OpFpOpcode::kFAdd: + return intrinsics::ExecuteFloatOperation<FloatType>( + rm, state_->cpu.frm, [](auto x, auto y) { return x + y; }, arg1, arg2); + default: + Unimplemented(); + return {}; + } + } + Register ShiftImm(Decoder::ShiftImmOpcode opcode, Register arg, uint16_t imm) { switch (opcode) { case Decoder::ShiftImmOpcode::kSlli: diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index b82a2dfa..b6edfc4f 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -27,6 +27,7 @@ #include "berberis/guest_state/guest_addr.h" #include "berberis/guest_state/guest_state_riscv64.h" #include "berberis/interpreter/riscv64/interpreter.h" +#include "berberis/intrinsics/guest_fpstate.h" namespace berberis { @@ -34,6 +35,26 @@ namespace { class Riscv64InterpreterTest : public ::testing::Test { public: + template <RegisterType register_type, uint64_t expected_result> + void InterpretCompressedStore(uint16_t insn_bytes, uint64_t offset) { + auto code_start = ToGuestAddr(&insn_bytes); + state_.cpu.insn_addr = code_start; + store_area_ = 0; + SetXReg<8>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset)); + SetReg<register_type, 9>(state_.cpu, kDataToLoad); + InterpretInsn(&state_); + EXPECT_EQ(store_area_, expected_result); + } + + template <RegisterType register_type, uint64_t expected_result> + void InterpretCompressedLoad(uint16_t insn_bytes, uint64_t offset) { + auto code_start = ToGuestAddr(&insn_bytes); + state_.cpu.insn_addr = code_start; + SetXReg<8>(state_.cpu, 
ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset)); + InterpretInsn(&state_); + EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result); + } + void InterpretCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) { auto code_start = ToGuestAddr(&insn_bytes); state_.cpu.insn_addr = code_start; @@ -57,6 +78,15 @@ class Riscv64InterpreterTest : public ::testing::Test { EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset); } + void InterpretCsr(uint32_t insn_bytes, uint8_t expected_rm) { + auto code_start = ToGuestAddr(&insn_bytes); + state_.cpu.insn_addr = code_start; + state_.cpu.frm = 0b001u; + InterpretInsn(&state_); + EXPECT_EQ(GetXReg<2>(state_.cpu), 0b001u); + EXPECT_EQ(state_.cpu.frm, expected_rm); + } + void InterpretOp(uint32_t insn_bytes, std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { for (auto [arg1, arg2, expected_result] : args) { @@ -68,6 +98,17 @@ class Riscv64InterpreterTest : public ::testing::Test { } } + void InterpretOpFp(uint32_t insn_bytes, + std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { + for (auto [arg1, arg2, expected_result] : args) { + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + SetFReg<2>(state_.cpu, arg1); + SetFReg<3>(state_.cpu, arg2); + InterpretInsn(&state_); + EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); + } + } + void InterpretFence(uint32_t insn_bytes) { state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); InterpretInsn(&state_); @@ -190,6 +231,95 @@ class Riscv64InterpreterTest : public ::testing::Test { ThreadState state_; }; +TEST_F(Riscv64InterpreterTest, CLw) { + union { + uint16_t offset; + struct { + uint8_t : 2; + uint8_t i2 : 1; + uint8_t i3_i5 : 3; + uint8_t i6 : 1; + } i_bits; + }; + for (offset = uint8_t{0}; offset < uint8_t{128}; offset += 4) { + union { + int16_t parcel; + struct { + uint8_t low_opcode : 2; + uint8_t rd : 3; + uint8_t i6 : 1; + uint8_t i2 : 1; + uint8_t rs : 3; + uint8_t i3_i5 : 3; + uint8_t high_opcode : 3; + } 
__attribute__((__packed__)); + } o_bits = { + .low_opcode = 0b00, + .rd = 1, + .i6 = i_bits.i6, + .i2 = i_bits.i2, + .rs = 0, + .i3_i5 = i_bits.i3_i5, + .high_opcode = 0b010, + }; + InterpretCompressedLoad<RegisterType::kReg, + static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad))>(o_bits.parcel, + offset); + } +} + +template <uint16_t opcode, auto execute_instruction_func> +void TestCompressedLoadOrStore(Riscv64InterpreterTest* that) { + union { + uint16_t offset; + struct { + uint8_t : 3; + uint8_t i3_i5 : 3; + uint8_t i6_i7 : 2; + } i_bits; + }; + for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) { + union { + int16_t parcel; + struct [[gnu::packed]] { + uint8_t low_opcode : 2; + uint8_t rd : 3; + uint8_t i6_i7 : 2; + uint8_t rs : 3; + uint8_t i3_i5 : 3; + uint8_t high_opcode : 3; + }; + } o_bits = { + .low_opcode = 0b00, + .rd = 1, + .i6_i7 = i_bits.i6_i7, + .rs = 0, + .i3_i5 = i_bits.i3_i5, + .high_opcode = 0b000, + }; + (that->*execute_instruction_func)(o_bits.parcel | opcode, offset); + } +} + +TEST_F(Riscv64InterpreterTest, CompressedLoadAndStores) { + // c.Fld + TestCompressedLoadOrStore< + 0b001'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kFpReg, kDataToLoad>>(this); + // c.Ld + TestCompressedLoadOrStore< + 0b011'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedLoad<RegisterType::kReg, kDataToLoad>>(this); + // c.Fsd + TestCompressedLoadOrStore< + 0b101'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kFpReg, kDataToLoad>>(this); + // c.Sd + TestCompressedLoadOrStore< + 0b111'000'000'00'000'00, + &Riscv64InterpreterTest::InterpretCompressedStore<RegisterType::kReg, kDataToLoad>>(this); +} + TEST_F(Riscv64InterpreterTest, CAddi) { union { int8_t offset; @@ -322,6 +452,16 @@ TEST_F(Riscv64InterpreterTest, CJ) { } } +TEST_F(Riscv64InterpreterTest, CsrInstrctuion) { + ScopedRoundingMode scoped_rounding_mode; + // Csrrw x2, frm, 2 + 
InterpretCsr(0x00215173, 2); + // Csrrsi x2, frm, 2 + InterpretCsr(0x00216173, 3); + // Csrrci x2, frm, 1 + InterpretCsr(0x0020f173, 0); +} + TEST_F(Riscv64InterpreterTest, FenceInstructions) { // Fence InterpretFence(0x0ff0000f); @@ -490,6 +630,231 @@ TEST_F(Riscv64InterpreterTest, OpImm32Instructions) { InterpretOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}}); } +TEST_F(Riscv64InterpreterTest, OpFpInstructions) { + // FAdd.S + InterpretOpFp(0x003100d3, + {{bit_cast<uint32_t>(1.0f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(2.0f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(3.0f) | 0xffff'ffff'0000'0000}}); + // FAdd.D + InterpretOpFp(0x023100d3, + {{bit_cast<uint64_t>(1.0), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0)}}); +} + +TEST_F(Riscv64InterpreterTest, RoundingModeTest) { + // FAdd.S + InterpretOpFp(0x003100d3, + // Test RNE + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}}); 
+ // FAdd.S + InterpretOpFp(0x003110d3, + // Test RTZ + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003120d3, + // Test RDN + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) 
| 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003130d3, + // Test RUP + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}}); + // FAdd.S + InterpretOpFp(0x003140d3, + // Test RMM + {{bit_cast<uint32_t>(1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(1.0000004f) | 0xffff'ffff'0000'0000, + 
bit_cast<uint32_t>(0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(1.0000005f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000001f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000002f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000}, + {bit_cast<uint32_t>(-1.0000004f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-0.000000059604645f) | 0xffff'ffff'0000'0000, + bit_cast<uint32_t>(-1.0000005f) | 0xffff'ffff'0000'0000}}); + + // FAdd.D + InterpretOpFp(0x023100d3, + // Test RNE + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000009)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000009)}}); + // FAdd.D + InterpretOpFp(0x023110d3, + // Test RTZ + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000002)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + 
{bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000002)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}}); + // FAdd.D + InterpretOpFp(0x023120d3, + // Test RDN + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000002)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000009)}}); + // FAdd.D + InterpretOpFp(0x023130d3, + // Test RUP + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + 
bit_cast<uint64_t>(1.0000000000000009)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000002)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}}); + // FAdd.D + InterpretOpFp(0x023140d3, + // Test RMM + {{bit_cast<uint64_t>(1.0000000000000002), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000004)}, + {bit_cast<uint64_t>(1.0000000000000004), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000007)}, + {bit_cast<uint64_t>(1.0000000000000007), + bit_cast<uint64_t>(0.00000000000000011102230246251565), + bit_cast<uint64_t>(1.0000000000000009)}, + {bit_cast<uint64_t>(-1.0000000000000002), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000004)}, + {bit_cast<uint64_t>(-1.0000000000000004), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000007)}, + {bit_cast<uint64_t>(-1.0000000000000007), + bit_cast<uint64_t>(-0.00000000000000011102230246251565), + bit_cast<uint64_t>(-1.0000000000000009)}}); +} + TEST_F(Riscv64InterpreterTest, LoadInstructions) { // Offset is always 8. 
// Lbu diff --git a/intrinsics/Android.bp b/intrinsics/Android.bp index 44220fe0..9e6a6cc0 100644 --- a/intrinsics/Android.bp +++ b/intrinsics/Android.bp @@ -17,6 +17,13 @@ package { default_applicable_licenses: ["Android-Apache-2.0"], } +cc_library_headers { + name: "libberberis_intrinsics_headers", + defaults: ["berberis_defaults"], + host_supported: true, + export_include_dirs: ["include"], +} + cc_library_static { name: "libberberis_intrinsics", defaults: ["berberis_defaults"], diff --git a/intrinsics/include/berberis/intrinsics/guest_fpstate.h b/intrinsics/include/berberis/intrinsics/guest_fpstate.h index da07e470..23cce1be 100644 --- a/intrinsics/include/berberis/intrinsics/guest_fpstate.h +++ b/intrinsics/include/berberis/intrinsics/guest_fpstate.h @@ -21,8 +21,8 @@ // portion that is reflected in hosts' fp-environment. // TODO(levarum): Rename file to reflect this. -#include <fenv.h> // FE_TONEAREST and friends. -#include <stdint.h> +#include <cfenv> // FE_TONEAREST and friends. 
+#include <cstdint> namespace berberis { @@ -39,6 +39,16 @@ static_assert(FE_TIESAWAY != FE_UPWARD); static_assert(FE_TIESAWAY != FE_DOWNWARD); static_assert(FE_TIESAWAY != FE_TOWARDZERO); +// RAII guard for the host floating-point rounding mode: remembers the mode that is active at +// construction (optionally switching to |rm|) and restores it in the destructor. +class ScopedRoundingMode { + public: + ScopedRoundingMode() : saved_round_mode(std::fegetround()) {} + // explicit: constructing the guard changes the host rounding mode, which must never happen as + // a side effect of an implicit int conversion. + explicit ScopedRoundingMode(int rm) : saved_round_mode(std::fegetround()) { std::fesetround(rm); } + ~ScopedRoundingMode() { std::fesetround(saved_round_mode); } + + // Not copyable: a copy would restore the saved mode a second time at an unrelated point. + ScopedRoundingMode(const ScopedRoundingMode&) = delete; + ScopedRoundingMode& operator=(const ScopedRoundingMode&) = delete; + + private: + int saved_round_mode; +}; + } // namespace berberis #endif // BERBERIS_INTRINSICS_GUEST_FPSTATE_H_ diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/intrinsics_float.h index 30f83e55..694afba9 100644 --- a/intrinsics/include/berberis/intrinsics/intrinsics_float.h +++ b/intrinsics/include/berberis/intrinsics/intrinsics_float.h @@ -64,23 +64,15 @@ class WrappedFloatType { explicit constexpr operator uint32_t() const { return value_; } explicit constexpr operator int64_t() const { return value_; } explicit constexpr operator uint64_t() const { return value_; } - - auto BitCastToIntOfSameSize() { - if constexpr (std::is_same_v<BaseType, float>) { - return bit_cast<int32_t>(value_); - } else { - static_assert(std::is_same_v<BaseType, double>, "Only float and double BaseType supported."); - return bit_cast<int64_t>(value_); - } - } - - // Only valid for BaseType==double. Returns the bit representation of the fp value. explicit constexpr operator WrappedFloatType<float>() const { return WrappedFloatType<float>(value_); } explicit constexpr operator WrappedFloatType<double>() const { return WrappedFloatType<double>(value_); } +#if defined(__i386__) || defined(__x86_64__) + explicit constexpr operator long double() const { return value_; } +#endif // Note: we don't provide unary operator-. That's done on purpose: with floats -x and 0.-x // produce different results which could be surprising. Use fneg instead of unary operator-. 
friend WrappedFloatType operator+(const WrappedFloatType& v1, const WrappedFloatType& v2); diff --git a/intrinsics/include/berberis/intrinsics/riscv64/guest_fpstate.h b/intrinsics/include/berberis/intrinsics/riscv64/guest_fpstate.h new file mode 100644 index 00000000..e8a68941 --- /dev/null +++ b/intrinsics/include/berberis/intrinsics/riscv64/guest_fpstate.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BERBERIS_INTRINSICS_GUEST_RISCV64_FPSTATE_H_ +#define BERBERIS_INTRINSICS_GUEST_RISCV64_FPSTATE_H_ + +#include <cfenv> +#include <cstdint> + +namespace berberis { + +namespace FPFlags { + +inline constexpr uint64_t NV = 1 << 4; +inline constexpr uint64_t DZ = 1 << 3; +inline constexpr uint64_t OF = 1 << 2; +inline constexpr uint64_t UF = 1 << 1; +inline constexpr uint64_t NX = 1 << 0; +inline constexpr uint64_t RM_POS = 5; +inline constexpr uint64_t RM_MASK = 0b111; +inline constexpr uint64_t RM_MAX = 0b100; +inline constexpr uint64_t RNE = 0b000; +inline constexpr uint64_t RTZ = 0b001; +inline constexpr uint64_t RDN = 0b010; +inline constexpr uint64_t RUP = 0b011; +inline constexpr uint64_t RMM = 0b100; +inline constexpr uint64_t DYN = 0b111; + +} // namespace FPFlags + +namespace intrinsics { + +// Note that not all RISC-V rounding modes are supported on popular architectures. +// FE_TIESAWAY is emulated, but proper emulation needs FE_TOWARDZERO mode. 
+inline int ToHostRoundingMode(int8_t rm) { + static constexpr int kRounding[FPFlags::RM_MAX + 1] = { + FE_TONEAREST, FE_TOWARDZERO, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO}; + // DYN and the reserved encodings have no host counterpart, and callers may pass them before + // checking for DYN. Return a negative value (FE_* rounding macros are never negative) instead + // of reading past the table. The uint8_t cast makes a negative rm fail the range check too. + if (static_cast<uint8_t>(rm) > FPFlags::RM_MAX) { + return -1; + } + return kRounding[rm]; +} + +} // namespace intrinsics + +} // namespace berberis + +#endif // BERBERIS_INTRINSICS_GUEST_RISCV64_FPSTATE_H_ diff --git a/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h new file mode 100644 index 00000000..db8ff249 --- /dev/null +++ b/intrinsics/include/berberis/intrinsics/riscv64_to_x86_64/intrinsics_float.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ +#define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ + +#include <limits> + +#include "berberis/base/bit_util.h" +#include "berberis/intrinsics/intrinsics_float.h" +#include "berberis/intrinsics/riscv64/guest_fpstate.h" // FPFlags, ToHostRoundingMode +#include "berberis/intrinsics/type_traits.h" + +namespace berberis::intrinsics { + +// x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it +// has support for 80bit floats: if calculations are done with one bit (or more) of extra precision +// in the FE_TOWARDZERO mode then we can easily adjust fraction part and only need to remember +// that this addition may overflow. +template <typename FloatType, typename OperationType, typename... Args> +inline auto ExecuteFloatOperationRmm(OperationType operation, Args... args) + -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> { + using Wide = typename TypeTraits<FloatType>::Wide; + Wide wide_result = operation(Wide(args)...); + if constexpr (std::is_same_v<FloatType, Float32>) { + // In the 32bit->64bit case everything happens almost automatically, we just need to clear low + // bits to ensure that we are getting ±∞ and not NaN. + auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result); + if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) { + return FloatType(wide_result); + } + int_result += 0x0000'0000'1000'0000; + int_result &= 0xffff'ffff'e000'0000; + wide_result = bit_cast<Wide>(int_result); + } else if constexpr (std::is_same_v<FloatType, Float64>) { + // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not + // pseudo-infinity (supported on 8087/80287, but not on modern CPUs). 
+ struct { + uint64_t significand; + uint16_t exponent; + uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)]; + } fp80_parts; + static_assert(sizeof fp80_parts == sizeof(Wide)); + memcpy(&fp80_parts, &wide_result, sizeof(wide_result)); + // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V). + if ((fp80_parts.exponent & 0x7fff) == 0x7fff || + (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) { + return FloatType(wide_result); + } + fp80_parts.significand += 0x0000'0000'0000'0400; + fp80_parts.significand &= 0xffff'ffff'ffff'f800; + if (fp80_parts.significand == 0) { + fp80_parts.exponent++; + fp80_parts.significand = 0x8000'0000'0000'0000; + } + memcpy(&wide_result, &fp80_parts, sizeof(wide_result)); + } + return FloatType(wide_result); +} + +// Note: first round of rm/frm verification must happen before that function because RISC-V +// postulates that invalid rm or frm should trigger illegal instruction exception. +// Here we can assume both rm and frm fields are valid. +template <typename FloatType, typename OperationType, typename... Args> +inline auto ExecuteFloatOperation(uint8_t requested_rm, + uint8_t current_rm, + OperationType operation, + Args... args) + -> std::enable_if_t<(std::is_same_v<Args, FloatType> && ...), FloatType> { + int host_requested_rm = ToHostRoundingMode(requested_rm); + int host_current_rm = ToHostRoundingMode(current_rm); + if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) { + uint8_t rm = requested_rm == FPFlags::DYN ? 
current_rm : requested_rm; + if (rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm<FloatType>(operation, args...); + } + return operation(args...); + } + ScopedRoundingMode scoped_rounding_mode{host_requested_rm}; + if (requested_rm == FPFlags::RMM) { + return ExecuteFloatOperationRmm<FloatType>(operation, args...); + } + return operation(args...); +} + +} // namespace berberis::intrinsics + +#endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ diff --git a/intrinsics/include/berberis/intrinsics/type_traits.h b/intrinsics/include/berberis/intrinsics/type_traits.h index f1f6f75d..2785b92d 100644 --- a/intrinsics/include/berberis/intrinsics/type_traits.h +++ b/intrinsics/include/berberis/intrinsics/type_traits.h @@ -93,11 +93,17 @@ struct TypeTraits<int64_t> { template <> struct TypeTraits<intrinsics::Float32> { using Int = int32_t; + using Wide = intrinsics::Float64; }; template <> struct TypeTraits<intrinsics::Float64> { using Int = int64_t; + using Narrow = intrinsics::Float32; +#if defined(__i386__) || defined(__x86_64__) + static_assert(sizeof(long double) > sizeof(intrinsics::Float64)); + using Wide = long double; +#endif }; #if defined(__x86_64__) diff --git a/kernel_api/fcntl_emulation.cc b/kernel_api/fcntl_emulation.cc index e48097d4..6c74b49d 100644 --- a/kernel_api/fcntl_emulation.cc +++ b/kernel_api/fcntl_emulation.cc @@ -32,6 +32,7 @@ #include <cerrno> +#include "berberis/base/checks.h" #include "berberis/kernel_api/open_emulation.h" #include "berberis/kernel_api/tracing.h" @@ -62,9 +63,49 @@ static_assert(F_SETLEASE == 1024); static_assert(F_GETLEASE == 1025); static_assert(F_NOTIFY == 1026); +#if !defined(ANDROID_HOST_MUSL) static_assert(F_GETLK == 5); static_assert(F_SETLK == 6); static_assert(F_SETLKW == 7); +#endif + +#define GUEST_F_GETLK 5 +#define GUEST_F_SETLK 6 +#define GUEST_F_SETLKW 7 + +#if defined(ANDROID_HOST_MUSL) +// Musl only has a 64-bit flock that it uses for flock and flock64. 
+ +struct Guest_flock { + int16_t l_type; + int16_t l_whence; + int32_t l_start; + int32_t l_len; + int32_t l_pid; +}; + +const struct flock64* ConvertGuestFlockToHostFlock64(const Guest_flock* guest, + struct flock64* host) { + if (!guest) { + return nullptr; + } + *host = {guest->l_type, guest->l_whence, guest->l_start, guest->l_len, guest->l_pid}; + return host; +} + +void ConvertHostFlock64ToGuestFlock(const struct flock64* host, Guest_flock* guest) { + CHECK_NE(guest, nullptr); + CHECK_LE(host->l_start, INT32_MAX); + CHECK_GE(host->l_start, INT32_MIN); + CHECK_LE(host->l_len, INT32_MAX); + CHECK_GE(host->l_len, INT32_MIN); + *guest = {host->l_type, + host->l_whence, + static_cast<int32_t>(host->l_start), + static_cast<int32_t>(host->l_len), + host->l_pid}; +} +#endif namespace berberis { @@ -102,10 +143,29 @@ int GuestFcntl(int fd, int cmd, long arg_3) { #if defined(F_GET_SEALS) case F_GET_SEALS: #endif - case F_SETLK: - case F_SETLKW: - case F_GETLK: + case GUEST_F_SETLK: + case GUEST_F_SETLKW: + case GUEST_F_GETLK: +#if defined(ANDROID_HOST_MUSL) + { + // Musl only has a 64-bit flock for both flock and flock64, translate flock calls to flock64. + Guest_flock* guest_flock = reinterpret_cast<Guest_flock*>(arg_3); + struct flock64 host_flock64; + // In case of GETLK input flock describes region + // to check, thus conversion is also required. + auto result = fcntl(fd, + cmd + F_SETLK - GUEST_F_SETLK, + ConvertGuestFlockToHostFlock64(guest_flock, &host_flock64)); + if (result == 0 && cmd == GUEST_F_GETLK) { + // Output contains the result of lock check. + ConvertHostFlock64ToGuestFlock(&host_flock64, guest_flock); + } + return result; + } +#else + // struct flock compatibility is checked above. return fcntl(fd, cmd, arg_3); +#endif case F_SETFL: return fcntl(fd, cmd, ToHostOpenFlags(arg_3)); default: |