aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-03-04 22:09:57 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-03-04 22:09:57 +0000
commitcb71633fbed62d26757f32be25ebecc388032c40 (patch)
tree3bce53cd9c8a5d969d4feecee79c597663702a70
parentcd384f706cd4c14148da0a9d2509dc49bde7ccd7 (diff)
parentc44a48ec868518cc6947476cc69cb368e1b9b594 (diff)
downloadbinary_translation-simpleperf-release.tar.gz
Snap for 11526323 from c44a48ec868518cc6947476cc69cb368e1b9b594 to simpleperf-releasesimpleperf-release
Change-Id: I8ee6b3c97f6dc6eef08c5cbb0ffa69ad63f5cdce
-rw-r--r--base/bit_util_test.cc33
-rw-r--r--base/include/berberis/base/bit_util.h110
-rw-r--r--decoder/include/berberis/decoder/riscv64/decoder.h71
-rw-r--r--guest_state/Android.bp2
-rw-r--r--guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h45
-rw-r--r--interpreter/Android.bp18
-rw-r--r--interpreter/riscv64/interpreter-VLoadIndexedArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VLoadStrideArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VLoadUnitStrideArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpFVfArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpFVvArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpIViArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpIVvArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpIVxArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpMVvArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VOpMVxArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VStoreIndexedArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VStoreStrideArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-VStoreUnitStrideArgs.cc26
-rw-r--r--interpreter/riscv64/interpreter-main.cc43
-rw-r--r--interpreter/riscv64/interpreter.h (renamed from interpreter/riscv64/interpreter.cc)888
-rw-r--r--interpreter/riscv64/interpreter_test.cc353
-rw-r--r--intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h37
-rw-r--r--intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h36
24 files changed, 1626 insertions, 348 deletions
diff --git a/base/bit_util_test.cc b/base/bit_util_test.cc
index 79fde7a4..9ebf1e1f 100644
--- a/base/bit_util_test.cc
+++ b/base/bit_util_test.cc
@@ -153,11 +153,44 @@ static_assert(std::is_same_v<Int16, UInt16::SignedType>);
static_assert(std::is_same_v<UInt16, Int16::UnsignedType>);
static_assert(std::is_same_v<UInt16, UInt16::UnsignedType>);
+static_assert(std::is_same_v<Int16, SignedType<RawInt16>>);
+static_assert(std::is_same_v<Int16, SignedType<Int16>>);
+static_assert(std::is_same_v<Int16, SignedType<UInt16>>);
+static_assert(std::is_same_v<UInt16, UnsignedType<RawInt16>>);
+static_assert(std::is_same_v<UInt16, UnsignedType<Int16>>);
+static_assert(std::is_same_v<UInt16, UnsignedType<UInt16>>);
+
+static_assert(std::is_same_v<Int16, WrappingType<Int16>>);
+static_assert(std::is_same_v<UInt16, WrappingType<UInt16>>);
+static_assert(std::is_same_v<Int16, WrappingType<SatInt16>>);
+static_assert(std::is_same_v<UInt16, WrappingType<SatUInt16>>);
+
static_assert(std::is_same_v<SatInt16, SatInt16::SignedType>);
static_assert(std::is_same_v<SatInt16, SatUInt16::SignedType>);
static_assert(std::is_same_v<SatUInt16, SatInt16::UnsignedType>);
static_assert(std::is_same_v<SatUInt16, SatUInt16::UnsignedType>);
+static_assert(std::is_same_v<SatInt16, SignedType<SatInt16>>);
+static_assert(std::is_same_v<SatInt16, SignedType<SatUInt16>>);
+static_assert(std::is_same_v<SatUInt16, UnsignedType<SatInt16>>);
+static_assert(std::is_same_v<SatUInt16, UnsignedType<SatUInt16>>);
+
+static_assert(std::is_same_v<SatInt16, SaturatingType<Int16>>);
+static_assert(std::is_same_v<SatUInt16, SaturatingType<UInt16>>);
+static_assert(std::is_same_v<SatInt16, SaturatingType<SatInt16>>);
+static_assert(std::is_same_v<SatUInt16, SaturatingType<SatUInt16>>);
+
+static_assert(std::is_same_v<SatInt16, SignedType<SatInt16>>);
+static_assert(std::is_same_v<SatInt16, SignedType<SatUInt16>>);
+static_assert(std::is_same_v<SatUInt16, UnsignedType<SatInt16>>);
+static_assert(std::is_same_v<SatUInt16, UnsignedType<SatUInt16>>);
+
+static_assert(std::is_same_v<RawInt16, RawType<RawInt16>>);
+static_assert(std::is_same_v<RawInt16, RawType<Int16>>);
+static_assert(std::is_same_v<RawInt16, RawType<UInt16>>);
+static_assert(std::is_same_v<RawInt16, RawType<SatInt16>>);
+static_assert(std::is_same_v<RawInt16, RawType<SatUInt16>>);
+
} // namespace
} // namespace berberis
diff --git a/base/include/berberis/base/bit_util.h b/base/include/berberis/base/bit_util.h
index ddf4f91c..b3d287ea 100644
--- a/base/include/berberis/base/bit_util.h
+++ b/base/include/berberis/base/bit_util.h
@@ -271,6 +271,9 @@ class WrappedFloatType;
} // namespace intrinsics
+template <typename T>
+struct TypeTraits;
+
// Raw integers. Used to carry payload, which may be EXPLICITLY converted to Saturating
// integer, Wrapping integer, or WrappedFloatType.
//
@@ -697,6 +700,12 @@ using UInt128 = Wrapping<unsigned __int128>;
#endif
template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToSigned(Raw<IntType> src) ->
+ typename Wrapping<IntType>::SignedType {
+ return {static_cast<std::make_signed_t<IntType>>(src.value)};
+}
+
+template <typename IntType>
[[nodiscard]] auto constexpr BitCastToSigned(Saturating<IntType> src) ->
typename Saturating<IntType>::SignedType {
return {static_cast<std::make_signed_t<IntType>>(src.value)};
@@ -712,6 +721,12 @@ template <typename T>
using SignedType = decltype(BitCastToSigned(std::declval<T>()));
template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToUnsigned(Raw<IntType> src) ->
+ typename Wrapping<IntType>::UnsignedType {
+ return {static_cast<std::make_unsigned_t<IntType>>(src.value)};
+}
+
+template <typename IntType>
[[nodiscard]] auto constexpr BitCastToUnsigned(Saturating<IntType> src) ->
typename Saturating<IntType>::UnsignedType {
return {static_cast<std::make_unsigned_t<IntType>>(src.value)};
@@ -726,6 +741,86 @@ template <typename IntType>
template <typename T>
using UnsignedType = decltype(BitCastToUnsigned(std::declval<T>()));
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToSaturating(Saturating<IntType> src) -> Saturating<IntType> {
+ return src;
+}
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToSaturating(Wrapping<IntType> src) -> Saturating<IntType> {
+ return {src.value};
+}
+
+template <typename T>
+using SaturatingType = decltype(BitCastToSaturating(std::declval<T>()));
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToWrapping(Saturating<IntType> src) -> Wrapping<IntType> {
+ return {src.value};
+}
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToWrapping(Wrapping<IntType> src) -> Wrapping<IntType> {
+ return src;
+}
+
+template <typename T>
+using WrappingType = decltype(BitCastToWrapping(std::declval<T>()));
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToRaw(Raw<IntType> src) -> Raw<IntType> {
+ return src;
+}
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToRaw(Saturating<IntType> src)
+ -> Raw<std::make_unsigned_t<IntType>> {
+ return {static_cast<std::make_unsigned_t<IntType>>(src.value)};
+}
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToRaw(Wrapping<IntType> src)
+ -> Raw<std::make_unsigned_t<IntType>> {
+ return {static_cast<std::make_unsigned_t<IntType>>(src.value)};
+}
+
+template <typename BaseType>
+[[nodiscard]] constexpr auto BitCastToRaw(intrinsics::WrappedFloatType<BaseType> src)
+ -> Raw<std::make_unsigned_t<typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Int>> {
+ return {bit_cast<
+ std::make_unsigned_t<typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Int>>(src)};
+}
+
+template <typename T>
+using RawType = decltype(BitCastToRaw(std::declval<T>()));
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToFloat(Raw<IntType> src) ->
+ typename TypeTraits<IntType>::Float {
+ return bit_cast<typename TypeTraits<IntType>::Float>(src.value);
+}
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToFloat(Saturating<IntType> src) ->
+ typename TypeTraits<IntType>::Float {
+ return bit_cast<typename TypeTraits<IntType>::Float>(src.value);
+}
+
+template <typename IntType>
+[[nodiscard]] auto constexpr BitCastToFloat(Wrapping<IntType> src) ->
+ typename TypeTraits<IntType>::Float {
+ return bit_cast<typename TypeTraits<IntType>::Float>(src.value);
+}
+
+template <typename BaseType>
+[[nodiscard]] constexpr auto BitCastToFloat(intrinsics::WrappedFloatType<BaseType> src)
+ -> intrinsics::WrappedFloatType<BaseType> {
+ return src;
+}
+
+template <typename T>
+using FloatType = decltype(BitCastToFloat(std::declval<T>()));
+
template <typename ResultType, typename IntType>
[[nodiscard]] auto constexpr MaybeTruncateTo(IntType src)
-> std::enable_if_t<std::is_integral_v<IntType> &&
@@ -774,9 +869,6 @@ template <typename ResultType, typename IntType>
return ResultType{static_cast<ResultType::BaseType>(src.value)};
}
-template <typename T>
-struct TypeTraits;
-
template <typename BaseType>
[[nodiscard]] constexpr auto Widen(Saturating<BaseType> source)
-> Saturating<typename TypeTraits<BaseType>::Wide> {
@@ -789,6 +881,12 @@ template <typename BaseType>
return {source.value};
}
+template <typename BaseType>
+[[nodiscard]] constexpr auto Widen(intrinsics::WrappedFloatType<BaseType> source)
+ -> Wrapping<typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Wide> {
+ return {source.value};
+}
+
template <typename T>
using WideType = decltype(Widen(std::declval<T>()));
@@ -812,6 +910,12 @@ template <typename BaseType>
return {static_cast<typename TypeTraits<BaseType>::Narrow>(source.value)};
}
+template <typename BaseType>
+[[nodiscard]] constexpr auto Narrow(intrinsics::WrappedFloatType<BaseType> source)
+ -> Wrapping<typename TypeTraits<intrinsics::WrappedFloatType<BaseType>>::Narrow> {
+ return {source.value};
+}
+
template <typename T>
using NarrowType = decltype(Narrow(std::declval<T>()));
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index c9cabf68..a150d048 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -234,7 +234,7 @@ class Decoder {
kEbreak = 0b000000000001'00000'000'00000,
};
- enum class VLoadUnitStrideOpcode : uint8_t {
+ enum class VLUmOpOpcode : uint8_t {
kVleXX = 0b00000,
kVlXreXX = 0b01000,
kVleXXff = 0b10000,
@@ -295,8 +295,8 @@ class Decoder {
kVfsgnjnvv = 0b001001,
kVfsgnjxvv = 0b001010,
kVfmvfs = 0b010000,
- kVfcvtXX = 0b010010,
- kVXXXv = 0b010011, // Vfsqrt.v/Vfrsqrt7.v/Vfrec7.v/Vfclass.v
+ kVFUnary0 = 0b010010,
+ kVFUnary1 = 0b010011,
kVmfeqvv = 0b011000,
kVmflevv = 0b011001,
kVmfltvv = 0b011011,
@@ -345,7 +345,7 @@ class Decoder {
kVsadduvi = 0b100000,
kVsaddvi = 0b100001,
kVsllvi = 0b100101,
- kVmvvi = 0b100111,
+ kVmvXrv = 0b100111,
kVsrlvi = 0b101000,
kVsravi = 0b101001,
kVssrlvi = 0b101010,
@@ -414,9 +414,9 @@ class Decoder {
kVmnandmm = 0b011101,
kVmnormm = 0b011110,
kVmxnormm = 0b011111,
- kVXmXXs = 0b010000,
- kVmsXf = 0b010100,
- kVxunary0 = 0b010010,
+ kVWXUnary0 = 0b010000,
+ kVMUnary0 = 0b010100,
+ kVFUnary0 = 0b010010,
kVmulhuvv = 0b100100,
kVmulvv = 0b100101,
kVmulhsuvv = 0b100110,
@@ -475,7 +475,7 @@ class Decoder {
};
enum class VOpMVxOpcode : uint8_t {
- kVXmXXx = 0b010000,
+ kVRXUnary0 = 0b010000,
kVmulhuvx = 0b100100,
kVmulvx = 0b100101,
kVmulhsuvx = 0b100110,
@@ -486,30 +486,54 @@ class Decoder {
kVnmsacvx = 0b101111,
};
- enum class VStoreUnitStrideOpcode : uint8_t {
+ enum class VSUmOpOpcode : uint8_t {
kVseXX = 0b00000,
kVsX = 0b01000,
kVsm = 0b01011,
};
- enum class VXmXXxOpcode : uint8_t {
+ enum class VFUnary0Opcode : uint8_t {
+ kVfcvtxufv = 0b00000,
+ kVfcvtxfv = 0b00001,
+ kVfcvtfxuv = 0b00010,
+ kVfcvtfxv = 0b00011,
+ kVfcvtrtzxufv = 0b00110,
+ kVfcvtrtzxfv = 0b00111,
+ kVfwcvtxufv = 0b01000,
+ kVfwcvtxfv = 0b01001,
+ kVfwcvtfxuv = 0b01010,
+ kVfwcvtfxv = 0b01011,
+ kVfwcvtffv = 0b01100,
+ kVfwcvtrtzxufv = 0b01110,
+ kVfwcvtrtzxfv = 0b01111,
+ kVfncvtxufw = 0b10000,
+ kVfncvtxfw = 0b10001,
+ kVfncvtfxuw = 0b10010,
+ kVfncvtfxw = 0b10011,
+ kVfncvtffw = 0b10100,
+ kVfncvtrodffw = 0b10101,
+ kVfncvtrtzxufw = 0b10110,
+ kVfncvtrtzxfw = 0b10111,
+ };
+
+ enum class VRXUnary0Opcode : uint8_t {
kVmvsx = 0b00000,
};
- enum class VXmXXsOpcode : uint8_t {
+ enum class VWXUnary0Opcode : uint8_t {
kVmvxs = 0b00000,
kVcpopm = 0b10000,
kVfirstm = 0b10001,
};
- enum class VmsXfOpcode : uint8_t {
+ enum class VMUnary0Opcode : uint8_t {
kVmsbfm = 0b00001,
kVmsofm = 0b00010,
kVmsifm = 0b00011,
kVidv = 0b10001,
};
- enum class Vxunary0Opcode : uint8_t {
+ enum class VXUnary0Opcode : uint8_t {
kVzextvf8m = 0b00010,
kVsextvf8m = 0b00011,
kVzextvf4m = 0b00100,
@@ -774,7 +798,7 @@ class Decoder {
};
struct VLoadUnitStrideArgs {
- VLoadUnitStrideOpcode opcode;
+ VLUmOpOpcode opcode;
MemoryDataOperandType width;
bool vm;
uint8_t nf;
@@ -795,7 +819,10 @@ class Decoder {
bool vm;
uint8_t dst;
uint8_t src1;
- uint8_t src2;
+ union {
+ VFUnary0Opcode vfunary0_opcode;
+ uint8_t src2;
+ };
};
struct VOpIViArgs {
@@ -820,9 +847,9 @@ class Decoder {
uint8_t dst;
uint8_t src1;
union {
- VXmXXsOpcode vXmXXs_opcode;
- VmsXfOpcode vmsXf_opcode;
- Vxunary0Opcode vxunary0_opcode;
+ VWXUnary0Opcode vwxunary0_opcode;
+ VMUnary0Opcode vmunary0_opcode;
+ VXUnary0Opcode vxunary0_opcode;
uint8_t src2;
};
};
@@ -840,7 +867,7 @@ class Decoder {
bool vm;
uint8_t dst;
union {
- VXmXXxOpcode vXmXXx_opcode;
+ VRXUnary0Opcode vrxunary0_opcode;
uint8_t src1;
};
uint8_t src2;
@@ -885,7 +912,7 @@ class Decoder {
};
struct VStoreUnitStrideArgs {
- VStoreUnitStrideOpcode opcode;
+ VSUmOpOpcode opcode;
MemoryDataOperandType width;
bool vm;
uint8_t nf;
@@ -1637,7 +1664,7 @@ class Decoder {
switch (GetBits<26, 2>()) {
case 0b00: {
const VLoadUnitStrideArgs args = {
- .opcode = VLoadUnitStrideOpcode{GetBits<20, 5>()},
+ .opcode = VLUmOpOpcode{GetBits<20, 5>()},
.width = decoded_operand_type.eew,
.vm = GetBits<25, 1>(),
.nf = GetBits<29, 3>(),
@@ -1701,7 +1728,7 @@ class Decoder {
switch (GetBits<26, 2>()) {
case 0b00: {
const VStoreUnitStrideArgs args = {
- .opcode = VStoreUnitStrideOpcode{GetBits<20, 5>()},
+ .opcode = VSUmOpOpcode{GetBits<20, 5>()},
.width = decoded_operand_type.eew,
.vm = GetBits<25, 1>(),
.nf = GetBits<29, 3>(),
diff --git a/guest_state/Android.bp b/guest_state/Android.bp
index 2a8b2a21..26ee4c21 100644
--- a/guest_state/Android.bp
+++ b/guest_state/Android.bp
@@ -24,9 +24,11 @@ cc_library_headers {
export_include_dirs: ["include"],
header_libs: [
"libberberis_base_headers",
+ "native_bridge_guest_state_headers",
],
export_header_lib_headers: [
"libberberis_base_headers",
+ "native_bridge_guest_state_headers",
],
}
diff --git a/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h b/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h
index 9c7f9044..6f87af8b 100644
--- a/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h
+++ b/guest_state/riscv64/include/berberis/guest_state/guest_state_arch.h
@@ -27,11 +27,10 @@
#include "berberis/base/macros.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_opaque.h"
+#include "native_bridge_support/riscv64/guest_state/guest_state_cpu_state.h"
namespace berberis {
-using Reservation = uint64_t;
-
enum class CsrName {
kFFlags = 0b00'00'0000'0001,
kFrm = 0b00'00'0000'0010,
@@ -69,48 +68,6 @@ enum class CsrName {
BERBERIS_RISV64_PROCESS_NOSTORAGE_CSR(Vxsat), BERBERIS_RISV64_PROCESS_NOSTORAGE_CSR(Vxrm), \
BERBERIS_RISV64_PROCESS_NOSTORAGE_CSR(Vlenb)
-struct CPUState {
- // x0 to x31.
- uint64_t x[32];
- // f0 to f31. We are using uint64_t because C++ may change values of NaN when they are passed from
- // or to function and RISC-V uses NaN-boxing which would make things problematic.
- uint64_t f[32];
- // v0 to v32. We only support 128bit vectors for now.
- alignas(16) __uint128_t v[32];
-
- GuestAddr insn_addr;
-
- GuestAddr reservation_address;
- Reservation reservation_value;
-
- // Technically only 9 bits are defined: sign bit and 8 low bits.
- // But for performance reason it's easier to keep full 64bits in this variable.
- uint64_t vtype;
- // This register usually contains zero and each vector instruction would reset it to zero.
- // But it's allowed to change it and if that happens we are supposed to support it.
- uint8_t vstart;
- // This register is usually set to process full 128 bits set of SIMD data.
- // But it's allowed to change it and if that happens we are supposed to support it.
- uint8_t vl;
- // Only 3 bits are defined but we allocate full byte to simplify implementation.
- uint8_t vcsr;
- // RISC-V has five rounding modes, while x86-64 has only four.
- //
- // Extra rounding mode (RMM in RISC-V documentation) is emulated but requires the use of
- // FE_TOWARDZERO mode for correct work.
- //
- // Additionally RISC-V implementation is supposed to support three “illegal” rounding modes and
- // when they are selected all instructions which use rounding mode trigger “undefined instruction”
- // exception.
- //
- // For simplicity we always keep full rounding mode (3 bits) in the frm field and set host
- // rounding mode to appropriate one.
- //
- // Exceptions, on the other hand, couldn't be stored here efficiently, instead we rely on the fact
- // that x86-64 implements all five exceptions that RISC-V needs (and more).
- uint8_t frm;
-};
-
static_assert(std::is_standard_layout_v<CPUState>);
constexpr uint32_t kNumGuestRegs = std::size(CPUState{}.x);
diff --git a/interpreter/Android.bp b/interpreter/Android.bp
index 98aabc65..ed9ca278 100644
--- a/interpreter/Android.bp
+++ b/interpreter/Android.bp
@@ -28,6 +28,7 @@ cc_library_static {
name: "libberberis_interpreter_riscv64",
defaults: ["berberis_defaults_64"],
host_supported: true,
+ cflags: ["-DBERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS"],
header_libs: [
"libberberis_base_headers",
"libberberis_decoder_riscv64_headers",
@@ -44,7 +45,22 @@ cc_library_static {
srcs: ["riscv64/faulty_memory_accesses_x86_64.cc"],
},
},
- srcs: ["riscv64/interpreter.cc"],
+ srcs: [
+ "riscv64/interpreter-main.cc",
+ "riscv64/interpreter-VLoadIndexedArgs.cc",
+ "riscv64/interpreter-VLoadStrideArgs.cc",
+ "riscv64/interpreter-VLoadUnitStrideArgs.cc",
+ "riscv64/interpreter-VOpFVfArgs.cc",
+ "riscv64/interpreter-VOpFVvArgs.cc",
+ "riscv64/interpreter-VOpIViArgs.cc",
+ "riscv64/interpreter-VOpIVvArgs.cc",
+ "riscv64/interpreter-VOpIVxArgs.cc",
+ "riscv64/interpreter-VOpMVvArgs.cc",
+ "riscv64/interpreter-VOpMVxArgs.cc",
+ "riscv64/interpreter-VStoreIndexedArgs.cc",
+ "riscv64/interpreter-VStoreStrideArgs.cc",
+ "riscv64/interpreter-VStoreUnitStrideArgs.cc",
+ ],
}
cc_test_library {
diff --git a/interpreter/riscv64/interpreter-VLoadIndexedArgs.cc b/interpreter/riscv64/interpreter-VLoadIndexedArgs.cc
new file mode 100644
index 00000000..6dd0f19d
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VLoadIndexedArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VLoadIndexedArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VLoadStrideArgs.cc b/interpreter/riscv64/interpreter-VLoadStrideArgs.cc
new file mode 100644
index 00000000..ab8d78c5
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VLoadStrideArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VLoadStrideArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VLoadUnitStrideArgs.cc b/interpreter/riscv64/interpreter-VLoadUnitStrideArgs.cc
new file mode 100644
index 00000000..63168fbd
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VLoadUnitStrideArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VLoadUnitStrideArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpFVfArgs.cc b/interpreter/riscv64/interpreter-VOpFVfArgs.cc
new file mode 100644
index 00000000..4701cef8
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpFVfArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpFVfArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpFVvArgs.cc b/interpreter/riscv64/interpreter-VOpFVvArgs.cc
new file mode 100644
index 00000000..7b99809c
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpFVvArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpFVvArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpIViArgs.cc b/interpreter/riscv64/interpreter-VOpIViArgs.cc
new file mode 100644
index 00000000..f6ff4df1
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpIViArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpIViArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpIVvArgs.cc b/interpreter/riscv64/interpreter-VOpIVvArgs.cc
new file mode 100644
index 00000000..5fe0e03c
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpIVvArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpIVvArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpIVxArgs.cc b/interpreter/riscv64/interpreter-VOpIVxArgs.cc
new file mode 100644
index 00000000..f8cbecf7
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpIVxArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpIVxArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpMVvArgs.cc b/interpreter/riscv64/interpreter-VOpMVvArgs.cc
new file mode 100644
index 00000000..d3bd73d6
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpMVvArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpMVvArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VOpMVxArgs.cc b/interpreter/riscv64/interpreter-VOpMVxArgs.cc
new file mode 100644
index 00000000..4dc7d6dc
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VOpMVxArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpMVxArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VStoreIndexedArgs.cc b/interpreter/riscv64/interpreter-VStoreIndexedArgs.cc
new file mode 100644
index 00000000..1d8eb0ee
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VStoreIndexedArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VStoreIndexedArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VStoreStrideArgs.cc b/interpreter/riscv64/interpreter-VStoreStrideArgs.cc
new file mode 100644
index 00000000..9e32bb75
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VStoreStrideArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VStoreStrideArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-VStoreUnitStrideArgs.cc b/interpreter/riscv64/interpreter-VStoreUnitStrideArgs.cc
new file mode 100644
index 00000000..d12c16e8
--- /dev/null
+++ b/interpreter/riscv64/interpreter-VStoreUnitStrideArgs.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#undef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+#include "interpreter.h"
+
+namespace berberis {
+
+template void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VStoreUnitStrideArgs& args);
+
+} // namespace berberis
+#endif
diff --git a/interpreter/riscv64/interpreter-main.cc b/interpreter/riscv64/interpreter-main.cc
new file mode 100644
index 00000000..07834b5f
--- /dev/null
+++ b/interpreter/riscv64/interpreter-main.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "berberis/interpreter/riscv64/interpreter.h"
+
+#include "berberis/decoder/riscv64/decoder.h"
+#include "berberis/decoder/riscv64/semantics_player.h"
+#include "berberis/guest_state/guest_addr.h"
+#include "berberis/guest_state/guest_state.h"
+
+#include "faulty_memory_accesses.h"
+#include "interpreter.h"
+
+namespace berberis {
+
+void InitInterpreter() {
+ AddFaultyMemoryAccessRecoveryCode();
+}
+
+void InterpretInsn(ThreadState* state) {
+ GuestAddr pc = state->cpu.insn_addr;
+
+ Interpreter interpreter(state);
+ SemanticsPlayer sem_player(&interpreter);
+ Decoder decoder(&sem_player);
+ uint8_t insn_len = decoder.Decode(ToHostAddr<const uint16_t>(pc));
+ interpreter.FinalizeInsn(insn_len);
+}
+
+} // namespace berberis
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.h
index e39cbbb7..6680fc0e 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.h
@@ -44,8 +44,6 @@
namespace berberis {
-namespace {
-
inline constexpr std::memory_order AqRlToStdMemoryOrder(bool aq, bool rl) {
if (aq) {
if (rl) {
@@ -489,7 +487,7 @@ class Interpreter {
    // Note: other types of loads and stores are not special and would be processed as usual.
// TODO(khim): Handle vstart properly.
if constexpr (std::is_same_v<VOpArgs, Decoder::VLoadUnitStrideArgs>) {
- if (args.opcode == Decoder::VLoadUnitStrideOpcode::kVlXreXX) {
+ if (args.opcode == Decoder::VLUmOpOpcode::kVlXreXX) {
if (!IsPowerOf2(args.nf + 1)) {
return Unimplemented();
}
@@ -506,7 +504,7 @@ class Interpreter {
}
if constexpr (std::is_same_v<VOpArgs, Decoder::VStoreUnitStrideArgs>) {
- if (args.opcode == Decoder::VStoreUnitStrideOpcode::kVsX) {
+ if (args.opcode == Decoder::VSUmOpOpcode::kVsX) {
if (args.width != Decoder::MemoryDataOperandType::k8bit) {
return Unimplemented();
}
@@ -601,9 +599,10 @@ class Interpreter {
template <typename ElementType, typename VOpArgs, typename... ExtraArgs>
void OpVector(const VOpArgs& args, Register vtype, ExtraArgs... extra_args) {
- int vemul = Decoder::SignExtend<3>(vtype & 0b111);
+ auto vemul = Decoder::SignExtend<3>(vtype & 0b111);
vemul -= ((vtype >> 3) & 0b111); // Divide by SEW.
- vemul += static_cast<int>(args.width); // Multiply by EEW.
+ vemul +=
+ static_cast<std::underlying_type_t<decltype(args.width)>>(args.width); // Multiply by EEW.
if (vemul < -3 || vemul > 3) [[unlikely]] {
return Unimplemented();
}
@@ -736,7 +735,7 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
VectorRegisterGroupMultiplier vlmul,
auto vma,
typename VOpArgs,
@@ -773,7 +772,7 @@ class Interpreter {
}
}
- template <int kSegmentSize,
+ template <size_t kSegmentSize,
typename IndexElementType,
size_t kIndexRegistersInvolved,
TailProcessing vta,
@@ -801,7 +800,7 @@ class Interpreter {
}
template <typename DataElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
typename IndexElementType,
size_t kIndexRegistersInvolved,
TailProcessing vta,
@@ -874,7 +873,7 @@ class Interpreter {
template <typename DataElementType,
VectorRegisterGroupMultiplier vlmul,
typename IndexElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
size_t kIndexRegistersInvolved,
TailProcessing vta,
auto vma>
@@ -889,7 +888,7 @@ class Interpreter {
}
template <typename DataElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
size_t kNumRegistersInGroup,
typename IndexElementType,
size_t kIndexRegistersInvolved,
@@ -899,7 +898,7 @@ class Interpreter {
if (!IsAligned<kIndexRegistersInvolved>(args.idx)) {
return Unimplemented();
}
- constexpr int kElementsCount =
+ constexpr size_t kElementsCount =
static_cast<int>(sizeof(SIMD128Register) / sizeof(IndexElementType));
alignas(alignof(SIMD128Register))
IndexElementType indexes[kElementsCount * kIndexRegistersInvolved];
@@ -909,7 +908,7 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
auto vma>
@@ -919,7 +918,7 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
size_t kNumRegistersInGroup,
TailProcessing vta,
auto vma>
@@ -929,7 +928,7 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
auto vma>
@@ -939,29 +938,29 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
size_t kNumRegistersInGroup,
TailProcessing vta,
auto vma>
void OpVector(const Decoder::VLoadUnitStrideArgs& args, Register src) {
switch (args.opcode) {
- case Decoder::VLoadUnitStrideOpcode::kVleXXff:
+ case Decoder::VLUmOpOpcode::kVleXXff:
return OpVectorLoad<ElementType,
kSegmentSize,
kNumRegistersInGroup,
vta,
vma,
- Decoder::VLoadUnitStrideOpcode::kVleXXff>(
+ Decoder::VLUmOpOpcode::kVleXXff>(
args.dst, src, [](size_t index) { return kSegmentSize * sizeof(ElementType) * index; });
- case Decoder::VLoadUnitStrideOpcode::kVleXX:
+ case Decoder::VLUmOpOpcode::kVleXX:
return OpVectorLoad<ElementType,
kSegmentSize,
kNumRegistersInGroup,
vta,
vma,
- Decoder::VLoadUnitStrideOpcode::kVleXX>(
+ Decoder::VLUmOpOpcode::kVleXX>(
args.dst, src, [](size_t index) { return kSegmentSize * sizeof(ElementType) * index; });
- case Decoder::VLoadUnitStrideOpcode::kVlm:
+ case Decoder::VLUmOpOpcode::kVlm:
if constexpr (kSegmentSize == 1 &&
std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return OpVectorLoad<UInt8,
@@ -969,7 +968,7 @@ class Interpreter {
1,
TailProcessing::kAgnostic,
vma,
- Decoder::VLoadUnitStrideOpcode::kVlm>(
+ Decoder::VLUmOpOpcode::kVlm>(
args.dst, src, [](size_t index) { return index; });
}
return Unimplemented();
@@ -1004,14 +1003,13 @@ class Interpreter {
// v5: {B:20.21}{B:30.21}
// Now we have loaded a column from memory and all three colors are put into a different register
// groups for further processing.
- template <
- typename ElementType,
- int kSegmentSize,
- size_t kNumRegistersInGroup,
- TailProcessing vta,
- auto vma,
- typename Decoder::VLoadUnitStrideOpcode opcode = typename Decoder::VLoadUnitStrideOpcode{},
- typename GetElementOffsetLambdaType>
+ template <typename ElementType,
+ size_t kSegmentSize,
+ size_t kNumRegistersInGroup,
+ TailProcessing vta,
+ auto vma,
+ typename Decoder::VLUmOpOpcode opcode = typename Decoder::VLUmOpOpcode{},
+ typename GetElementOffsetLambdaType>
void OpVectorLoad(uint8_t dst, Register src, GetElementOffsetLambdaType GetElementOffset) {
using MaskType = std::conditional_t<sizeof(ElementType) == sizeof(Int8), UInt16, UInt8>;
if (!IsAligned<kNumRegistersInGroup>(dst)) {
@@ -1020,10 +1018,10 @@ class Interpreter {
if (dst + kNumRegistersInGroup * kSegmentSize >= 32) {
return Unimplemented();
}
- constexpr int kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr size_t kElementsCount = static_cast<int>(16 / sizeof(ElementType));
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
- if constexpr (opcode == Decoder::VLoadUnitStrideOpcode::kVlm) {
+ if constexpr (opcode == Decoder::VLUmOpOpcode::kVlm) {
vl = AlignUp<CHAR_BIT>(vl) / CHAR_BIT;
}
// In case of memory access fault we may set vstart to non-zero value, set it to zero here to
@@ -1064,7 +1062,7 @@ class Interpreter {
!(std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing> ||
static_cast<InactiveProcessing>(vma) != InactiveProcessing::kUndisturbed ||
register_mask == full_mask)) {
- for (int field = 0; field < kSegmentSize; ++field) {
+ for (size_t field = 0; field < kSegmentSize; ++field) {
result[field].Set(state_->cpu.v[dst + within_group_id + field * kNumRegistersInGroup]);
}
}
@@ -1084,7 +1082,7 @@ class Interpreter {
}
}
// Load segment from memory.
- for (int field = 0; field < kSegmentSize; ++field) {
+ for (size_t field = 0; field < kSegmentSize; ++field) {
FaultyLoadResult mem_access_result =
FaultyLoad(ptr + field * sizeof(ElementType) + GetElementOffset(element_index),
sizeof(ElementType));
@@ -1093,7 +1091,7 @@ class Interpreter {
// access fault happens but let's trigger an exception and treat the remaining elements
// using vta-specified strategy by simply just adjusting the vl.
vl = element_index;
- if constexpr (opcode == Decoder::VLoadUnitStrideOpcode::kVleXXff) {
+ if constexpr (opcode == Decoder::VLUmOpOpcode::kVleXXff) {
// Fail-first load only triggers exceptions for the first element, otherwise it
// changes vl to ensure that other operations would only process elements that are
// successfully loaded.
@@ -1127,7 +1125,7 @@ class Interpreter {
if (register_mask != full_mask) {
auto [simd_mask] =
intrinsics::BitMaskToSimdMaskForTests<ElementType>(Int64{MaskType{register_mask}});
- for (int field = 0; field < kSegmentSize; ++field) {
+ for (size_t field = 0; field < kSegmentSize; ++field) {
if constexpr (vma == InactiveProcessing::kAgnostic) {
// vstart equal to zero is supposed to be exceptional. From RISV-V V manual (page 14):
// The vstart CSR is writable by unprivileged code, but non-zero vstart values may
@@ -1162,14 +1160,14 @@ class Interpreter {
}
// If we have tail elements and TailProcessing::kAgnostic mode then set them to ~0.
if constexpr (vta == TailProcessing::kAgnostic) {
- for (int field = 0; field < kSegmentSize; ++field) {
+ for (size_t field = 0; field < kSegmentSize; ++field) {
if (vl < (within_group_id + 1) * kElementsCount) {
result[field] |= GetTailMask();
}
}
}
// Put values back into register file.
- for (int field = 0; field < kSegmentSize; ++field) {
+ for (size_t field = 0; field < kSegmentSize; ++field) {
state_->cpu.v[dst + within_group_id + field * kNumRegistersInGroup] =
result[field].template Get<__uint128_t>();
}
@@ -1205,6 +1203,12 @@ class Interpreter {
InactiveProcessing::kUndisturbed>(
args.dst, arg2, /*dst_mask=*/args.src1);
}
+ case Decoder::VOpFVfOpcode::kVfmaxvf:
+ return OpVectorvx<intrinsics::Vfmaxvx<ElementType>, ElementType, vlmul, vta, vma>(
+ args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfminvf:
+ return OpVectorvx<intrinsics::Vfminvx<ElementType>, ElementType, vlmul, vta, vma>(
+ args.dst, args.src1, arg2);
default:
return Unimplemented();
}
@@ -1212,15 +1216,163 @@ class Interpreter {
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma>
void OpVector(const Decoder::VOpFVvArgs& args) {
- // We currently don't support Float32 operations, but conversion routines that deal with
+ using SignedType = std::make_signed_t<typename TypeTraits<ElementType>::Int>;
+ using UnsignedType = std::make_unsigned_t<typename TypeTraits<ElementType>::Int>;
+ // We currently don't support Float16 operations, but conversion routines that deal with
// double-width floats use these encodings to produce regular Float32 types.
- // That's why we need to call these routines twice: one here and one in the large switch below.
- if constexpr (sizeof(ElementType) < sizeof(Float32)) {
+ if constexpr (sizeof(ElementType) <= sizeof(Float32)) {
+ using WideElementType = typename TypeTraits<ElementType>::Wide;
switch (args.opcode) {
+ case Decoder::VOpFVvOpcode::kVFUnary0:
+ switch (args.vfunary0_opcode) {
+ case Decoder::VFUnary0Opcode::kVfwcvtfxuv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideElementType, UnsignedType>(FPFlags::DYN, frm, src);
+ },
+ WideElementType,
+ UnsignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfwcvtfxv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideElementType, SignedType>(FPFlags::DYN, frm, src);
+ },
+ WideElementType,
+ SignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtxufw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<UnsignedType, WideElementType>(FPFlags::DYN, frm, src);
+ },
+ UnsignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtxfw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<SignedType, WideElementType>(FPFlags::DYN, frm, src);
+ },
+ SignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtrtzxufw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<UnsignedType, WideElementType>(FPFlags::RTZ, frm, src);
+ },
+ UnsignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtrtzxfw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<SignedType, WideElementType>(FPFlags::RTZ, frm, src);
+ },
+ SignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ default:
+ break; // Make compiler happy.
+ }
+ break;
default:
- return Unimplemented();
+ break; // Make compiler happy.
}
- } else {
+ }
+  // Widening and narrowing operations which take a floating point “narrow” operand may only work
+  // correctly with Float32 input: Float16 is not supported yet, while Float64 input would produce
+  // a 128bit output which is currently reserved in RISC-V V.
+ if constexpr (sizeof(ElementType) == sizeof(Float32)) {
+ using WideElementType = typename TypeTraits<ElementType>::Wide;
+ using WideSignedType = typename TypeTraits<SignedType>::Wide;
+ using WideUnsignedType = typename TypeTraits<UnsignedType>::Wide;
+ switch (args.opcode) {
+ case Decoder::VOpFVvOpcode::kVFUnary0:
+ switch (args.vfunary0_opcode) {
+ case Decoder::VFUnary0Opcode::kVfwcvtxufv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideUnsignedType, ElementType>(FPFlags::DYN, frm, src);
+ },
+ WideUnsignedType,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfwcvtxfv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideSignedType, ElementType>(FPFlags::DYN, frm, src);
+ },
+ WideSignedType,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfwcvtffv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideElementType, ElementType>(FPFlags::DYN, frm, src);
+ },
+ WideElementType,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfwcvtrtzxufv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideUnsignedType, ElementType>(FPFlags::RTZ, frm, src);
+ },
+ WideUnsignedType,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfwcvtrtzxfv:
+ return OpVectorWidenvr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<WideSignedType, ElementType>(FPFlags::RTZ, frm, src);
+ },
+ WideSignedType,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtfxuw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, WideUnsignedType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtffw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, WideElementType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtfxw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, WideSignedType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ default:
+ break; // Make compiler happy.
+ }
+ break;
+ default:
+ break; // Make compiler happy.
+ }
+ }
+  // If our ElementType is Float16 then “straight” operations are unsupported and we shouldn't try
+  // to instantiate any functions since this would lead to a compile-time error.
+ if constexpr (sizeof(ElementType) >= sizeof(Float32)) {
switch (args.opcode) {
case Decoder::VOpFVvOpcode::kVfmvfs:
if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
@@ -1230,10 +1382,71 @@ class Interpreter {
return Unimplemented();
}
return OpVectorVmvfs<ElementType>(args.dst, args.src1);
+ case Decoder::VOpFVvOpcode::kVFUnary0:
+ switch (args.vfunary0_opcode) {
+ case Decoder::VFUnary0Opcode::kVfcvtxufv:
+ return OpVectorv<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<UnsignedType, ElementType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfcvtxfv:
+ return OpVectorv<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<SignedType, ElementType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfcvtfxuv:
+ return OpVectorv<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, UnsignedType>(FPFlags::DYN, frm, src);
+ },
+ UnsignedType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfcvtfxv:
+ return OpVectorv<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, SignedType>(FPFlags::DYN, frm, src);
+ },
+ SignedType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfcvtrtzxufv:
+ return OpVectorv<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<UnsignedType, ElementType>(FPFlags::RTZ, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfcvtrtzxfv:
+ return OpVectorv<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<SignedType, ElementType>(FPFlags::RTZ, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1);
+ default:
+ break; // Make compiler happy.
+ }
+ break;
default:
- return Unimplemented();
+ break; // Make compiler happy.
}
}
+ return Unimplemented();
}
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma>
@@ -1299,9 +1512,21 @@ class Interpreter {
InactiveProcessing::kUndisturbed>(
args.dst, BitCastToUnsigned(SignedType{args.imm}), /*dst_mask=*/args.src);
}
- case Decoder::VOpIViOpcode::kVmvvi:
+ case Decoder::VOpIViOpcode::kVmvXrv:
+ // kVmv<nr>rv instruction
if constexpr (std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
- return OpVectorVmvXr<ElementType>(args.dst, args.src, static_cast<uint8_t>(args.imm));
+ switch (args.imm) {
+ case 0:
+ return OpVectorVmvXrv<ElementType, 1>(args.dst, args.src);
+ case 1:
+ return OpVectorVmvXrv<ElementType, 2>(args.dst, args.src);
+ case 3:
+ return OpVectorVmvXrv<ElementType, 4>(args.dst, args.src);
+ case 7:
+ return OpVectorVmvXrv<ElementType, 8>(args.dst, args.src);
+ default:
+ return Unimplemented();
+ }
} else {
return Unimplemented();
}
@@ -1466,89 +1691,101 @@ class Interpreter {
case Decoder::VOpMVvOpcode::kVredmaxvs:
return OpVectorvs<intrinsics::Vredmaxvs<SignedType>, SignedType, vlmul, vta, vma>(
args.dst, args.src1, args.src2);
- case Decoder::VOpMVvOpcode::kVXmXXs:
- switch (args.vXmXXs_opcode) {
- case Decoder::VXmXXsOpcode::kVmvxs:
+ case Decoder::VOpMVvOpcode::kVWXUnary0:
+ switch (args.vwxunary0_opcode) {
+ case Decoder::VWXUnary0Opcode::kVmvxs:
if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return Unimplemented();
}
return OpVectorVmvxs<SignedType>(args.dst, args.src1);
- case Decoder::VXmXXsOpcode::kVcpopm:
- return OpVectorVXmXXs<intrinsics::Vcpopm<Int128>, vma>(args.dst, args.src1);
- case Decoder::VXmXXsOpcode::kVfirstm:
- return OpVectorVXmXXs<intrinsics::Vfirstm<Int128>, vma>(args.dst, args.src1);
+ case Decoder::VWXUnary0Opcode::kVcpopm:
+ return OpVectorVWXUnary0<intrinsics::Vcpopm<Int128>, vma>(args.dst, args.src1);
+ case Decoder::VWXUnary0Opcode::kVfirstm:
+ return OpVectorVWXUnary0<intrinsics::Vfirstm<Int128>, vma>(args.dst, args.src1);
default:
- return Unimplemented();
+ return Unimplemented();
}
- case Decoder::VOpMVvOpcode::kVxunary0:
+ case Decoder::VOpMVvOpcode::kVFUnary0:
switch (args.vxunary0_opcode) {
- case Decoder::Vxunary0Opcode::kVzextvf2m:
- if constexpr (sizeof(UnsignedType) >= 2) {
- return OpVectorExtend<intrinsics::Vextf2<UnsignedType>,
- UnsignedType,
- 2,
- vlmul,
- vta,
- vma>(args.dst, args.src1);
- }
- break;
- case Decoder::Vxunary0Opcode::kVsextvf2m:
- if constexpr (sizeof(SignedType) >= 2) {
- return OpVectorExtend<intrinsics::Vextf2<SignedType>, SignedType, 2, vlmul, vta, vma>(
- args.dst, args.src1);
- }
- break;
- case Decoder::Vxunary0Opcode::kVzextvf4m:
- if constexpr (sizeof(UnsignedType) >= 4) {
- return OpVectorExtend<intrinsics::Vextf4<UnsignedType>,
- UnsignedType,
- 4,
- vlmul,
- vta,
- vma>(args.dst, args.src1);
- }
- break;
- case Decoder::Vxunary0Opcode::kVsextvf4m:
- if constexpr (sizeof(SignedType) >= 4) {
- return OpVectorExtend<intrinsics::Vextf4<SignedType>, SignedType, 4, vlmul, vta, vma>(
- args.dst, args.src1);
- }
- break;
- case Decoder::Vxunary0Opcode::kVzextvf8m:
- if constexpr (sizeof(UnsignedType) >= 8) {
- return OpVectorExtend<intrinsics::Vextf8<UnsignedType>,
- UnsignedType,
- 8,
- vlmul,
- vta,
- vma>(args.dst, args.src1);
- }
- break;
- case Decoder::Vxunary0Opcode::kVsextvf8m:
- if constexpr (sizeof(SignedType) >= 8) {
- return OpVectorExtend<intrinsics::Vextf8<SignedType>, SignedType, 8, vlmul, vta, vma>(
- args.dst, args.src1);
- }
- break;
+ case Decoder::VXUnary0Opcode::kVzextvf2m:
+ if constexpr (sizeof(UnsignedType) >= 2) {
+ return OpVectorVXUnary0<intrinsics::Vextf2<UnsignedType>,
+ UnsignedType,
+ 2,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ }
+ break;
+ case Decoder::VXUnary0Opcode::kVsextvf2m:
+ if constexpr (sizeof(SignedType) >= 2) {
+ return OpVectorVXUnary0<intrinsics::Vextf2<SignedType>,
+ SignedType,
+ 2,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ }
+ break;
+ case Decoder::VXUnary0Opcode::kVzextvf4m:
+ if constexpr (sizeof(UnsignedType) >= 4) {
+ return OpVectorVXUnary0<intrinsics::Vextf4<UnsignedType>,
+ UnsignedType,
+ 4,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ }
+ break;
+ case Decoder::VXUnary0Opcode::kVsextvf4m:
+ if constexpr (sizeof(SignedType) >= 4) {
+ return OpVectorVXUnary0<intrinsics::Vextf4<SignedType>,
+ SignedType,
+ 4,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ }
+ break;
+ case Decoder::VXUnary0Opcode::kVzextvf8m:
+ if constexpr (sizeof(UnsignedType) >= 8) {
+ return OpVectorVXUnary0<intrinsics::Vextf8<UnsignedType>,
+ UnsignedType,
+ 8,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ }
+ break;
+ case Decoder::VXUnary0Opcode::kVsextvf8m:
+ if constexpr (sizeof(SignedType) >= 8) {
+ return OpVectorVXUnary0<intrinsics::Vextf8<SignedType>,
+ SignedType,
+ 8,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ }
+ break;
default:
- return Unimplemented();
+ return Unimplemented();
}
return Unimplemented();
- case Decoder::VOpMVvOpcode::kVmsXf:
- switch (args.vmsXf_opcode) {
- case Decoder::VmsXfOpcode::kVmsbfm:
- return OpVectorVmsXf<intrinsics::Vmsbfm<>, vma>(args.dst, args.src1);
- case Decoder::VmsXfOpcode::kVmsofm:
- return OpVectorVmsXf<intrinsics::Vmsofm<>, vma>(args.dst, args.src1);
- case Decoder::VmsXfOpcode::kVmsifm:
- return OpVectorVmsXf<intrinsics::Vmsifm<>, vma>(args.dst, args.src1);
- case Decoder::VmsXfOpcode::kVidv:
- if (args.src1) {
- return Unimplemented();
- }
- return OpVectorVidv<ElementType, vlmul, vta, vma>(args.dst);
- default:
+ case Decoder::VOpMVvOpcode::kVMUnary0:
+ switch (args.vmunary0_opcode) {
+ case Decoder::VMUnary0Opcode::kVmsbfm:
+ return OpVectorVMUnary0<intrinsics::Vmsbfm<>, vma>(args.dst, args.src1);
+ case Decoder::VMUnary0Opcode::kVmsofm:
+ return OpVectorVMUnary0<intrinsics::Vmsofm<>, vma>(args.dst, args.src1);
+ case Decoder::VMUnary0Opcode::kVmsifm:
+ return OpVectorVMUnary0<intrinsics::Vmsifm<>, vma>(args.dst, args.src1);
+ case Decoder::VMUnary0Opcode::kVidv:
+ if (args.src1) {
return Unimplemented();
+ }
+ return OpVectorVidv<ElementType, vlmul, vta, vma>(args.dst);
+ default:
+ return Unimplemented();
}
case Decoder::VOpMVvOpcode::kVmaddvv:
return OpVectorvvv<intrinsics::Vmaddvv<ElementType>, ElementType, vlmul, vta, vma>(
@@ -1709,15 +1946,15 @@ class Interpreter {
using SignedType = berberis::SignedType<ElementType>;
using UnsignedType = berberis::UnsignedType<ElementType>;
switch (args.opcode) {
- case Decoder::VOpMVxOpcode::kVXmXXx:
- switch (args.vXmXXx_opcode) {
- case Decoder::VXmXXxOpcode::kVmvsx:
- if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
- return Unimplemented();
- }
- return OpVectorVmvsx<SignedType, vta>(args.dst, MaybeTruncateTo<SignedType>(arg2));
- default:
+ case Decoder::VOpMVxOpcode::kVRXUnary0:
+ switch (args.vrxunary0_opcode) {
+ case Decoder::VRXUnary0Opcode::kVmvsx:
+ if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return Unimplemented();
+ }
+ return OpVectorVmvsx<SignedType, vta>(args.dst, MaybeTruncateTo<SignedType>(arg2));
+ default:
+ return Unimplemented();
}
case Decoder::VOpMVxOpcode::kVmaddvx:
return OpVectorvxv<intrinsics::Vmaddvx<ElementType>, ElementType, vlmul, vta, vma>(
@@ -1751,7 +1988,7 @@ class Interpreter {
template <typename DataElementType,
VectorRegisterGroupMultiplier vlmul,
typename IndexElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
size_t kIndexRegistersInvolved,
TailProcessing vta,
auto vma>
@@ -1765,7 +2002,7 @@ class Interpreter {
}
template <typename DataElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
size_t kNumRegistersInGroup,
typename IndexElementType,
size_t kIndexRegistersInvolved,
@@ -1774,7 +2011,7 @@ class Interpreter {
if (!IsAligned<kIndexRegistersInvolved>(args.idx)) {
return Unimplemented();
}
- constexpr int kElementsCount =
+ constexpr size_t kElementsCount =
static_cast<int>(sizeof(SIMD128Register) / sizeof(IndexElementType));
alignas(alignof(SIMD128Register))
IndexElementType indexes[kElementsCount * kIndexRegistersInvolved];
@@ -1784,7 +2021,7 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
auto vma>
@@ -1797,29 +2034,28 @@ class Interpreter {
}
template <typename ElementType,
- int kSegmentSize,
+ size_t kSegmentSize,
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
auto vma>
void OpVector(const Decoder::VStoreUnitStrideArgs& args, Register src) {
switch (args.opcode) {
- case Decoder::VStoreUnitStrideOpcode::kVseXX:
+ case Decoder::VSUmOpOpcode::kVseXX:
return OpVectorStore<ElementType,
kSegmentSize,
NumberOfRegistersInvolved(vlmul),
!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>,
- Decoder::VStoreUnitStrideOpcode::kVseXX>(
- args.data, src, [](size_t index) {
- return kSegmentSize * sizeof(ElementType) * index;
- });
- case Decoder::VStoreUnitStrideOpcode::kVsm:
+ Decoder::VSUmOpOpcode::kVseXX>(args.data, src, [](size_t index) {
+ return kSegmentSize * sizeof(ElementType) * index;
+ });
+ case Decoder::VSUmOpOpcode::kVsm:
if constexpr (kSegmentSize == 1 &&
std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return OpVectorStore<UInt8,
1,
1,
/*kUseMasking=*/false,
- Decoder::VStoreUnitStrideOpcode::kVsm>(
+ Decoder::VSUmOpOpcode::kVsm>(
args.data, src, [](size_t index) { return index; });
}
return Unimplemented();
@@ -1830,13 +2066,12 @@ class Interpreter {
// Look for VLoadStrideArgs for explanation about semantics: VStoreStrideArgs is almost symmetric,
// except it ignores vta and vma modes and never alters inactive elements in memory.
- template <
- typename ElementType,
- int kSegmentSize,
- size_t kNumRegistersInGroup,
- bool kUseMasking,
- typename Decoder::VStoreUnitStrideOpcode opcode = typename Decoder::VStoreUnitStrideOpcode{},
- typename GetElementOffsetLambdaType>
+ template <typename ElementType,
+ size_t kSegmentSize,
+ size_t kNumRegistersInGroup,
+ bool kUseMasking,
+ typename Decoder::VSUmOpOpcode opcode = typename Decoder::VSUmOpOpcode{},
+ typename GetElementOffsetLambdaType>
void OpVectorStore(uint8_t data, Register src, GetElementOffsetLambdaType GetElementOffset) {
using MaskType = std::conditional_t<sizeof(ElementType) == sizeof(Int8), UInt16, UInt8>;
if (!IsAligned<kNumRegistersInGroup>(data)) {
@@ -1845,10 +2080,10 @@ class Interpreter {
if (data + kNumRegistersInGroup * kSegmentSize > 32) {
return Unimplemented();
}
- constexpr int kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr size_t kElementsCount = static_cast<int>(16 / sizeof(ElementType));
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
- if constexpr (opcode == Decoder::VStoreUnitStrideOpcode::kVsm) {
+ if constexpr (opcode == Decoder::VSUmOpOpcode::kVsm) {
vl = AlignUp<CHAR_BIT>(vl) / CHAR_BIT;
}
// In case of memory access fault we may set vstart to non-zero value, set it to zero here to
@@ -1892,7 +2127,7 @@ class Interpreter {
}
}
// Store segment to memory.
- for (int field = 0; field < kSegmentSize; ++field) {
+ for (size_t field = 0; field < kSegmentSize; ++field) {
bool exception_raised = FaultyStore(
ptr + field * sizeof(ElementType) + GetElementOffset(element_index),
sizeof(ElementType),
@@ -1923,10 +2158,10 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -1936,7 +2171,6 @@ class Interpreter {
result, std::get<0>(intrinsics::Vidv<ElementType>(index)), vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
template <typename ElementType>
@@ -1981,7 +2215,7 @@ class Interpreter {
}
template <auto Intrinsic, auto vma>
- void OpVectorVXmXXs(uint8_t dst, uint8_t src1) {
+ void OpVectorVWXUnary0(uint8_t dst, uint8_t src1) {
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
if (vstart != 0) [[unlikely]] {
@@ -2004,24 +2238,19 @@ class Interpreter {
void OpVectormm(uint8_t dst, uint8_t src1, uint8_t src2) {
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
- SIMD128Register arg1(state_->cpu.v[src1]);
- SIMD128Register arg2(state_->cpu.v[src2]);
- SIMD128Register result;
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
+ SIMD128Register arg1(state_->cpu.v[src1]);
+ SIMD128Register arg2(state_->cpu.v[src2]);
+ SIMD128Register result;
if (vstart > 0) [[unlikely]] {
- if (vstart >= vl) [[unlikely]] {
- result.Set(state_->cpu.v[dst]);
- } else {
- const auto [start_mask] = intrinsics::MakeBitmaskFromVl(vstart);
- result.Set(state_->cpu.v[dst]);
- result = (result & ~start_mask) | (Intrinsic(arg1, arg2) & start_mask);
- }
- SetCsr<CsrName::kVstart>(0);
+ const auto [start_mask] = intrinsics::MakeBitmaskFromVl(vstart);
+ result.Set(state_->cpu.v[dst]);
+ result = (result & ~start_mask) | (Intrinsic(arg1, arg2) & start_mask);
} else {
result = Intrinsic(arg1, arg2);
}
@@ -2031,7 +2260,7 @@ class Interpreter {
}
template <auto Intrinsic, auto vma>
- void OpVectorVmsXf(uint8_t dst, uint8_t src1) {
+ void OpVectorVMUnary0(uint8_t dst, uint8_t src1) {
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
if (vstart != 0) {
@@ -2063,40 +2292,36 @@ class Interpreter {
state_->cpu.v[dst] = result.Get<__uint128_t>();
}
- template <typename ElementType>
- void OpVectorVmvXr(uint8_t dst, uint8_t src, uint8_t nf) {
- if (!IsPowerOf2(nf + 1)) {
- return Unimplemented();
- }
- if (((dst | src) & nf) != 0) {
+ template <typename ElementType, size_t kRegistersInvolved>
+ void OpVectorVmvXrv(uint8_t dst, uint8_t src) {
+ if (!IsAligned<kRegistersInvolved>(dst | src)) {
return Unimplemented();
}
+ constexpr size_t kElementsCount = static_cast<int>(16 / sizeof(ElementType));
size_t vstart = GetCsr<CsrName::kVstart>();
+ SetCsr<CsrName::kVstart>(0);
+ // The usual property that no elements are written if vstart >= vl does not apply to these
+ // instructions. Instead, no elements are written if vstart >= evl.
+ if (vstart >= kElementsCount * kRegistersInvolved) [[unlikely]] {
+ return;
+ }
if (vstart == 0) [[likely]] {
- for (int index = 0; index <= nf; ++index) {
+ for (size_t index = 0; index < kRegistersInvolved; ++index) {
state_->cpu.v[dst + index] = state_->cpu.v[src + index];
}
return;
}
- constexpr int kElementsCount = static_cast<int>(16 / sizeof(ElementType));
- for (int index = 0; index <= nf; ++index) {
- if (vstart >= kElementsCount) {
- vstart -= kElementsCount;
- continue;
- }
- if (vstart == 0) [[likely]] {
- state_->cpu.v[dst + index] = state_->cpu.v[src + index];
- } else {
- SIMD128Register destination{state_->cpu.v[dst + index]};
- SIMD128Register source{state_->cpu.v[src + index]};
- for (int element_index = vstart; element_index < kElementsCount; ++element_index) {
- destination.Set(source.Get<ElementType>(element_index), element_index);
- }
- state_->cpu.v[dst + index] = destination.Get<__uint128_t>();
- vstart = 0;
- }
+ size_t index = vstart / kElementsCount;
+ SIMD128Register destination{state_->cpu.v[dst + index]};
+ SIMD128Register source{state_->cpu.v[src + index]};
+ for (size_t element_index = vstart % kElementsCount; element_index < kElementsCount;
+ ++element_index) {
+ destination.Set(source.Get<ElementType>(element_index), element_index);
+ }
+ state_->cpu.v[dst + index] = destination.Get<__uint128_t>();
+ for (index++; index < kRegistersInvolved; ++index) {
+ state_->cpu.v[dst + index] = state_->cpu.v[src + index];
}
- SetCsr<CsrName::kVstart>(0);
}
template <auto Intrinsic, typename ElementType, VectorRegisterGroupMultiplier vlmul, auto vma>
@@ -2113,12 +2338,12 @@ class Interpreter {
SIMD128Register original_result(state_->cpu.v[dst]);
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
SIMD128Register result_before_vl_masking;
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
result_before_vl_masking = original_result;
- SetCsr<CsrName::kVstart>(0);
} else {
result_before_vl_masking =
CollectBitmaskResult<ElementType, kRegistersInvolved>([this, src1, src2](auto index) {
@@ -2138,7 +2363,6 @@ class Interpreter {
const auto [start_mask] = intrinsics::MakeBitmaskFromVl(vstart);
result_before_vl_masking =
(original_result & ~start_mask) | (result_before_vl_masking & start_mask);
- SetCsr<CsrName::kVstart>(0);
}
}
const auto [tail_mask] = intrinsics::MakeBitmaskFromVl(vl);
@@ -2159,12 +2383,12 @@ class Interpreter {
SIMD128Register original_result(state_->cpu.v[dst]);
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
SIMD128Register result_before_vl_masking;
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
result_before_vl_masking = original_result;
- SetCsr<CsrName::kVstart>(0);
} else {
result_before_vl_masking =
CollectBitmaskResult<ElementType, kRegistersInvolved>([this, src1, arg2](auto index) {
@@ -2183,7 +2407,6 @@ class Interpreter {
const auto [start_mask] = intrinsics::MakeBitmaskFromVl(vstart);
result_before_vl_masking =
(original_result & ~start_mask) | (result_before_vl_masking & start_mask);
- SetCsr<CsrName::kVstart>(0);
}
}
const auto [tail_mask] = intrinsics::MakeBitmaskFromVl(vl);
@@ -2194,21 +2417,16 @@ class Interpreter {
typename ElementType,
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
- auto vma>
- void OpVectorvs(uint8_t dst, uint8_t src1, uint8_t src2) {
- return OpVectorvs<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>(
- dst, src1, src2);
- }
-
- template <auto Intrinsic,
- typename ElementType,
- VectorRegisterGroupMultiplier vlmul,
- TailProcessing vta,
auto vma,
+ CsrName... kExtraCsrs,
typename... DstMaskType>
void OpVectorv(uint8_t dst, uint8_t src1, DstMaskType... dst_mask) {
- return OpVectorv<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>(
- dst, src1, dst_mask...);
+ return OpVectorv<Intrinsic,
+ ElementType,
+ NumberOfRegistersInvolved(vlmul),
+ vta,
+ vma,
+ kExtraCsrs...>(dst, src1, dst_mask...);
}
template <auto Intrinsic,
@@ -2216,6 +2434,7 @@ class Interpreter {
size_t kRegistersInvolved,
TailProcessing vta,
auto vma,
+ CsrName... kExtraCsrs,
typename... DstMaskType>
void OpVectorv(uint8_t dst, uint8_t src, DstMaskType... dst_mask) {
static_assert(sizeof...(dst_mask) <= 1);
@@ -2224,6 +2443,12 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
+ // When vstart >= vl, there are no body elements, and no elements are updated in any destination
+ // vector register group, including that no tail elements are updated with agnostic values.
+ if (vstart >= vl) [[unlikely]] {
+ return;
+ }
auto mask = GetMaskForVectorOperations<vma>();
for (size_t index = 0; index < kRegistersInvolved; ++index) {
SIMD128Register result{state_->cpu.v[dst + index]};
@@ -2235,11 +2460,26 @@ class Interpreter {
result_mask.Set(state_->cpu.v[dst_mask_unpacked[0] + index]);
}
SIMD128Register arg{state_->cpu.v[src + index]};
- result = VectorMasking<ElementType, vta, vma>(
- result, std::get<0>(Intrinsic(arg)), result_mask, vstart, vl, index, mask);
+ result =
+ VectorMasking<ElementType, vta, vma>(result,
+ std::get<0>(Intrinsic(GetCsr<kExtraCsrs>()..., arg)),
+ result_mask,
+ vstart,
+ vl,
+ index,
+ mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
+ }
+
+ template <auto Intrinsic,
+ typename ElementType,
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ auto vma>
+ void OpVectorvs(uint8_t dst, uint8_t src1, uint8_t src2) {
+ return OpVectorvs<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>(
+ dst, src1, src2);
}
template <auto Intrinsic,
@@ -2256,6 +2496,7 @@ class Interpreter {
if (vstart != 0) {
return Unimplemented();
}
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vl == 0) [[unlikely]] {
@@ -2274,7 +2515,7 @@ class Interpreter {
element_index += MaskType{1}) {
if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
if ((MaskType{mask_bits} & (MaskType{1} << element_index)) == MaskType{0}) {
- continue;
+ continue;
}
}
result = std::get<0>(Intrinsic(arg1, arg2.Get<ElementType>(element_index)));
@@ -2285,7 +2526,6 @@ class Interpreter {
result.Set(arg1, 0);
result = std::get<0>(intrinsics::VectorMasking<ElementType, vta>(result, result, 0, 1));
state_->cpu.v[dst] = result.Get<__uint128_t>();
- SetCsr<CsrName::kVstart>(0);
}
template <auto Intrinsic,
@@ -2309,6 +2549,12 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
+ // When vstart >= vl, there are no body elements, and no elements are updated in any destination
+ // vector register group, including that no tail elements are updated with agnostic values.
+ if (vstart >= vl) [[unlikely]] {
+ return;
+ }
auto mask = GetMaskForVectorOperations<vma>();
for (size_t index = 0; index < kRegistersInvolved; ++index) {
SIMD128Register result{state_->cpu.v[dst + index]};
@@ -2318,7 +2564,6 @@ class Interpreter {
result, std::get<0>(Intrinsic(arg1, arg2)), vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
template <auto Intrinsic,
@@ -2342,10 +2587,10 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -2357,7 +2602,59 @@ class Interpreter {
result, std::get<0>(Intrinsic(arg1, arg2, result)), vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
+ }
+
+ template <auto Intrinsic,
+ typename TargetElementType,
+ typename SourceElementType,
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ auto vma>
+ void OpVectorWidenvr(uint8_t dst, uint8_t src) {
+ return OpVectorWidenvr<Intrinsic,
+ TargetElementType,
+ SourceElementType,
+ NumRegistersInvolvedForWideOperand(vlmul),
+ NumberOfRegistersInvolved(vlmul),
+ vta,
+ vma>(dst, src);
+ }
+
+ template <auto Intrinsic,
+ typename TargetElementType,
+ typename SourceElementType,
+ size_t kDestRegistersInvolved,
+ size_t kRegistersInvolved,
+ TailProcessing vta,
+ auto vma>
+ void OpVectorWidenvr(uint8_t dst, uint8_t src) {
+ if (!IsAligned<kDestRegistersInvolved>(dst) || !IsAligned<kRegistersInvolved>(src)) {
+ return Unimplemented();
+ }
+ size_t vstart = GetCsr<CsrName::kVstart>();
+ size_t vl = GetCsr<CsrName::kVl>();
SetCsr<CsrName::kVstart>(0);
+ // When vstart >= vl, there are no body elements, and no elements are updated in any destination
+ // vector register group, including that no tail elements are updated with agnostic values.
+ if (vstart >= vl) [[unlikely]] {
+ return;
+ }
+ int8_t frm = GetCsr<CsrName::kFrm>();
+ auto mask = GetMaskForVectorOperations<vma>();
+ for (size_t index = 0; index < kRegistersInvolved; ++index) {
+ SIMD128Register result(state_->cpu.v[dst + 2 * index]);
+ SIMD128Register arg(state_->cpu.v[src + index]);
+ result = VectorMasking<TargetElementType, vta, vma>(
+ result, std::get<0>(Intrinsic(frm, arg)), vstart, vl, 2 * index, mask);
+ state_->cpu.v[dst + 2 * index] = result.Get<__uint128_t>();
+ if constexpr (kDestRegistersInvolved > 1) { // if lmul is one full register or more
+ result.Set(state_->cpu.v[dst + 2 * index + 1]);
+ std::tie(arg) = intrinsics::VMovTopHalfToBottom<SourceElementType>(arg);
+ result = VectorMasking<TargetElementType, vta, vma>(
+ result, std::get<0>(Intrinsic(frm, arg)), vstart, vl, 2 * index + 1, mask);
+ state_->cpu.v[dst + 2 * index + 1] = result.Get<__uint128_t>();
+ }
+ }
}
// 2*SEW = SEW op SEW
@@ -2378,7 +2675,7 @@ class Interpreter {
template <auto Intrinsic,
typename ElementType,
- int kDestRegistersInvolved,
+ size_t kDestRegistersInvolved,
size_t kRegistersInvolved,
TailProcessing vta,
auto vma>
@@ -2388,10 +2685,10 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -2411,7 +2708,6 @@ class Interpreter {
state_->cpu.v[dst + 2 * index + 1] = result.Get<__uint128_t>();
}
}
- SetCsr<CsrName::kVstart>(0);
}
template <auto Intrinsic,
@@ -2436,10 +2732,10 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -2450,7 +2746,60 @@ class Interpreter {
result, std::get<0>(Intrinsic(arg1, arg2)), vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
+ }
+
+ template <auto Intrinsic,
+ typename TargetElementType,
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ auto vma>
+ void OpVectorNarrowwr(uint8_t dst, uint8_t src) {
+ return OpVectorNarrowwr<Intrinsic,
+ TargetElementType,
+ NumberOfRegistersInvolved(vlmul),
+ NumRegistersInvolvedForWideOperand(vlmul),
+ vta,
+ vma>(dst, src);
+ }
+
+ template <auto Intrinsic,
+ typename TargetElementType,
+ size_t kDestRegistersInvolved,
+ size_t kSrcRegistersInvolved,
+ TailProcessing vta,
+ auto vma>
+ void OpVectorNarrowwr(uint8_t dst, uint8_t src) {
+ if constexpr (kDestRegistersInvolved == kSrcRegistersInvolved) {
+ if (!IsAligned<kDestRegistersInvolved>(dst | src)) {
+ return Unimplemented();
+ }
+ } else if (!IsAligned<kDestRegistersInvolved>(dst) || !IsAligned<kSrcRegistersInvolved>(src)) {
+ return Unimplemented();
+ }
+ size_t vstart = GetCsr<CsrName::kVstart>();
+ size_t vl = GetCsr<CsrName::kVl>();
SetCsr<CsrName::kVstart>(0);
+ // When vstart >= vl, there are no body elements, and no elements are updated in any destination
+ // vector register group, including that no tail elements are updated with agnostic values.
+ if (vstart >= vl) [[unlikely]] {
+ return;
+ }
+ int8_t frm = GetCsr<CsrName::kFrm>();
+ auto mask = GetMaskForVectorOperations<vma>();
+ for (size_t index = 0; index < kDestRegistersInvolved; index++) {
+ SIMD128Register orig_result(state_->cpu.v[dst + index]);
+ SIMD128Register arg_low(state_->cpu.v[src + 2 * index]);
+ SIMD128Register intrinsic_result = std::get<0>(Intrinsic(frm, arg_low));
+ if constexpr (kSrcRegistersInvolved > 1) {
+ SIMD128Register arg_high(state_->cpu.v[src + 2 * index + 1]);
+ SIMD128Register result_high = std::get<0>(Intrinsic(frm, arg_high));
+ intrinsic_result = std::get<0>(
+ intrinsics::VMergeBottomHalfToTop<TargetElementType>(intrinsic_result, result_high));
+ }
+ auto result = VectorMasking<TargetElementType, vta, vma>(
+ orig_result, intrinsic_result, vstart, vl, index, mask);
+ state_->cpu.v[dst + index] = result.template Get<__uint128_t>();
+ }
}
// SEW = 2*SEW op SEW
@@ -2470,8 +2819,8 @@ class Interpreter {
template <auto Intrinsic,
typename ElementType,
- int kDestRegistersInvolved,
- int kSrcRegistersInvolved,
+ size_t kDestRegistersInvolved,
+ size_t kSrcRegistersInvolved,
TailProcessing vta,
auto vma>
void OpVectorNarrowwx(uint8_t dst, uint8_t src1, ElementType arg2) {
@@ -2484,14 +2833,14 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
- for (int index = 0; index < kDestRegistersInvolved; index++) {
+ for (size_t index = 0; index < kDestRegistersInvolved; index++) {
SIMD128Register orig_result(state_->cpu.v[dst + index]);
SIMD128Register arg1_low(state_->cpu.v[src1 + 2 * index]);
SIMD128Register intrinsic_result = std::get<0>(Intrinsic(arg1_low, arg2));
@@ -2507,7 +2856,6 @@ class Interpreter {
orig_result, intrinsic_result, vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.template Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
// SEW = 2*SEW op SEW
@@ -2528,7 +2876,7 @@ class Interpreter {
template <auto Intrinsic,
typename ElementType,
size_t kRegistersInvolved,
- int kFirstSrcRegistersInvolved,
+ size_t kFirstSrcRegistersInvolved,
TailProcessing vta,
auto vma>
void OpVectorNarrowwv(uint8_t dst, uint8_t src1, uint8_t src2) {
@@ -2542,10 +2890,10 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -2567,7 +2915,6 @@ class Interpreter {
orig_result, intrinsic_result, vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.template Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
template <auto Intrinsic,
@@ -2576,7 +2923,7 @@ class Interpreter {
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
auto vma>
- void OpVectorExtend(uint8_t dst, uint8_t src) {
+ void OpVectorVXUnary0(uint8_t dst, uint8_t src) {
static_assert(kFactor == 2 || kFactor == 4 || kFactor == 8);
constexpr size_t kDestRegistersInvolved = NumberOfRegistersInvolved(vlmul);
constexpr size_t kSourceRegistersInvolved = (kDestRegistersInvolved / kFactor) ?: 1;
@@ -2626,6 +2973,12 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
+ // When vstart >= vl, there are no body elements, and no elements are updated in any destination
+ // vector register group, including that no tail elements are updated with agnostic values.
+ if (vstart >= vl) [[unlikely]] {
+ return;
+ }
auto mask = GetMaskForVectorOperations<vma>();
for (size_t index = 0; index < kRegistersInvolved; ++index) {
SIMD128Register result(state_->cpu.v[dst + index]);
@@ -2634,7 +2987,6 @@ class Interpreter {
result, std::get<0>(Intrinsic(arg1, arg2, result)), vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
template <auto Intrinsic,
@@ -2661,10 +3013,10 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -2681,7 +3033,6 @@ class Interpreter {
result, std::get<0>(Intrinsic(arg2)), result_mask, vstart, vl, index, mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma>
@@ -2702,11 +3053,11 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
if (vstart >= vl) [[unlikely]] {
// From 16.3: For all of the [slide instructions], if vstart >= vl, the
// instruction performs no operation and leaves the destination vector
// register unchanged.
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
@@ -2743,7 +3094,6 @@ class Interpreter {
mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
- SetCsr<CsrName::kVstart>(0);
}
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, auto vma>
@@ -2764,15 +3114,14 @@ class Interpreter {
}
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);
if (vstart >= vl) [[unlikely]] {
// From 16.3: For all of the [slide instructions], if vstart >= vl, the
// instruction performs no operation and leaves the destination vector
// register unchanged.
- SetCsr<CsrName::kVstart>(0);
return;
}
auto mask = GetMaskForVectorOperations<vma>();
-
for (size_t index = 0; index < kRegistersInvolved; ++index) {
SIMD128Register result(state_->cpu.v[dst + index]);
@@ -2794,8 +3143,6 @@ class Interpreter {
mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
-
- SetCsr<CsrName::kVstart>(0);
}
// Helper function needed to generate bitmak result from non-bitmask inputs.
@@ -3014,32 +3361,32 @@ class Interpreter {
};
template <>
-[[nodiscard]] Interpreter::Register Interpreter::GetCsr<CsrName::kFCsr>() const {
+[[nodiscard]] Interpreter::Register inline Interpreter::GetCsr<CsrName::kFCsr>() const {
return FeGetExceptions() | (state_->cpu.frm << 5);
}
template <>
-[[nodiscard]] Interpreter::Register Interpreter::GetCsr<CsrName::kFFlags>() const {
+[[nodiscard]] Interpreter::Register inline Interpreter::GetCsr<CsrName::kFFlags>() const {
return FeGetExceptions();
}
template <>
-[[nodiscard]] Interpreter::Register Interpreter::GetCsr<CsrName::kVlenb>() const {
+[[nodiscard]] Interpreter::Register inline Interpreter::GetCsr<CsrName::kVlenb>() const {
return 16;
}
template <>
-[[nodiscard]] Interpreter::Register Interpreter::GetCsr<CsrName::kVxrm>() const {
+[[nodiscard]] Interpreter::Register inline Interpreter::GetCsr<CsrName::kVxrm>() const {
return state_->cpu.*CsrFieldAddr<CsrName::kVcsr> & 0b11;
}
template <>
-[[nodiscard]] Interpreter::Register Interpreter::GetCsr<CsrName::kVxsat>() const {
+[[nodiscard]] Interpreter::Register inline Interpreter::GetCsr<CsrName::kVxsat>() const {
return state_->cpu.*CsrFieldAddr<CsrName::kVcsr> >> 2;
}
template <>
-void Interpreter::SetCsr<CsrName::kFCsr>(Register arg) {
+void inline Interpreter::SetCsr<CsrName::kFCsr>(Register arg) {
CHECK(!exception_raised_);
FeSetExceptions(arg & 0b1'1111);
arg = (arg >> 5) & kCsrMask<CsrName::kFrm>;
@@ -3048,13 +3395,13 @@ void Interpreter::SetCsr<CsrName::kFCsr>(Register arg) {
}
template <>
-void Interpreter::SetCsr<CsrName::kFFlags>(Register arg) {
+void inline Interpreter::SetCsr<CsrName::kFFlags>(Register arg) {
CHECK(!exception_raised_);
FeSetExceptions(arg & 0b1'1111);
}
template <>
-void Interpreter::SetCsr<CsrName::kFrm>(Register arg) {
+void inline Interpreter::SetCsr<CsrName::kFrm>(Register arg) {
CHECK(!exception_raised_);
arg &= kCsrMask<CsrName::kFrm>;
state_->cpu.frm = arg;
@@ -3062,34 +3409,36 @@ void Interpreter::SetCsr<CsrName::kFrm>(Register arg) {
}
template <>
-void Interpreter::SetCsr<CsrName::kVxrm>(Register arg) {
+void inline Interpreter::SetCsr<CsrName::kVxrm>(Register arg) {
CHECK(!exception_raised_);
state_->cpu.*CsrFieldAddr<CsrName::kVcsr> =
(state_->cpu.*CsrFieldAddr<CsrName::kVcsr> & 0b100) | (arg & 0b11);
}
template <>
-void Interpreter::SetCsr<CsrName::kVxsat>(Register arg) {
+void inline Interpreter::SetCsr<CsrName::kVxsat>(Register arg) {
CHECK(!exception_raised_);
state_->cpu.*CsrFieldAddr<CsrName::kVcsr> =
(state_->cpu.*CsrFieldAddr<CsrName::kVcsr> & 0b11) | ((arg & 0b1) << 2);
}
template <>
-Interpreter::FpRegister Interpreter::GetFRegAndUnboxNan<Interpreter::Float32>(uint8_t reg) {
+[[nodiscard]] Interpreter::FpRegister inline Interpreter::GetFRegAndUnboxNan<Interpreter::Float32>(
+ uint8_t reg) {
CheckFpRegIsValid(reg);
FpRegister value = state_->cpu.f[reg];
return UnboxNan<Float32>(value);
}
template <>
-Interpreter::FpRegister Interpreter::GetFRegAndUnboxNan<Interpreter::Float64>(uint8_t reg) {
+[[nodiscard]] Interpreter::FpRegister inline Interpreter::GetFRegAndUnboxNan<Interpreter::Float64>(
+ uint8_t reg) {
CheckFpRegIsValid(reg);
return state_->cpu.f[reg];
}
template <>
-void Interpreter::NanBoxAndSetFpReg<Interpreter::Float32>(uint8_t reg, FpRegister value) {
+void inline Interpreter::NanBoxAndSetFpReg<Interpreter::Float32>(uint8_t reg, FpRegister value) {
if (exception_raised_) {
// Do not produce side effects.
return;
@@ -3099,7 +3448,7 @@ void Interpreter::NanBoxAndSetFpReg<Interpreter::Float32>(uint8_t reg, FpRegiste
}
template <>
-void Interpreter::NanBoxAndSetFpReg<Interpreter::Float64>(uint8_t reg, FpRegister value) {
+void inline Interpreter::NanBoxAndSetFpReg<Interpreter::Float64>(uint8_t reg, FpRegister value) {
if (exception_raised_) {
// Do not produce side effects.
return;
@@ -3108,20 +3457,33 @@ void Interpreter::NanBoxAndSetFpReg<Interpreter::Float64>(uint8_t reg, FpRegiste
state_->cpu.f[reg] = value;
}
-} // namespace
-
-void InitInterpreter() {
- AddFaultyMemoryAccessRecoveryCode();
-}
-
-void InterpretInsn(ThreadState* state) {
- GuestAddr pc = state->cpu.insn_addr;
-
- Interpreter interpreter(state);
- SemanticsPlayer sem_player(&interpreter);
- Decoder decoder(&sem_player);
- uint8_t insn_len = decoder.Decode(ToHostAddr<const uint16_t>(pc));
- interpreter.FinalizeInsn(insn_len);
-}
+#ifdef BERBERIS_RISCV64_INTERPRETER_SEPARATE_INSTANTIATION_OF_VECTOR_OPERATIONS
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VLoadIndexedArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VLoadStrideArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VLoadUnitStrideArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpFVfArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpFVvArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpIViArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpIVvArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpIVxArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpMVvArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VOpMVxArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VStoreIndexedArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VStoreStrideArgs& args);
+template <>
+extern void SemanticsPlayer<Interpreter>::OpVector(const Decoder::VStoreUnitStrideArgs& args);
+#endif
} // namespace berberis
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index aa67cfbc..ce991806 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -805,7 +805,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
// https://github.com/riscv/riscv-v-spec/pull/872
state_.cpu.vtype = BitUtilLog2(sizeof(ElementType)) << 3;
state_.cpu.vl = 0;
- constexpr int kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr int kElementsCount = static_cast<int>(sizeof(SIMD128Register) / sizeof(ElementType));
for (int vstart = 0; vstart <= kElementsCount * kNFfields; ++vstart) {
state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
state_.cpu.vstart = vstart;
@@ -817,7 +817,9 @@ class Riscv64InterpreterTest : public ::testing::Test {
for (int index = 0; index < 8; ++index) {
SIMD128Register expected_state{kVectorComparisonSource[index]};
SIMD128Register source_value{kVectorComparisonSource[index + 8]};
- if (index < kNFfields) {
+ if ((vstart < kElementsCount * kNFfields) && index < kNFfields) {
+ // The usual property that no elements are written if vstart >= vl does not apply to these
+ // instructions. Instead, no elements are written if vstart >= evl.
for (int element_index = 0; element_index < kElementsCount; ++element_index) {
if (element_index + index * kElementsCount >= vstart) {
expected_state.Set(source_value.Get<ElementType>(element_index), element_index);
@@ -929,6 +931,14 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
}
+ void TestVectorFloatInstruction(uint32_t insn_bytes,
+ const uint32_t (&expected_result_int32)[8][4],
+ const uint64_t (&expected_result_int64)[8][2],
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kDefault>(
+ insn_bytes, source, expected_result_int32, expected_result_int64);
+ }
+
void TestVectorInstruction(uint32_t insn_bytes,
const uint8_t (&expected_result_int8)[8][16],
const uint16_t (&expected_result_int16)[8][8],
@@ -967,16 +977,46 @@ class Riscv64InterpreterTest : public ::testing::Test {
expected_result_int64);
}
+ void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes,
+ const uint32_t (&expected_result_int32)[4][4],
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>(
+ insn_bytes, source, expected_result_int32);
+ }
+
+ void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes,
+ const uint16_t (&expected_result_int16)[4][8],
+ const uint32_t (&expected_result_int32)[4][4],
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>(
+ insn_bytes, source, expected_result_int16, expected_result_int32);
+ }
+
void TestNarrowingVectorInstruction(uint32_t insn_bytes,
- const uint8_t (&expected_result_int8)[8][16],
- const uint16_t (&expected_result_int16)[8][8],
- const uint32_t (&expected_result_int32)[8][4],
+ const uint8_t (&expected_result_int8)[4][16],
+ const uint16_t (&expected_result_int16)[4][8],
+ const uint32_t (&expected_result_int32)[4][4],
const __v2du (&source)[16]) {
TestVectorInstruction<TestVectorInstructionKind::kInteger,
TestVectorInstructionMode::kNarrowing>(
insn_bytes, source, expected_result_int8, expected_result_int16, expected_result_int32);
}
+ void TestWideningVectorFloatInstruction(uint32_t insn_bytes,
+ const uint64_t (&expected_result_int64)[8][2],
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kWidening>(
+ insn_bytes, source, expected_result_int64);
+ }
+
+ void TestWideningVectorFloatInstruction(uint32_t insn_bytes,
+ const uint32_t (&expected_result_int32)[8][4],
+ const uint64_t (&expected_result_int64)[8][2],
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kWidening>(
+ insn_bytes, source, expected_result_int32, expected_result_int64);
+ }
+
void TestWideningVectorInstruction(uint32_t insn_bytes,
const uint16_t (&expected_result_int16)[8][8],
const uint32_t (&expected_result_int32)[8][4],
@@ -993,10 +1033,12 @@ class Riscv64InterpreterTest : public ::testing::Test {
template <TestVectorInstructionKind kTestVectorInstructionKind,
TestVectorInstructionMode kTestVectorInstructionMode,
typename... ElementType,
+ size_t... kResultsCount,
size_t... kElementCount>
- void TestVectorInstruction(uint32_t insn_bytes,
- const __v2du (&source)[16],
- const ElementType (&... expected_result)[8][kElementCount]) {
+ void TestVectorInstruction(
+ uint32_t insn_bytes,
+ const __v2du (&source)[16],
+ const ElementType (&... expected_result)[kResultsCount][kElementCount]) {
auto Verify = [this, &source](uint32_t insn_bytes,
uint8_t vsew,
uint8_t vlmul_max,
@@ -1895,6 +1937,265 @@ TEST_F(Riscv64InterpreterTest, TestVmXr) {
TestVmvXr<8>(0x9f03b457); // Vmv8r.v v8, v16
}
+TEST_F(Riscv64InterpreterTest, TestVfcvtxfv) {
+ TestVectorFloatInstruction(0x49801457, // Vfcvt.xu.f.v v8, v24, v0.t
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0xffff'ffff, 0xffff'ffff, 0x0000'6a21, 0x6e25'6c00},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}},
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}},
+ kVectorCalculationsSource);
+ TestVectorFloatInstruction(0x49809457, // Vfcvt.x.f.v v8, v24, v0.t
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x8000'0000, 0x8000'0000, 0xffff'cacf, 0xc8cd'6a00},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x7fff'ffff, 0x7fff'ffff, 0x0000'6a21, 0x6e25'6c00},
+ {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}},
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff},
+ {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}},
+ kVectorCalculationsSource);
+ TestVectorFloatInstruction(0x49811457, // Vfcvt.f.xu.v v8, v24, v0.t
+ {{0x4f16'0492, 0x4f1e'0c9a, 0x4f06'1482, 0x4f0e'1c8a},
+ {0x4f36'24b2, 0x4f3e'2cba, 0x4f26'34a2, 0x4f2e'3caa},
+ {0x4f56'44d2, 0x4f5e'4cda, 0x4f46'54c2, 0x4f4e'5cca},
+ {0x4f76'64f2, 0x4f7e'6cfa, 0x4f66'74e2, 0x4f6e'7cea},
+ {0x4db4'2094, 0x4df4'60d4, 0x4cd2'8052, 0x4d69'c0aa},
+ {0x4e5a'90ca, 0x4e7a'b0eb, 0x4e1a'd08b, 0x4e3a'f0ab},
+ {0x4ead'88a6, 0x4ebd'98b6, 0x4e8d'a886, 0x4e9d'b896},
+ {0x4eed'c8e6, 0x4efd'd8f6, 0x4ecd'e8c6, 0x4edd'f8d6}},
+ {{0x43e3'c193'4132'c092, 0x43e1'c391'4310'c290},
+ {0x43e7'c597'4536'c496, 0x43e5'c795'4714'c694},
+ {0x43eb'c99b'493a'c89a, 0x43e9'cb99'4b18'ca98},
+ {0x43ef'cd9f'4d3e'cc9e, 0x43ed'cf9d'4f1c'ce9c},
+ {0x43be'8c1a'8916'8412, 0x43ad'3815'300d'2805},
+ {0x43cf'561d'549b'5219, 0x43c7'5e15'5c13'5a11},
+ {0x43d7'b316'b255'b115, 0x43d3'b712'b611'b511},
+ {0x43df'bb1e'ba5d'b91d, 0x43db'bf1a'be19'bd19}},
+ kVectorCalculationsSource);
+ TestVectorFloatInstruction(0x49819457, // Vfcvt.f.x.v v8, v24, v0.t
+ {{0xced3'f6dc, 0xcec3'e6cc, 0xcef3'd6fc, 0xcee3'c6ec},
+ {0xce93'b69c, 0xce83'a68c, 0xceb3'96bc, 0xcea3'86ac},
+ {0xce26'ecb7, 0xce06'cc97, 0xce66'acf7, 0xce46'8cd7},
+ {0xcd19'b0da, 0xcbc9'82cc, 0xcdcc'58ec, 0xcd8c'18ac},
+ {0x4db4'2094, 0x4df4'60d4, 0x4cd2'8052, 0x4d69'c0aa},
+ {0x4e5a'90ca, 0x4e7a'b0eb, 0x4e1a'd08b, 0x4e3a'f0ab},
+ {0x4ead'88a6, 0x4ebd'98b6, 0x4e8d'a886, 0x4e9d'b896},
+ {0x4eed'c8e6, 0x4efd'd8f6, 0x4ecd'e8c6, 0x4edd'f8d6}},
+ {{0xc3d8'7cd9'7d9a'7edc, 0xc3dc'78dd'79de'7adf},
+ {0xc3d0'74d1'7592'76d3, 0xc3d4'70d5'71d6'72d7},
+ {0xc3c0'd992'db14'dd97, 0xc3c8'd19a'd39c'd59f},
+ {0xc379'3059'6099'b0da, 0xc3b1'8315'8719'8b1e},
+ {0x43be'8c1a'8916'8412, 0x43ad'3815'300d'2805},
+ {0x43cf'561d'549b'5219, 0x43c7'5e15'5c13'5a11},
+ {0x43d7'b316'b255'b115, 0x43d3'b712'b611'b511},
+ {0x43df'bb1e'ba5d'b91d, 0x43db'bf1a'be19'bd19}},
+ kVectorCalculationsSource);
+ TestVectorFloatInstruction(0x49831457, // Vfcvt.rtz.xu.f.v v8, v24, v0.t
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0xffff'ffff, 0xffff'ffff, 0x0000'6a21, 0x6e25'6c00},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}},
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}},
+ kVectorCalculationsSource);
+ TestVectorFloatInstruction(0x49839457, // Vfcvt.rtz.x.f.v v8, v24, v0.t
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x8000'0000, 0x8000'0000, 0xffff'cad0, 0xc8cd'6a00},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x7fff'ffff, 0x7fff'ffff, 0x0000'6a21, 0x6e25'6c00},
+ {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}},
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff},
+ {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49c41457, // Vfwcvt.xu.f.v v8, v28, v0.t
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'6229'6000'0000, 0x662d'6480'0000'0000},
+ {0x0000'0000'0000'6a21, 0x0000'0000'6e25'6c00},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49849457, // Vfwcvt.x.f.v v8, v24, v0.t
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0xffff'cecb'7000'0000, 0xccc9'6dc0'0000'0000},
+ {0xffff'ffff'ffff'cacf, 0xffff'ffff'c8cd'6a00},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49861457, // Vfwcvt.f.f.v v8, v24, v0.t
+ {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000},
+ {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000},
+ {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000},
+ {0xbcc6'9446'2000'0000, 0xbdc7'9547'0000'0000},
+ {0xc2c8'9a48'0000'0000, 0xc3c9'9b49'2000'0000},
+ {0xc0ca'984a'2000'0000, 0xc1cb'994b'0000'0000},
+ {0xc6cc'9e4c'0000'0000, 0xc7cd'9f4d'2000'0000},
+ {0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49851457, // Vfwcvt.f.xu.v v8, v24, v0.t
+ {{0x4712'0000, 0x4716'0400, 0x471a'0900, 0x471e'0c00},
+ {0x4702'1100, 0x4706'1400, 0x470a'1800, 0x470e'1c00},
+ {0x4732'2000, 0x4736'2400, 0x473a'2900, 0x473e'2c00},
+ {0x4722'3100, 0x4726'3400, 0x472a'3800, 0x472e'3c00},
+ {0x4752'4000, 0x4756'4400, 0x475a'4900, 0x475e'4c00},
+ {0x4742'5100, 0x4746'5400, 0x474a'5800, 0x474e'5c00},
+ {0x4772'6000, 0x4776'6400, 0x477a'6900, 0x477e'6c00},
+ {0x4762'7100, 0x4766'7400, 0x476a'7800, 0x476e'7c00}},
+ {{0x41e2'c092'4000'0000, 0x41e3'c193'4120'0000},
+ {0x41e0'c290'4220'0000, 0x41e1'c391'4300'0000},
+ {0x41e6'c496'4400'0000, 0x41e7'c597'4520'0000},
+ {0x41e4'c694'4620'0000, 0x41e5'c795'4700'0000},
+ {0x41ea'c89a'4800'0000, 0x41eb'c99b'4920'0000},
+ {0x41e8'ca98'4a20'0000, 0x41e9'cb99'4b00'0000},
+ {0x41ee'cc9e'4c00'0000, 0x41ef'cd9f'4d20'0000},
+ {0x41ec'ce9c'4e20'0000, 0x41ed'cf9d'4f00'0000}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49859457, // Vfwcvt.f.x.v v8, v24, v0.t
+ {{0xc6dc'0000, 0xc6d3'f800, 0xc6cb'ee00, 0xc6c3'e800},
+ {0xc6fb'de00, 0xc6f3'd800, 0xc6eb'd000, 0xc6e3'c800},
+ {0xc69b'c000, 0xc693'b800, 0xc68b'ae00, 0xc683'a800},
+ {0xc6bb'9e00, 0xc6b3'9800, 0xc6ab'9000, 0xc6a3'8800},
+ {0xc637'0000, 0xc626'f000, 0xc616'dc00, 0xc606'd000},
+ {0xc676'bc00, 0xc666'b000, 0xc656'a000, 0xc646'9000},
+ {0xc55a'0000, 0xc519'c000, 0xc4b2'e000, 0xc3ca'0000},
+ {0xc5ec'7800, 0xc5cc'6000, 0xc5ac'4000, 0xc58c'2000}},
+ {{0xc1da'7edb'8000'0000, 0xc1d8'7cd9'7dc0'0000},
+ {0xc1de'7adf'7bc0'0000, 0xc1dc'78dd'7a00'0000},
+ {0xc1d2'76d3'7800'0000, 0xc1d0'74d1'75c0'0000},
+ {0xc1d6'72d7'73c0'0000, 0xc1d4'70d5'7200'0000},
+ {0xc1c4'dd96'e000'0000, 0xc1c0'd992'db80'0000},
+ {0xc1cc'd59e'd780'0000, 0xc1c8'd19a'd400'0000},
+ {0xc1a3'361b'4000'0000, 0xc179'3059'7000'0000},
+ {0xc1b9'8b1d'8f00'0000, 0xc1b1'8315'8800'0000}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49c71457, // Vfwcvt.rtz.xu.f.v v8, v28, v0.t
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'6229'6000'0000, 0x662d'6480'0000'0000},
+ {0x0000'0000'0000'6a21, 0x0000'0000'6e25'6c00},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
+ {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0x49879457, // Vfwcvt.rtz.x.f.v v8, v24, v0.t
+ {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0x0000'0000'0000'0000, 0x0000'0000'0000'0000},
+ {0xffff'cecb'7000'0000, 0xccc9'6dc0'0000'0000},
+ {0xffff'ffff'ffff'cad0, 0xffff'ffff'c8cd'6a00},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x49881457, // Vfncvt.xu.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x49889457, // Vfncvt.x.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x8000, 0x8000, 0xcacf, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(0x498a1457, // Vfncvt.f.f.w v8, v24, v0.t
+ {{0x8000'0000, 0x8000'0000, 0xb165'd14e, 0x8000'0000},
+ {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000},
+ {0x0000'0000, 0x0000'0000, 0x3561'd54a, 0x0000'0000},
+ {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(0x49891457, // Vfncvt.f.xu.w v8, v24, v0.t
+ {{0x5f1e'0c9a, 0x5f0e'1c8a, 0x5f3e'2cba, 0x5f2e'3caa},
+ {0x5f5e'4cda, 0x5f4e'5cca, 0x5f7e'6cfa, 0x5f6e'7cea},
+ {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab},
+ {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(0x49899457, // Vfncvt.f.x.w v8, v24, v0.t
+ {{0xdec3'e6cc, 0xdee3'c6ec, 0xde83'a68c, 0xdea3'86ac},
+ {0xde06'cc97, 0xde46'8cd7, 0xdbc9'82cb, 0xdd8c'18ac},
+ {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab},
+ {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x498b1457, // Vfncvt.rtz.xu.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x498b9457, // Vfncvt.rtz.x.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x8000, 0x8000, 0xcad0, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}},
+ kVectorCalculationsSource);
+}
+
TEST_F(Riscv64InterpreterTest, TestVfmvfs) {
TestVfmvfs<intrinsics::Float32>(0x428010d7, 0xffff'ffff'8302'8100); // Vfmv.f.s f1, v8
TestVfmvfs<intrinsics::Float64>(0x428010d7, 0x8706'8504'8302'8100); // Vfmv.f.s f1, v8
@@ -6886,6 +7187,24 @@ TEST_F(Riscv64InterpreterTest, TestVmin) {
{0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa},
{0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}},
kVectorCalculationsSourceLegacy);
+ TestVectorFloatInstruction(0x1100d457, // vfmin.vf v8, v16, f1, v0.t
+ {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000},
+ {0xaaaa'aaaa, 0xaaaa'aaaa, 0xaaaa'aaaa, 0xaaaa'aaaa},
+ {0xbbbb'bbbb, 0xbbbb'bbbb, 0xaaaa'aaaa, 0xaaaa'aaaa},
+ {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111},
+ {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000},
+ {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000},
+ {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb},
+ {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}},
+ {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000},
+ {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa},
+ {0xbbbb'bbbb'bbbb'bbbb, 0xaaaa'aaaa'aaaa'aaaa},
+ {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111},
+ {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000},
+ {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000},
+ {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb},
+ {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}},
+ kVectorComparisonSource);
}
TEST_F(Riscv64InterpreterTest, TestVmaxu) {
@@ -7032,6 +7351,24 @@ TEST_F(Riscv64InterpreterTest, TestVmax) {
{0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
{0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}},
kVectorCalculationsSourceLegacy);
+ TestVectorFloatInstruction(0x1900d457, // vfmax.vf v8, v16, f1, v0.t
+ {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000},
+ {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000},
+ {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000},
+ {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000},
+ {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000},
+ {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000},
+ {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000},
+ {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}},
+ {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000},
+ {0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
+ {0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
+ {0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
+ {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000},
+ {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000},
+ {0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
+ {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}},
+ kVectorComparisonSource);
}
TEST_F(Riscv64InterpreterTest, TestVredsum) {
diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h b/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h
index c62ffbae..e4e705bf 100644
--- a/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h
+++ b/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h
@@ -95,9 +95,40 @@ std::tuple<TargetOperandType> FCvtFloatToInteger(int8_t rm, int8_t frm, SourceOp
std::is_same_v<Float64, SourceOperandType>);
static_assert(std::is_integral_v<TargetOperandType>);
int8_t actual_rm = rm == FPFlags::DYN ? frm : rm;
- TargetOperandType result =
- static_cast<TargetOperandType>(FPRound(arg, ToIntrinsicRoundingMode(actual_rm)));
- return static_cast<std::make_signed_t<TargetOperandType>>(result);
+ SourceOperandType result = FPRound(arg, ToIntrinsicRoundingMode(actual_rm));
+ if constexpr (std::is_signed_v<TargetOperandType>) {
+  // Note: because of how two's complement numbers and floats work, the minimum negative number is
+  // always either representable precisely or not representable at all, but this is not true for the
+  // maximal possible value.
+ // Use ~min() to guarantee no surprises with rounding.
+ constexpr float kMinInBoundsNegativeValue =
+ static_cast<float>(std::numeric_limits<TargetOperandType>::min());
+ constexpr float kMinNotInBoundsPositiveValue = static_cast<float>(-kMinInBoundsNegativeValue);
+ if (result < SourceOperandType{kMinInBoundsNegativeValue}) [[unlikely]] {
+ return std::numeric_limits<TargetOperandType>::min();
+ }
+ // Note: we have to ensure that NaN is properly handled by this comparison!
+ if (result < SourceOperandType{kMinNotInBoundsPositiveValue}) [[likely]] {
+ return static_cast<TargetOperandType>(result);
+ }
+ } else {
+ // Note: if value is less than zero then result of conversion from float/double to unsigned
+ // integer is undefined and thus clang/gcc happily use conversion cvttss2si without doing
+ // anything to handle negative numbers. We need to handle that corner case here.
+ if (result < SourceOperandType{0.0f}) [[unlikely]] {
+ return 0;
+ }
+    // Similarly to the signed integers case above, we have to use -2.0f * min to properly handle NaNs.
+ constexpr float kMinNotInBoundsPositiveValue = static_cast<float>(
+ -2.0f *
+ static_cast<float>(std::numeric_limits<std::make_signed_t<TargetOperandType>>::min()));
+ // Note: we have to ensure that NaN is properly handled by this comparison!
+ if (result < SourceOperandType{kMinNotInBoundsPositiveValue}) [[likely]] {
+ return static_cast<TargetOperandType>(result);
+ }
+ }
+ // Handle too large numbers and NaN.
+ return std::numeric_limits<TargetOperandType>::max();
}
template <typename TargetOperandType,
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index aa394204..e9e396eb 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -481,6 +481,32 @@ inline std::tuple<SIMD128Register> Vmsofm(SIMD128Register simd_src) {
return {std::get<0>(Vmsbfm(simd_src)) ^ std::get<0>(Vmsifm(simd_src))};
}
+template <typename TargetElementType,
+ typename SourceElementType,
+ enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible>
+inline std::tuple<SIMD128Register> Vfcvtv(int8_t rm, int8_t frm, SIMD128Register src) {
+ SIMD128Register result;
+ constexpr int kElementsCount =
+ std::min(static_cast<int>(sizeof(SIMD128Register) / sizeof(TargetElementType)),
+ static_cast<int>(sizeof(SIMD128Register) / sizeof(SourceElementType)));
+ for (int index = 0; index < kElementsCount; ++index) {
+ if constexpr (std::is_integral_v<TargetElementType>) {
+ result.Set(std::get<0>(FCvtFloatToInteger<TargetElementType, SourceElementType>(
+ rm, frm, src.Get<SourceElementType>(index))),
+ index);
+ } else if constexpr (std::is_integral_v<SourceElementType>) {
+ result.Set(std::get<0>(FCvtIntegerToFloat<TargetElementType, SourceElementType>(
+ rm, frm, src.Get<SourceElementType>(index))),
+ index);
+ } else {
+ result.Set(std::get<0>(FCvtFloatToFloat<TargetElementType, SourceElementType>(
+ rm, frm, src.Get<SourceElementType>(index))),
+ index);
+ }
+ }
+ return result;
+}
+
#define DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS(...) __VA_ARGS__
#define DEFINE_ARITHMETIC_INTRINSIC(Name, arithmetic, parameters, arguments) \
\
@@ -609,10 +635,12 @@ DEFINE_3OP_ARITHMETIC_INTRINSIC_VV(nmsub, auto [arg1, arg2, arg3] = std::tuple{a
(-(arg2 * arg3) + arg1))
DEFINE_3OP_ARITHMETIC_INTRINSIC_VX(nmsub, auto [arg1, arg2, arg3] = std::tuple{args...};
(-(arg2 * arg3) + arg1))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(min, (std::min(args...)))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(min, (std::min(args...)))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(max, (std::max(args...)))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(max, (std::max(args...)))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fmin, std::get<0>(FMin(args...)))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fmax, std::get<0>(FMax(args...)))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(min, std::min(args...))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(min, std::min(args...))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(max, std::max(args...))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(max, std::max(args...))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redsum, (args + ...))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redand, (args & ...))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redor, (args | ...))