diff options
45 files changed, 5650 insertions, 2369 deletions
@@ -238,6 +238,7 @@ cc_test_host { "libberberis_code_gen_lib_riscv64", ], whole_static_libs: [ + "libberberis_tests_main", "libberberis_backend_riscv64_to_x86_64_unit_tests", "libberberis_code_gen_lib_riscv64_unit_tests", "libberberis_guest_abi_riscv64_unit_tests", diff --git a/backend/x86_64/lir_instructions.json b/backend/x86_64/lir_instructions.json index 663305c2..ba093ff1 100644 --- a/backend/x86_64/lir_instructions.json +++ b/backend/x86_64/lir_instructions.json @@ -31,6 +31,7 @@ "AddpdXRegXReg", "AddpsXRegXReg", "AndqRegImm", + "AndqRegMemInsns", "AndqRegReg", "BtqRegImm", "Cmc", diff --git a/berberis_config.mk b/berberis_config.mk index 176feb79..eff9c83c 100644 --- a/berberis_config.mk +++ b/berberis_config.mk @@ -79,4 +79,74 @@ BERBERIS_DEV_PRODUCT_PACKAGES_RISCV64_TO_X86_64 := \ berberis_guest_loader_riscv64_tests BERBERIS_DISTRIBUTION_ARTIFACTS_RISCV64 := \ - system/bin/berberis_program_runner_binfmt_misc_riscv64 + system/bin/berberis_program_runner_binfmt_misc_riscv64 \ + system/bin/berberis_program_runner_riscv64 \ + system/bin/riscv64/app_process64 \ + system/bin/riscv64/linker64 \ + system/etc/binfmt_misc/riscv64_dyn \ + system/etc/binfmt_misc/riscv64_exe \ + system/etc/init/berberis.rc \ + system/etc/ld.config.riscv64.txt \ + system/lib64/libberberis_exec_region.so \ + system/lib64/libberberis_proxy_libEGL.so \ + system/lib64/libberberis_proxy_libGLESv1_CM.so \ + system/lib64/libberberis_proxy_libGLESv2.so \ + system/lib64/libberberis_proxy_libGLESv3.so \ + system/lib64/libberberis_proxy_libOpenMAXAL.so \ + system/lib64/libberberis_proxy_libOpenSLES.so \ + system/lib64/libberberis_proxy_libaaudio.so \ + system/lib64/libberberis_proxy_libamidi.so \ + system/lib64/libberberis_proxy_libandroid.so \ + system/lib64/libberberis_proxy_libandroid_runtime.so \ + system/lib64/libberberis_proxy_libbinder_ndk.so \ + system/lib64/libberberis_proxy_libc.so \ + system/lib64/libberberis_proxy_libcamera2ndk.so \ + system/lib64/libberberis_proxy_libjnigraphics.so \ 
+ system/lib64/libberberis_proxy_libmediandk.so \ + system/lib64/libberberis_proxy_libnativehelper.so \ + system/lib64/libberberis_proxy_libnativewindow.so \ + system/lib64/libberberis_proxy_libneuralnetworks.so \ + system/lib64/libberberis_proxy_libwebviewchromium_plat_support.so \ + system/lib64/libberberis_riscv64.so \ + system/lib64/riscv64/ld-android.so \ + system/lib64/riscv64/libEGL.so \ + system/lib64/riscv64/libGLESv1_CM.so \ + system/lib64/riscv64/libGLESv2.so \ + system/lib64/riscv64/libGLESv3.so \ + system/lib64/riscv64/libOpenMAXAL.so \ + system/lib64/riscv64/libOpenSLES.so \ + system/lib64/riscv64/libaaudio.so \ + system/lib64/riscv64/libamidi.so \ + system/lib64/riscv64/libandroid.so \ + system/lib64/riscv64/libandroid_runtime.so \ + system/lib64/riscv64/libandroidicu.so \ + system/lib64/riscv64/libbase.so \ + system/lib64/riscv64/libbinder_ndk.so \ + system/lib64/riscv64/libc++.so \ + system/lib64/riscv64/libc.so \ + system/lib64/riscv64/libcamera2ndk.so \ + system/lib64/riscv64/libcompiler_rt.so \ + system/lib64/riscv64/libcrypto.so \ + system/lib64/riscv64/libcutils.so \ + system/lib64/riscv64/libdl.so \ + system/lib64/riscv64/libdl_android.so \ + system/lib64/riscv64/libicu.so \ + system/lib64/riscv64/libicui18n.so \ + system/lib64/riscv64/libicuuc.so \ + system/lib64/riscv64/libjnigraphics.so \ + system/lib64/riscv64/liblog.so \ + system/lib64/riscv64/libm.so \ + system/lib64/riscv64/libmediandk.so \ + system/lib64/riscv64/libnative_bridge_vdso.so \ + system/lib64/riscv64/libnativehelper.so \ + system/lib64/riscv64/libnativewindow.so \ + system/lib64/riscv64/libneuralnetworks.so \ + system/lib64/riscv64/libsqlite.so \ + system/lib64/riscv64/libssl.so \ + system/lib64/riscv64/libstdc++.so \ + system/lib64/riscv64/libsync.so \ + system/lib64/riscv64/libutils.so \ + system/lib64/riscv64/libvndksupport.so \ + system/lib64/riscv64/libvulkan.so \ + system/lib64/riscv64/libwebviewchromium_plat_support.so \ + system/lib64/riscv64/libz.so diff --git 
a/calling_conventions/calling_conventions_riscv64_test.cc b/calling_conventions/calling_conventions_riscv64_test.cc index fdd43412..2d3024c6 100644 --- a/calling_conventions/calling_conventions_riscv64_test.cc +++ b/calling_conventions/calling_conventions_riscv64_test.cc @@ -95,6 +95,95 @@ TEST(CallingConventions_riscv64, Smoke) { EXPECT_EQ(0u, loc.offset); } +TEST(CallingConventions_riscv64, FpSpilling) { + CallingConventions conv; + ArgLocation loc; + + loc = conv.GetNextIntArgLoc(4, 4); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(0U, loc.offset); + + loc = conv.GetNextIntArgLoc(8, 8); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(1U, loc.offset); + + loc = conv.GetNextFpArgLoc(4, 4); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(0U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(1U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(2U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(3U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(4U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(5U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(6U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(7U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(2U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(3U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(4U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(5U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + 
EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(6U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationInt, loc.kind); + EXPECT_EQ(7U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationStack, loc.kind); + EXPECT_EQ(0U, loc.offset); + + loc = conv.GetNextIntArgLoc(8, 8); + EXPECT_EQ(kArgLocationStack, loc.kind); + EXPECT_EQ(8U, loc.offset); + + loc = conv.GetNextFpArgLoc(8, 8); + EXPECT_EQ(kArgLocationStack, loc.kind); + EXPECT_EQ(16U, loc.offset); + + loc = conv.GetNextIntArgLoc(8, 8); + EXPECT_EQ(kArgLocationStack, loc.kind); + EXPECT_EQ(24U, loc.offset); + + loc = conv.GetFpResLoc(4); + EXPECT_EQ(kArgLocationFp, loc.kind); + EXPECT_EQ(0U, loc.offset); +} + } // namespace } // namespace berberis::riscv64 diff --git a/calling_conventions/include/berberis/calling_conventions/calling_conventions_riscv64.h b/calling_conventions/include/berberis/calling_conventions/calling_conventions_riscv64.h index 4297589d..9caddcf2 100644 --- a/calling_conventions/include/berberis/calling_conventions/calling_conventions_riscv64.h +++ b/calling_conventions/include/berberis/calling_conventions/calling_conventions_riscv64.h @@ -77,7 +77,9 @@ class CallingConventions { return loc; } - return GetNextStackArgLoc(size, alignment); + // Once the floating-point registers have been exhausted, pass floating-point parameters + // according to the integer calling convention. 
+ return GetNextIntArgLoc(size, alignment); } constexpr ArgLocation GetIntResLoc(unsigned size) { diff --git a/code_gen_lib/code_gen_lib_riscv64_test.cc b/code_gen_lib/code_gen_lib_riscv64_test.cc index 789cf5fe..7eb58431 100644 --- a/code_gen_lib/code_gen_lib_riscv64_test.cc +++ b/code_gen_lib/code_gen_lib_riscv64_test.cc @@ -319,18 +319,18 @@ TEST(CodeGenLib, GenWrapGuestFunction_Run10Int) { ASSERT_EQ(res, -10); } -void Run10Fp(GuestAddr pc, GuestArgumentBuffer* buf) { +void Run18Fp(GuestAddr pc, GuestArgumentBuffer* buf) { static_assert(sizeof(float) == sizeof(uint32_t)); ASSERT_EQ(pc, ToGuestAddr(&g_insn)); ASSERT_NE(nullptr, buf); // riscv verification - ASSERT_EQ(0, buf->argc); + ASSERT_EQ(8, buf->argc); ASSERT_EQ(8, buf->fp_argc); ASSERT_EQ(16, buf->stack_argc); ASSERT_EQ(0, buf->resc); ASSERT_EQ(1, buf->fp_resc); // 32-bit parameters passed in floating-point registers are 1-extended. - // 32-bit parameters passed on the stack are 0-extended. + // 32-bit parameters passed in general-purpose registers and on the stack are 0-extended. 
ASSERT_EQ(kNanBoxFloat32, buf->fp_argv[0] & kNanBoxFloat32); ASSERT_FLOAT_EQ(0.0f, bit_cast<float>(static_cast<uint32_t>(buf->fp_argv[0]))); ASSERT_EQ(kNanBoxFloat32, buf->fp_argv[1] & kNanBoxFloat32); @@ -347,8 +347,16 @@ void Run10Fp(GuestAddr pc, GuestArgumentBuffer* buf) { ASSERT_FLOAT_EQ(6.6f, bit_cast<float>(static_cast<uint32_t>(buf->fp_argv[6]))); ASSERT_EQ(kNanBoxFloat32, buf->fp_argv[7] & kNanBoxFloat32); ASSERT_FLOAT_EQ(7.7f, bit_cast<float>(static_cast<uint32_t>(buf->fp_argv[7]))); - ASSERT_FLOAT_EQ(8.8f, bit_cast<float>(static_cast<uint32_t>(buf->stack_argv[0]))); - ASSERT_FLOAT_EQ(9.9f, bit_cast<float>(static_cast<uint32_t>(buf->stack_argv[1]))); + ASSERT_FLOAT_EQ(8.8f, bit_cast<float>(static_cast<uint32_t>(buf->argv[0]))); + ASSERT_FLOAT_EQ(9.9f, bit_cast<float>(static_cast<uint32_t>(buf->argv[1]))); + ASSERT_FLOAT_EQ(10.01f, bit_cast<float>(static_cast<uint32_t>(buf->argv[2]))); + ASSERT_FLOAT_EQ(20.02f, bit_cast<float>(static_cast<uint32_t>(buf->argv[3]))); + ASSERT_FLOAT_EQ(30.03f, bit_cast<float>(static_cast<uint32_t>(buf->argv[4]))); + ASSERT_FLOAT_EQ(40.04f, bit_cast<float>(static_cast<uint32_t>(buf->argv[5]))); + ASSERT_FLOAT_EQ(50.05f, bit_cast<float>(static_cast<uint32_t>(buf->argv[6]))); + ASSERT_FLOAT_EQ(60.06f, bit_cast<float>(static_cast<uint32_t>(buf->argv[7]))); + ASSERT_FLOAT_EQ(70.07f, bit_cast<float>(static_cast<uint32_t>(buf->stack_argv[0]))); + ASSERT_FLOAT_EQ(80.08f, bit_cast<float>(static_cast<uint32_t>(buf->stack_argv[1]))); buf->fp_argv[0] = static_cast<uint64_t>(bit_cast<uint32_t>(45.45f)) | kNanBoxFloat32; } @@ -356,12 +364,46 @@ TEST(CodeGenLib, GenWrapGuestFunction_Run10Fp) { MachineCode machine_code; GenWrapGuestFunction( - &machine_code, ToGuestAddr(&g_insn), "fffffffffff", AsHostCode(Run10Fp), "Run10Fp"); + &machine_code, ToGuestAddr(&g_insn), "fffffffffffffffffff", AsHostCode(Run18Fp), "Run18Fp"); ScopedExecRegion exec(&machine_code); - using Func = float(float, float, float, float, float, float, float, float, float, 
float); - float res = exec.get<Func>()(0.0f, 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f); + using Func = float(float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float, + float); + float res = exec.get<Func>()(0.0f, + 1.1f, + 2.2f, + 3.3f, + 4.4f, + 5.5f, + 6.6f, + 7.7f, + 8.8f, + 9.9f, + 10.01f, + 20.02f, + 30.03f, + 40.04f, + 50.05f, + 60.06f, + 70.07f, + 80.08f); ASSERT_FLOAT_EQ(45.45f, res); } diff --git a/code_gen_lib/gen_wrapper_riscv64_to_x86_64.cc b/code_gen_lib/gen_wrapper_riscv64_to_x86_64.cc index 65c2811b..5a65c991 100644 --- a/code_gen_lib/gen_wrapper_riscv64_to_x86_64.cc +++ b/code_gen_lib/gen_wrapper_riscv64_to_x86_64.cc @@ -104,21 +104,32 @@ void GenWrapGuestFunction(MachineCode* mc, int fp_argc = 0; int stack_argc = 0; int host_stack_argc = 0; + static constexpr int kGuestParamRegs = 8; + static constexpr Assembler::Register kParamRegs[] = { + Assembler::rdi, + Assembler::rsi, + Assembler::rdx, + Assembler::rcx, + Assembler::r8, + Assembler::r9, + }; + static constexpr Assembler::XMMRegister kFpParamRegs[] = { + Assembler::xmm0, + Assembler::xmm1, + Assembler::xmm2, + Assembler::xmm3, + Assembler::xmm4, + Assembler::xmm5, + Assembler::xmm6, + Assembler::xmm7, + }; for (size_t i = 1; signature[i] != '\0'; ++i) { if (signature[i] == 'z' || signature[i] == 'b' || signature[i] == 's' || signature[i] == 'c' || signature[i] == 'i' || signature[i] == 'p' || signature[i] == 'l') { - static constexpr Assembler::Register kParamRegs[] = { - Assembler::rdi, - Assembler::rsi, - Assembler::rdx, - Assembler::rcx, - Assembler::r8, - Assembler::r9, - }; if (argc < static_cast<int>(std::size(kParamRegs))) { ExtendIntArg(as, signature[i], kParamRegs[argc], kParamRegs[argc]); as.Movq({.base = Assembler::rsp, .disp = kArgvOffset + argc * 8}, kParamRegs[argc]); - } else if (argc < 8) { + } else if (argc < kGuestParamRegs) { as.Movq(Assembler::rax, {.base = 
Assembler::rsp, .disp = params_offset + host_stack_argc * 8}); ++host_stack_argc; @@ -135,32 +146,30 @@ void GenWrapGuestFunction(MachineCode* mc, } ++argc; } else if (signature[i] == 'f' || signature[i] == 'd') { - static constexpr Assembler::XMMRegister kParamRegs[] = { - Assembler::xmm0, - Assembler::xmm1, - Assembler::xmm2, - Assembler::xmm3, - Assembler::xmm4, - Assembler::xmm5, - Assembler::xmm6, - Assembler::xmm7, - }; - if (fp_argc < static_cast<int>(std::size(kParamRegs))) { + // Floating-point parameters are passed in the floating-point parameter registers (fa0..7) + // first, then the general-purpose parameter registers (a0..7), then on the stack. + if (fp_argc < static_cast<int>(std::size(kFpParamRegs))) { if (signature[i] == 'f') { // LP64D requires 32-bit floats to be NaN boxed. if (host_platform::kHasAVX) { - as.MacroNanBoxAVX<intrinsics::Float32>(kParamRegs[fp_argc], kParamRegs[fp_argc]); + as.MacroNanBoxAVX<intrinsics::Float32>(kFpParamRegs[fp_argc], kFpParamRegs[fp_argc]); } else { - as.MacroNanBox<intrinsics::Float32>(kParamRegs[fp_argc]); + as.MacroNanBox<intrinsics::Float32>(kFpParamRegs[fp_argc]); } } if (host_platform::kHasAVX) { as.Vmovq({.base = Assembler::rsp, .disp = kFpArgvOffset + fp_argc * 8}, - kParamRegs[fp_argc]); + kFpParamRegs[fp_argc]); } else { as.Movq({.base = Assembler::rsp, .disp = kFpArgvOffset + fp_argc * 8}, - kParamRegs[fp_argc]); + kFpParamRegs[fp_argc]); } + } else if (argc < kGuestParamRegs) { + as.Movq(Assembler::rax, + {.base = Assembler::rsp, .disp = params_offset + host_stack_argc * 8}); + ++host_stack_argc; + as.Movq({.base = Assembler::rsp, .disp = kArgvOffset + argc * 8}, Assembler::rax); + ++argc; } else { as.Movq(Assembler::rax, {.base = Assembler::rsp, .disp = params_offset + host_stack_argc * 8}); diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h index c2eb4a65..63ff277a 100644 --- a/decoder/include/berberis/decoder/riscv64/decoder.h +++ 
b/decoder/include/berberis/decoder/riscv64/decoder.h @@ -277,8 +277,8 @@ class Decoder { kVfwsubwf = 0b110110, kVfwmulvf = 0b111000, kVfwmaccvf = 0b111100, - kVfwnmaccvf = 0b111100, - kVfwmsacvf = 0b111100, + kVfwnmaccvf = 0b111101, + kVfwmsacvf = 0b111110, kVfwnmsacvf = 0b111111, }; @@ -319,8 +319,8 @@ class Decoder { kVfwsubwv = 0b110110, kVfwmulvv = 0b111000, kVfwmaccvv = 0b111100, - kVfwnmaccvv = 0b111100, - kVfwmsacvv = 0b111100, + kVfwnmaccvv = 0b111101, + kVfwmsacvv = 0b111110, kVfwnmsacvv = 0b111111, }; @@ -560,6 +560,7 @@ class Decoder { }; enum class VFUnary1Opcode : uint8_t { + kVfsqrtv = 0b00000, kVfrsqrt7v = 0b00100, }; diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h index 7f4a20c0..eedb6fdf 100644 --- a/decoder/include/berberis/decoder/riscv64/semantics_player.h +++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h @@ -610,9 +610,25 @@ class SemanticsPlayer { int8_t dst, int8_t src1, int8_t src2) { - FpRegister arg1 = GetFRegAndUnboxNan<FloatType>(src1); - FpRegister arg2 = GetFRegAndUnboxNan<FloatType>(src2); + FpRegister arg1; + FpRegister arg2; FpRegister result; + // The sign-injection instructions (FSGNJ, FSGNJN, FSGNJX) do not canonicalize NaNs; + // they manipulate the underlying bit patterns directly. + bool canonicalize_nan = true; + switch (opcode) { + case Decoder::OpFpNoRoundingOpcode::kFSgnj: + case Decoder::OpFpNoRoundingOpcode::kFSgnjn: + case Decoder::OpFpNoRoundingOpcode::kFSgnjx: + arg1 = GetFpReg(src1); + arg2 = GetFpReg(src2); + canonicalize_nan = false; + break; + default: + // Unboxing canonicalizes NaNs. 
+ arg1 = GetFRegAndUnboxNan<FloatType>(src1); + arg2 = GetFRegAndUnboxNan<FloatType>(src2); + } switch (opcode) { case Decoder::OpFpNoRoundingOpcode::kFSgnj: result = listener_->template FSgnj<FloatType>(arg1, arg2); @@ -633,7 +649,9 @@ class SemanticsPlayer { Undefined(); return; } - result = CanonicalizeNan<FloatType>(result); + if (canonicalize_nan) { + result = CanonicalizeNan<FloatType>(result); + } NanBoxAndSetFpReg<FloatType>(dst, result); } diff --git a/guest_os_primitives/Android.bp b/guest_os_primitives/Android.bp index e6639385..e39d7b9c 100644 --- a/guest_os_primitives/Android.bp +++ b/guest_os_primitives/Android.bp @@ -154,6 +154,7 @@ cc_library_static { ], host_supported: true, srcs: [ + "riscv64/gen_syscall_numbers.cc", "riscv64/guest_setjmp.cc", "riscv64/guest_signal.cc", ], @@ -187,6 +188,7 @@ cc_defaults { "libberberis_base_headers", "libberberis_guest_os_primitives_headers", "libberberis_guest_state_headers", + "libberberis_runtime_headers", ], } diff --git a/kernel_api/include/berberis/kernel_api/syscall_numbers.h b/guest_os_primitives/include/berberis/guest_os_primitives/gen_syscall_numbers.h index 9cbd20c7..6c68c38a 100644 --- a/kernel_api/include/berberis/kernel_api/syscall_numbers.h +++ b/guest_os_primitives/include/berberis/guest_os_primitives/gen_syscall_numbers.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 The Android Open Source Project + * Copyright (C) 2024 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,10 @@ * limitations under the License. */ -#ifndef BERBERIS_KERNEL_API_RISCV64_SYSCALL_NUMBERS_H_ -#define BERBERIS_KERNEL_API_RISCV64_SYSCALL_NUMBERS_H_ +#ifndef BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_H_ +#define BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_H_ -namespace berberis { +#include "berberis/guest_os_primitives/gen_syscall_numbers_arch.h" // IWYU pragma: export. 
+#include "berberis/guest_os_primitives/syscall_numbers.h" // IWYU pragma: export. -int ToHostSyscallNumber(int); - -} // namespace berberis - -#endif // BERBERIS_KERNEL_API_RISCV64_SYSCALL_NUMBERS_H_ +#endif // BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_H_ diff --git a/guest_os_primitives/include/berberis/guest_os_primitives/syscall_numbers.h b/guest_os_primitives/include/berberis/guest_os_primitives/syscall_numbers.h index e0cc781a..03728678 100644 --- a/guest_os_primitives/include/berberis/guest_os_primitives/syscall_numbers.h +++ b/guest_os_primitives/include/berberis/guest_os_primitives/syscall_numbers.h @@ -17,7 +17,11 @@ #ifndef BERBERIS_GUEST_OS_PRIMITIVES_SYSCALL_NUMBERS_H_ #define BERBERIS_GUEST_OS_PRIMITIVES_SYSCALL_NUMBERS_H_ -// TODO(b/280551708): Extract this include to make it more architecture agnostic. -#include "berberis/guest_os_primitives/gen_syscall_numbers_riscv64.h" +namespace berberis { -#endif // BERBERIS_GUEST_OS_PRIMITIVES_SYSCALL_NUMBERS_H_
\ No newline at end of file +int ToHostSyscallNumber(int nr); +int ToGuestSyscallNumber(int nr); + +} // namespace berberis + +#endif // BERBERIS_GUEST_OS_PRIMITIVES_SYSCALL_NUMBERS_H_ diff --git a/guest_os_primitives/include/berberis/guest_os_primitives/gen_syscall_numbers_riscv64.h b/guest_os_primitives/riscv64/gen_syscall_numbers.cc index c6ae646b..4a650a71 100644 --- a/guest_os_primitives/include/berberis/guest_os_primitives/gen_syscall_numbers_riscv64.h +++ b/guest_os_primitives/riscv64/gen_syscall_numbers.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 The Android Open Source Project + * Copyright (C) 2024 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,321 +14,11 @@ * limitations under the License. */ -#ifndef BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_RISCV64_H_ -#define BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_RISCV64_H_ +#include "berberis/guest_os_primitives/gen_syscall_numbers.h" namespace berberis { -enum { - GUEST_NR_accept = 202, - GUEST_NR_accept4 = 242, - GUEST_NR_acct = 89, - GUEST_NR_add_key = 217, - GUEST_NR_adjtimex = 171, - GUEST_NR_bind = 200, - GUEST_NR_bpf = 280, - GUEST_NR_brk = 214, - GUEST_NR_capget = 90, - GUEST_NR_capset = 91, - GUEST_NR_chdir = 49, - GUEST_NR_chroot = 51, - GUEST_NR_clock_adjtime = 266, - GUEST_NR_clock_getres = 114, - GUEST_NR_clock_gettime = 113, - GUEST_NR_clock_nanosleep = 115, - GUEST_NR_clock_settime = 112, - GUEST_NR_clone = 220, - GUEST_NR_clone3 = 435, - GUEST_NR_close = 57, - GUEST_NR_close_range = 436, - GUEST_NR_connect = 203, - GUEST_NR_copy_file_range = 285, - GUEST_NR_delete_module = 106, - GUEST_NR_dup = 23, - GUEST_NR_dup3 = 24, - GUEST_NR_epoll_create1 = 20, - GUEST_NR_epoll_ctl = 21, - GUEST_NR_epoll_pwait = 22, - GUEST_NR_epoll_pwait2 = 441, - GUEST_NR_eventfd2 = 19, - GUEST_NR_execve = 221, - GUEST_NR_execveat = 281, - GUEST_NR_exit = 93, - GUEST_NR_exit_group = 
94, - GUEST_NR_faccessat = 48, - GUEST_NR_faccessat2 = 439, - GUEST_NR_fadvise64 = 223, - GUEST_NR_fallocate = 47, - GUEST_NR_fanotify_init = 262, - GUEST_NR_fanotify_mark = 263, - GUEST_NR_fchdir = 50, - GUEST_NR_fchmod = 52, - GUEST_NR_fchmodat = 53, - GUEST_NR_fchown = 55, - GUEST_NR_fchownat = 54, - GUEST_NR_fcntl = 25, - GUEST_NR_fdatasync = 83, - GUEST_NR_fgetxattr = 10, - GUEST_NR_finit_module = 273, - GUEST_NR_flistxattr = 13, - GUEST_NR_flock = 32, - GUEST_NR_fremovexattr = 16, - GUEST_NR_fsconfig = 431, - GUEST_NR_fsetxattr = 7, - GUEST_NR_fsmount = 432, - GUEST_NR_fsopen = 430, - GUEST_NR_fspick = 433, - GUEST_NR_fstat = 80, - GUEST_NR_fstatfs = 44, - GUEST_NR_fsync = 82, - GUEST_NR_ftruncate = 46, - GUEST_NR_futex = 98, - GUEST_NR_futex_waitv = 449, - GUEST_NR_get_mempolicy = 236, - GUEST_NR_get_robust_list = 100, - GUEST_NR_getcpu = 168, - GUEST_NR_getcwd = 17, - GUEST_NR_getdents64 = 61, - GUEST_NR_getegid = 177, - GUEST_NR_geteuid = 175, - GUEST_NR_getgid = 176, - GUEST_NR_getgroups = 158, - GUEST_NR_getitimer = 102, - GUEST_NR_getpeername = 205, - GUEST_NR_getpgid = 155, - GUEST_NR_getpid = 172, - GUEST_NR_getppid = 173, - GUEST_NR_getpriority = 141, - GUEST_NR_getrandom = 278, - GUEST_NR_getresgid = 150, - GUEST_NR_getresuid = 148, - GUEST_NR_getrlimit = 163, - GUEST_NR_getrusage = 165, - GUEST_NR_getsid = 156, - GUEST_NR_getsockname = 204, - GUEST_NR_getsockopt = 209, - GUEST_NR_gettid = 178, - GUEST_NR_gettimeofday = 169, - GUEST_NR_getuid = 174, - GUEST_NR_getxattr = 8, - GUEST_NR_init_module = 105, - GUEST_NR_inotify_add_watch = 27, - GUEST_NR_inotify_init1 = 26, - GUEST_NR_inotify_rm_watch = 28, - GUEST_NR_io_cancel = 3, - GUEST_NR_io_destroy = 1, - GUEST_NR_io_getevents = 4, - GUEST_NR_io_pgetevents = 292, - GUEST_NR_io_setup = 0, - GUEST_NR_io_submit = 2, - GUEST_NR_io_uring_enter = 426, - GUEST_NR_io_uring_register = 427, - GUEST_NR_io_uring_setup = 425, - GUEST_NR_ioctl = 29, - GUEST_NR_ioprio_get = 31, - GUEST_NR_ioprio_set = 30, - 
GUEST_NR_kcmp = 272, - GUEST_NR_kexec_file_load = 294, - GUEST_NR_kexec_load = 104, - GUEST_NR_keyctl = 219, - GUEST_NR_kill = 129, - GUEST_NR_landlock_add_rule = 445, - GUEST_NR_landlock_create_ruleset = 444, - GUEST_NR_landlock_restrict_self = 446, - GUEST_NR_lgetxattr = 9, - GUEST_NR_linkat = 37, - GUEST_NR_listen = 201, - GUEST_NR_listxattr = 11, - GUEST_NR_llistxattr = 12, - GUEST_NR_lookup_dcookie = 18, - GUEST_NR_lremovexattr = 15, - GUEST_NR_lseek = 62, - GUEST_NR_lsetxattr = 6, - GUEST_NR_madvise = 233, - GUEST_NR_mbind = 235, - GUEST_NR_membarrier = 283, - GUEST_NR_memfd_create = 279, - GUEST_NR_memfd_secret = 447, - GUEST_NR_migrate_pages = 238, - GUEST_NR_mincore = 232, - GUEST_NR_mkdirat = 34, - GUEST_NR_mknodat = 33, - GUEST_NR_mlock = 228, - GUEST_NR_mlock2 = 284, - GUEST_NR_mlockall = 230, - GUEST_NR_mmap = 222, - GUEST_NR_mount = 40, - GUEST_NR_mount_setattr = 442, - GUEST_NR_move_mount = 429, - GUEST_NR_move_pages = 239, - GUEST_NR_mprotect = 226, - GUEST_NR_mq_getsetattr = 185, - GUEST_NR_mq_notify = 184, - GUEST_NR_mq_open = 180, - GUEST_NR_mq_timedreceive = 183, - GUEST_NR_mq_timedsend = 182, - GUEST_NR_mq_unlink = 181, - GUEST_NR_mremap = 216, - GUEST_NR_msgctl = 187, - GUEST_NR_msgget = 186, - GUEST_NR_msgrcv = 188, - GUEST_NR_msgsnd = 189, - GUEST_NR_msync = 227, - GUEST_NR_munlock = 229, - GUEST_NR_munlockall = 231, - GUEST_NR_munmap = 215, - GUEST_NR_name_to_handle_at = 264, - GUEST_NR_nanosleep = 101, - GUEST_NR_newfstatat = 79, - GUEST_NR_nfsservctl = 42, - GUEST_NR_open_by_handle_at = 265, - GUEST_NR_open_tree = 428, - GUEST_NR_openat = 56, - GUEST_NR_openat2 = 437, - GUEST_NR_perf_event_open = 241, - GUEST_NR_personality = 92, - GUEST_NR_pidfd_getfd = 438, - GUEST_NR_pidfd_open = 434, - GUEST_NR_pidfd_send_signal = 424, - GUEST_NR_pipe2 = 59, - GUEST_NR_pivot_root = 41, - GUEST_NR_pkey_alloc = 289, - GUEST_NR_pkey_free = 290, - GUEST_NR_pkey_mprotect = 288, - GUEST_NR_ppoll = 73, - GUEST_NR_prctl = 167, - GUEST_NR_pread64 = 67, - 
GUEST_NR_preadv = 69, - GUEST_NR_preadv2 = 286, - GUEST_NR_prlimit64 = 261, - GUEST_NR_process_madvise = 440, - GUEST_NR_process_mrelease = 448, - GUEST_NR_process_vm_readv = 270, - GUEST_NR_process_vm_writev = 271, - GUEST_NR_pselect6 = 72, - GUEST_NR_ptrace = 117, - GUEST_NR_pwrite64 = 68, - GUEST_NR_pwritev = 70, - GUEST_NR_pwritev2 = 287, - GUEST_NR_quotactl = 60, - GUEST_NR_quotactl_fd = 443, - GUEST_NR_read = 63, - GUEST_NR_readahead = 213, - GUEST_NR_readlinkat = 78, - GUEST_NR_readv = 65, - GUEST_NR_reboot = 142, - GUEST_NR_recvfrom = 207, - GUEST_NR_recvmmsg = 243, - GUEST_NR_recvmsg = 212, - GUEST_NR_remap_file_pages = 234, - GUEST_NR_removexattr = 14, - GUEST_NR_renameat = 38, - GUEST_NR_renameat2 = 276, - GUEST_NR_request_key = 218, - GUEST_NR_restart_syscall = 128, - GUEST_NR_rseq = 293, - GUEST_NR_rt_sigaction = 134, - GUEST_NR_rt_sigpending = 136, - GUEST_NR_rt_sigprocmask = 135, - GUEST_NR_rt_sigqueueinfo = 138, - GUEST_NR_rt_sigreturn = 139, - GUEST_NR_rt_sigsuspend = 133, - GUEST_NR_rt_sigtimedwait = 137, - GUEST_NR_rt_tgsigqueueinfo = 240, - GUEST_NR_sched_get_priority_max = 125, - GUEST_NR_sched_get_priority_min = 126, - GUEST_NR_sched_getaffinity = 123, - GUEST_NR_sched_getattr = 275, - GUEST_NR_sched_getparam = 121, - GUEST_NR_sched_getscheduler = 120, - GUEST_NR_sched_rr_get_interval = 127, - GUEST_NR_sched_setaffinity = 122, - GUEST_NR_sched_setattr = 274, - GUEST_NR_sched_setparam = 118, - GUEST_NR_sched_setscheduler = 119, - GUEST_NR_sched_yield = 124, - GUEST_NR_seccomp = 277, - GUEST_NR_semctl = 191, - GUEST_NR_semget = 190, - GUEST_NR_semop = 193, - GUEST_NR_semtimedop = 192, - GUEST_NR_sendfile = 71, - GUEST_NR_sendmmsg = 269, - GUEST_NR_sendmsg = 211, - GUEST_NR_sendto = 206, - GUEST_NR_set_mempolicy = 237, - GUEST_NR_set_mempolicy_home_node = 450, - GUEST_NR_set_robust_list = 99, - GUEST_NR_set_tid_address = 96, - GUEST_NR_setdomainname = 162, - GUEST_NR_setfsgid = 152, - GUEST_NR_setfsuid = 151, - GUEST_NR_setgid = 144, - 
GUEST_NR_setgroups = 159, - GUEST_NR_sethostname = 161, - GUEST_NR_setitimer = 103, - GUEST_NR_setns = 268, - GUEST_NR_setpgid = 154, - GUEST_NR_setpriority = 140, - GUEST_NR_setregid = 143, - GUEST_NR_setresgid = 149, - GUEST_NR_setresuid = 147, - GUEST_NR_setreuid = 145, - GUEST_NR_setrlimit = 164, - GUEST_NR_setsid = 157, - GUEST_NR_setsockopt = 208, - GUEST_NR_settimeofday = 170, - GUEST_NR_setuid = 146, - GUEST_NR_setxattr = 5, - GUEST_NR_shmat = 196, - GUEST_NR_shmctl = 195, - GUEST_NR_shmdt = 197, - GUEST_NR_shmget = 194, - GUEST_NR_shutdown = 210, - GUEST_NR_sigaltstack = 132, - GUEST_NR_signalfd4 = 74, - GUEST_NR_socket = 198, - GUEST_NR_socketpair = 199, - GUEST_NR_splice = 76, - GUEST_NR_statfs = 43, - GUEST_NR_statx = 291, - GUEST_NR_swapoff = 225, - GUEST_NR_swapon = 224, - GUEST_NR_symlinkat = 36, - GUEST_NR_sync = 81, - GUEST_NR_sync_file_range = 84, - GUEST_NR_syncfs = 267, - GUEST_NR_sysinfo = 179, - GUEST_NR_syslog = 116, - GUEST_NR_tee = 77, - GUEST_NR_tgkill = 131, - GUEST_NR_timer_create = 107, - GUEST_NR_timer_delete = 111, - GUEST_NR_timer_getoverrun = 109, - GUEST_NR_timer_gettime = 108, - GUEST_NR_timer_settime = 110, - GUEST_NR_timerfd_create = 85, - GUEST_NR_timerfd_gettime = 87, - GUEST_NR_timerfd_settime = 86, - GUEST_NR_times = 153, - GUEST_NR_tkill = 130, - GUEST_NR_truncate = 45, - GUEST_NR_umask = 166, - GUEST_NR_umount2 = 39, - GUEST_NR_uname = 160, - GUEST_NR_unlinkat = 35, - GUEST_NR_unshare = 97, - GUEST_NR_userfaultfd = 282, - GUEST_NR_utimensat = 88, - GUEST_NR_vhangup = 58, - GUEST_NR_vmsplice = 75, - GUEST_NR_wait4 = 260, - GUEST_NR_waitid = 95, - GUEST_NR_write = 64, - GUEST_NR_writev = 66, -}; - -inline int ToHostSyscallNumber(int nr) { +int ToHostSyscallNumber(int nr) { switch (nr) { case 202: // __NR_accept return 43; @@ -947,7 +637,7 @@ inline int ToHostSyscallNumber(int nr) { } } -inline int ToGuestSyscallNumber(int nr) { +int ToGuestSyscallNumber(int nr) { switch (nr) { case 156: // __NR__sysctl - missing on riscv64 
return -1; @@ -1679,5 +1369,3 @@ inline int ToGuestSyscallNumber(int nr) { } } // namespace berberis - -#endif // BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_RISCV64_H_
\ No newline at end of file diff --git a/guest_os_primitives/riscv64/include/berberis/guest_os_primitives/gen_syscall_numbers_arch.h b/guest_os_primitives/riscv64/include/berberis/guest_os_primitives/gen_syscall_numbers_arch.h new file mode 100644 index 00000000..bcca9439 --- /dev/null +++ b/guest_os_primitives/riscv64/include/berberis/guest_os_primitives/gen_syscall_numbers_arch.h @@ -0,0 +1,333 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_ARCH_H_ +#define BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_ARCH_H_ + +namespace berberis { + +enum { + GUEST_NR_accept = 202, + GUEST_NR_accept4 = 242, + GUEST_NR_acct = 89, + GUEST_NR_add_key = 217, + GUEST_NR_adjtimex = 171, + GUEST_NR_bind = 200, + GUEST_NR_bpf = 280, + GUEST_NR_brk = 214, + GUEST_NR_capget = 90, + GUEST_NR_capset = 91, + GUEST_NR_chdir = 49, + GUEST_NR_chroot = 51, + GUEST_NR_clock_adjtime = 266, + GUEST_NR_clock_getres = 114, + GUEST_NR_clock_gettime = 113, + GUEST_NR_clock_nanosleep = 115, + GUEST_NR_clock_settime = 112, + GUEST_NR_clone = 220, + GUEST_NR_clone3 = 435, + GUEST_NR_close = 57, + GUEST_NR_close_range = 436, + GUEST_NR_connect = 203, + GUEST_NR_copy_file_range = 285, + GUEST_NR_delete_module = 106, + GUEST_NR_dup = 23, + GUEST_NR_dup3 = 24, + GUEST_NR_epoll_create1 = 20, + GUEST_NR_epoll_ctl = 21, + GUEST_NR_epoll_pwait = 22, + GUEST_NR_epoll_pwait2 = 441, + GUEST_NR_eventfd2 = 19, + GUEST_NR_execve = 221, + GUEST_NR_execveat = 281, + GUEST_NR_exit = 93, + GUEST_NR_exit_group = 94, + GUEST_NR_faccessat = 48, + GUEST_NR_faccessat2 = 439, + GUEST_NR_fadvise64 = 223, + GUEST_NR_fallocate = 47, + GUEST_NR_fanotify_init = 262, + GUEST_NR_fanotify_mark = 263, + GUEST_NR_fchdir = 50, + GUEST_NR_fchmod = 52, + GUEST_NR_fchmodat = 53, + GUEST_NR_fchown = 55, + GUEST_NR_fchownat = 54, + GUEST_NR_fcntl = 25, + GUEST_NR_fdatasync = 83, + GUEST_NR_fgetxattr = 10, + GUEST_NR_finit_module = 273, + GUEST_NR_flistxattr = 13, + GUEST_NR_flock = 32, + GUEST_NR_fremovexattr = 16, + GUEST_NR_fsconfig = 431, + GUEST_NR_fsetxattr = 7, + GUEST_NR_fsmount = 432, + GUEST_NR_fsopen = 430, + GUEST_NR_fspick = 433, + GUEST_NR_fstat = 80, + GUEST_NR_fstatfs = 44, + GUEST_NR_fsync = 82, + GUEST_NR_ftruncate = 46, + GUEST_NR_futex = 98, + GUEST_NR_futex_waitv = 449, + GUEST_NR_get_mempolicy = 236, + GUEST_NR_get_robust_list = 100, + GUEST_NR_getcpu = 168, + GUEST_NR_getcwd = 17, + 
GUEST_NR_getdents64 = 61, + GUEST_NR_getegid = 177, + GUEST_NR_geteuid = 175, + GUEST_NR_getgid = 176, + GUEST_NR_getgroups = 158, + GUEST_NR_getitimer = 102, + GUEST_NR_getpeername = 205, + GUEST_NR_getpgid = 155, + GUEST_NR_getpid = 172, + GUEST_NR_getppid = 173, + GUEST_NR_getpriority = 141, + GUEST_NR_getrandom = 278, + GUEST_NR_getresgid = 150, + GUEST_NR_getresuid = 148, + GUEST_NR_getrlimit = 163, + GUEST_NR_getrusage = 165, + GUEST_NR_getsid = 156, + GUEST_NR_getsockname = 204, + GUEST_NR_getsockopt = 209, + GUEST_NR_gettid = 178, + GUEST_NR_gettimeofday = 169, + GUEST_NR_getuid = 174, + GUEST_NR_getxattr = 8, + GUEST_NR_init_module = 105, + GUEST_NR_inotify_add_watch = 27, + GUEST_NR_inotify_init1 = 26, + GUEST_NR_inotify_rm_watch = 28, + GUEST_NR_io_cancel = 3, + GUEST_NR_io_destroy = 1, + GUEST_NR_io_getevents = 4, + GUEST_NR_io_pgetevents = 292, + GUEST_NR_io_setup = 0, + GUEST_NR_io_submit = 2, + GUEST_NR_io_uring_enter = 426, + GUEST_NR_io_uring_register = 427, + GUEST_NR_io_uring_setup = 425, + GUEST_NR_ioctl = 29, + GUEST_NR_ioprio_get = 31, + GUEST_NR_ioprio_set = 30, + GUEST_NR_kcmp = 272, + GUEST_NR_kexec_file_load = 294, + GUEST_NR_kexec_load = 104, + GUEST_NR_keyctl = 219, + GUEST_NR_kill = 129, + GUEST_NR_landlock_add_rule = 445, + GUEST_NR_landlock_create_ruleset = 444, + GUEST_NR_landlock_restrict_self = 446, + GUEST_NR_lgetxattr = 9, + GUEST_NR_linkat = 37, + GUEST_NR_listen = 201, + GUEST_NR_listxattr = 11, + GUEST_NR_llistxattr = 12, + GUEST_NR_lookup_dcookie = 18, + GUEST_NR_lremovexattr = 15, + GUEST_NR_lseek = 62, + GUEST_NR_lsetxattr = 6, + GUEST_NR_madvise = 233, + GUEST_NR_mbind = 235, + GUEST_NR_membarrier = 283, + GUEST_NR_memfd_create = 279, + GUEST_NR_memfd_secret = 447, + GUEST_NR_migrate_pages = 238, + GUEST_NR_mincore = 232, + GUEST_NR_mkdirat = 34, + GUEST_NR_mknodat = 33, + GUEST_NR_mlock = 228, + GUEST_NR_mlock2 = 284, + GUEST_NR_mlockall = 230, + GUEST_NR_mmap = 222, + GUEST_NR_mount = 40, + GUEST_NR_mount_setattr = 442, 
+ GUEST_NR_move_mount = 429, + GUEST_NR_move_pages = 239, + GUEST_NR_mprotect = 226, + GUEST_NR_mq_getsetattr = 185, + GUEST_NR_mq_notify = 184, + GUEST_NR_mq_open = 180, + GUEST_NR_mq_timedreceive = 183, + GUEST_NR_mq_timedsend = 182, + GUEST_NR_mq_unlink = 181, + GUEST_NR_mremap = 216, + GUEST_NR_msgctl = 187, + GUEST_NR_msgget = 186, + GUEST_NR_msgrcv = 188, + GUEST_NR_msgsnd = 189, + GUEST_NR_msync = 227, + GUEST_NR_munlock = 229, + GUEST_NR_munlockall = 231, + GUEST_NR_munmap = 215, + GUEST_NR_name_to_handle_at = 264, + GUEST_NR_nanosleep = 101, + GUEST_NR_newfstatat = 79, + GUEST_NR_nfsservctl = 42, + GUEST_NR_open_by_handle_at = 265, + GUEST_NR_open_tree = 428, + GUEST_NR_openat = 56, + GUEST_NR_openat2 = 437, + GUEST_NR_perf_event_open = 241, + GUEST_NR_personality = 92, + GUEST_NR_pidfd_getfd = 438, + GUEST_NR_pidfd_open = 434, + GUEST_NR_pidfd_send_signal = 424, + GUEST_NR_pipe2 = 59, + GUEST_NR_pivot_root = 41, + GUEST_NR_pkey_alloc = 289, + GUEST_NR_pkey_free = 290, + GUEST_NR_pkey_mprotect = 288, + GUEST_NR_ppoll = 73, + GUEST_NR_prctl = 167, + GUEST_NR_pread64 = 67, + GUEST_NR_preadv = 69, + GUEST_NR_preadv2 = 286, + GUEST_NR_prlimit64 = 261, + GUEST_NR_process_madvise = 440, + GUEST_NR_process_mrelease = 448, + GUEST_NR_process_vm_readv = 270, + GUEST_NR_process_vm_writev = 271, + GUEST_NR_pselect6 = 72, + GUEST_NR_ptrace = 117, + GUEST_NR_pwrite64 = 68, + GUEST_NR_pwritev = 70, + GUEST_NR_pwritev2 = 287, + GUEST_NR_quotactl = 60, + GUEST_NR_quotactl_fd = 443, + GUEST_NR_read = 63, + GUEST_NR_readahead = 213, + GUEST_NR_readlinkat = 78, + GUEST_NR_readv = 65, + GUEST_NR_reboot = 142, + GUEST_NR_recvfrom = 207, + GUEST_NR_recvmmsg = 243, + GUEST_NR_recvmsg = 212, + GUEST_NR_remap_file_pages = 234, + GUEST_NR_removexattr = 14, + GUEST_NR_renameat = 38, + GUEST_NR_renameat2 = 276, + GUEST_NR_request_key = 218, + GUEST_NR_restart_syscall = 128, + GUEST_NR_rseq = 293, + GUEST_NR_rt_sigaction = 134, + GUEST_NR_rt_sigpending = 136, + GUEST_NR_rt_sigprocmask 
= 135, + GUEST_NR_rt_sigqueueinfo = 138, + GUEST_NR_rt_sigreturn = 139, + GUEST_NR_rt_sigsuspend = 133, + GUEST_NR_rt_sigtimedwait = 137, + GUEST_NR_rt_tgsigqueueinfo = 240, + GUEST_NR_sched_get_priority_max = 125, + GUEST_NR_sched_get_priority_min = 126, + GUEST_NR_sched_getaffinity = 123, + GUEST_NR_sched_getattr = 275, + GUEST_NR_sched_getparam = 121, + GUEST_NR_sched_getscheduler = 120, + GUEST_NR_sched_rr_get_interval = 127, + GUEST_NR_sched_setaffinity = 122, + GUEST_NR_sched_setattr = 274, + GUEST_NR_sched_setparam = 118, + GUEST_NR_sched_setscheduler = 119, + GUEST_NR_sched_yield = 124, + GUEST_NR_seccomp = 277, + GUEST_NR_semctl = 191, + GUEST_NR_semget = 190, + GUEST_NR_semop = 193, + GUEST_NR_semtimedop = 192, + GUEST_NR_sendfile = 71, + GUEST_NR_sendmmsg = 269, + GUEST_NR_sendmsg = 211, + GUEST_NR_sendto = 206, + GUEST_NR_set_mempolicy = 237, + GUEST_NR_set_mempolicy_home_node = 450, + GUEST_NR_set_robust_list = 99, + GUEST_NR_set_tid_address = 96, + GUEST_NR_setdomainname = 162, + GUEST_NR_setfsgid = 152, + GUEST_NR_setfsuid = 151, + GUEST_NR_setgid = 144, + GUEST_NR_setgroups = 159, + GUEST_NR_sethostname = 161, + GUEST_NR_setitimer = 103, + GUEST_NR_setns = 268, + GUEST_NR_setpgid = 154, + GUEST_NR_setpriority = 140, + GUEST_NR_setregid = 143, + GUEST_NR_setresgid = 149, + GUEST_NR_setresuid = 147, + GUEST_NR_setreuid = 145, + GUEST_NR_setrlimit = 164, + GUEST_NR_setsid = 157, + GUEST_NR_setsockopt = 208, + GUEST_NR_settimeofday = 170, + GUEST_NR_setuid = 146, + GUEST_NR_setxattr = 5, + GUEST_NR_shmat = 196, + GUEST_NR_shmctl = 195, + GUEST_NR_shmdt = 197, + GUEST_NR_shmget = 194, + GUEST_NR_shutdown = 210, + GUEST_NR_sigaltstack = 132, + GUEST_NR_signalfd4 = 74, + GUEST_NR_socket = 198, + GUEST_NR_socketpair = 199, + GUEST_NR_splice = 76, + GUEST_NR_statfs = 43, + GUEST_NR_statx = 291, + GUEST_NR_swapoff = 225, + GUEST_NR_swapon = 224, + GUEST_NR_symlinkat = 36, + GUEST_NR_sync = 81, + GUEST_NR_sync_file_range = 84, + GUEST_NR_syncfs = 267, + 
GUEST_NR_sysinfo = 179, + GUEST_NR_syslog = 116, + GUEST_NR_tee = 77, + GUEST_NR_tgkill = 131, + GUEST_NR_timer_create = 107, + GUEST_NR_timer_delete = 111, + GUEST_NR_timer_getoverrun = 109, + GUEST_NR_timer_gettime = 108, + GUEST_NR_timer_settime = 110, + GUEST_NR_timerfd_create = 85, + GUEST_NR_timerfd_gettime = 87, + GUEST_NR_timerfd_settime = 86, + GUEST_NR_times = 153, + GUEST_NR_tkill = 130, + GUEST_NR_truncate = 45, + GUEST_NR_umask = 166, + GUEST_NR_umount2 = 39, + GUEST_NR_uname = 160, + GUEST_NR_unlinkat = 35, + GUEST_NR_unshare = 97, + GUEST_NR_userfaultfd = 282, + GUEST_NR_utimensat = 88, + GUEST_NR_vhangup = 58, + GUEST_NR_vmsplice = 75, + GUEST_NR_wait4 = 260, + GUEST_NR_waitid = 95, + GUEST_NR_write = 64, + GUEST_NR_writev = 66, +}; + +} // namespace berberis + +#endif // BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_ARCH_H_ diff --git a/guest_state/Android.bp b/guest_state/Android.bp index 100c65ea..cd637f28 100644 --- a/guest_state/Android.bp +++ b/guest_state/Android.bp @@ -98,6 +98,7 @@ cc_library_static { "berberis_guest_state_defaults", ], srcs: [ + "arm64/get_cpu_state.cc", "arm64/guest_state_arch.cc", ], header_libs: [ @@ -132,3 +133,15 @@ cc_test_library { header_libs: ["libberberis_guest_state_riscv64_headers"], export_header_lib_headers: ["libberberis_guest_state_riscv64_headers"], } + +cc_test_library { + name: "libberberis_cpu_state_arm64_unit_test", + defaults: [ + "berberis_test_library_defaults_64", + "berberis_guest_state_defaults", + "berberis_guest_state_headers_defaults", + ], + srcs: ["arm64/get_cpu_state_test.cc"], + header_libs: ["libberberis_guest_state_arm64_headers"], + export_header_lib_headers: ["libberberis_guest_state_arm64_headers"], +} diff --git a/guest_state/arm64/get_cpu_state.cc b/guest_state/arm64/get_cpu_state.cc new file mode 100644 index 00000000..894ed30b --- /dev/null +++ b/guest_state/arm64/get_cpu_state.cc @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "berberis/base/logging.h" +#include "berberis/guest_state/get_cpu_state_opaque.h" +#include "berberis/guest_state/guest_state_arch.h" +#include "berberis/guest_state/guest_state_opaque.h" +#include "native_bridge_support/guest_state_accessor/accessor.h" + +namespace berberis { + +extern "C" __attribute__((visibility("default"))) int LoadGuestStateRegisters( + const void* guest_state_data, + size_t guest_state_data_size, + NativeBridgeGuestRegs* guest_regs) { + if (guest_state_data_size < sizeof(ThreadState)) { + ALOGE("The guest state data size is invalid: %zu", guest_state_data_size); + return NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_INVALID_STATE; + } + guest_regs->guest_arch = NATIVE_BRIDGE_ARCH_ARM64; + return GetCpuState(guest_regs, &(static_cast<const ThreadState*>(guest_state_data))->cpu); +} + +int GetCpuState(NativeBridgeGuestRegs* guest_regs, const CPUState* state) { + if (guest_regs->guest_arch != NATIVE_BRIDGE_ARCH_ARM64) { + ALOGE("The guest architecture is unmatched: %lu", guest_regs->guest_arch); + return NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_UNSUPPORTED_ARCH; + } + memcpy(&guest_regs->regs_arm64.x, &state->x, sizeof(guest_regs->regs_arm64.x)); + memcpy(&guest_regs->regs_arm64.sp, &state->sp, sizeof(guest_regs->regs_arm64.sp)); + memcpy(&guest_regs->regs_arm64.ip, &state->insn_addr, sizeof(guest_regs->regs_arm64.ip)); + memcpy(&guest_regs->regs_arm64.v, &state->v, sizeof(guest_regs->regs_arm64.v)); 
+ return 0; +} +} // namespace berberis diff --git a/guest_state/arm64/get_cpu_state_test.cc b/guest_state/arm64/get_cpu_state_test.cc new file mode 100644 index 00000000..8ea6bbcb --- /dev/null +++ b/guest_state/arm64/get_cpu_state_test.cc @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" + +#include <cstddef> +#include <cstring> + +#include "berberis/guest_state/get_cpu_state_opaque.h" +#include "berberis/guest_state/guest_state_arch.h" +#include "native_bridge_support/guest_state_accessor/accessor.h" + +namespace berberis { + +namespace { + +TEST(GetArm64CpuStateTest, TestValuesSet) { + NativeBridgeGuestRegs guest_regs{.guest_arch = NATIVE_BRIDGE_ARCH_ARM64}; + CPUState cpu_state; + for (size_t off = 0; off < sizeof(CPUState); off++) { + auto val = off % 199; // 199 is prime to avoid regularly repeating values in registers + memcpy(reinterpret_cast<char*>(&cpu_state) + off, &val, 1); + } + + EXPECT_EQ(GetCpuState(&guest_regs, &cpu_state), 0); + + for (std::size_t i = 0; i < 31; i++) { + EXPECT_EQ(guest_regs.regs_arm64.x[i], cpu_state.x[i]); + } + for (std::size_t i = 0; i < 32; i++) { + EXPECT_EQ(guest_regs.regs_arm64.v[i], cpu_state.v[i]); + } + EXPECT_EQ(guest_regs.regs_arm64.sp, cpu_state.sp); + EXPECT_EQ(guest_regs.regs_arm64.ip, cpu_state.insn_addr); +} + +TEST(GetArm64CpuStateTest, TestErrorSize) { + NativeBridgeGuestRegs 
guest_regs{.guest_arch = NATIVE_BRIDGE_ARCH_ARM64}; + int res = LoadGuestStateRegisters(nullptr, sizeof(ThreadState) - 1, &guest_regs); + EXPECT_EQ(res, NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_INVALID_STATE); +} + +TEST(GetArm64CpuStateTest, TestErrorArch) { + NativeBridgeGuestRegs guest_regs{.guest_arch = NATIVE_BRIDGE_ARCH_RISCV64}; + CPUState cpu_state; + int res = GetCpuState(&guest_regs, &cpu_state); + EXPECT_EQ(res, NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_UNSUPPORTED_ARCH); +} + +} // namespace + +} // namespace berberis
\ No newline at end of file diff --git a/guest_state/include/berberis/guest_state/get_cpu_state_opaque.h b/guest_state/include/berberis/guest_state/get_cpu_state_opaque.h index bafba201..6ef950de 100644 --- a/guest_state/include/berberis/guest_state/get_cpu_state_opaque.h +++ b/guest_state/include/berberis/guest_state/get_cpu_state_opaque.h @@ -22,8 +22,7 @@ namespace berberis { -void GetCpuState(NativeBridgeGuestRegs* guest_regs, const CPUState* state); - +int GetCpuState(NativeBridgeGuestRegs* guest_regs, const CPUState* state); } #endif // BERBERIS_GUEST_STATE_GET_CPU_STATE_OPAQUE_H_ diff --git a/guest_state/riscv64/get_cpu_state.cc b/guest_state/riscv64/get_cpu_state.cc index a16723ff..52bada89 100644 --- a/guest_state/riscv64/get_cpu_state.cc +++ b/guest_state/riscv64/get_cpu_state.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "berberis/base/checks.h" +#include "berberis/base/logging.h" #include "berberis/guest_state/get_cpu_state_opaque.h" #include "berberis/guest_state/guest_state_arch.h" #include "berberis/guest_state/guest_state_opaque.h" @@ -26,17 +26,23 @@ extern "C" __attribute__((visibility("default"))) int LoadGuestStateRegisters( const void* guest_state_data, size_t guest_state_data_size, NativeBridgeGuestRegs* guest_regs) { - CHECK_GT(guest_state_data_size, 0); + if (guest_state_data_size < sizeof(ThreadState)) { + ALOGE("The guest state data size is invalid: %zu", guest_state_data_size); + return NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_INVALID_STATE; + } guest_regs->guest_arch = NATIVE_BRIDGE_ARCH_RISCV64; - GetCpuState(guest_regs, &(static_cast<const ThreadState*>(guest_state_data))->cpu); - return 0; + return GetCpuState(guest_regs, &(static_cast<const ThreadState*>(guest_state_data))->cpu); } -void GetCpuState(NativeBridgeGuestRegs* guest_regs, const CPUState* state) { - CHECK_EQ(guest_regs->guest_arch, NATIVE_BRIDGE_ARCH_RISCV64); +int GetCpuState(NativeBridgeGuestRegs* guest_regs, const CPUState* state) { + if 
(guest_regs->guest_arch != NATIVE_BRIDGE_ARCH_RISCV64) { + ALOGE("The guest architecture is unmatched: %lu", guest_regs->guest_arch); + return NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_UNSUPPORTED_ARCH; + } memcpy(&guest_regs->regs_riscv64.x, &state->x, sizeof(guest_regs->regs_riscv64.x)); memcpy(&guest_regs->regs_riscv64.f, &state->f, sizeof(guest_regs->regs_riscv64.f)); memcpy(&guest_regs->regs_riscv64.v, &state->v, sizeof(guest_regs->regs_riscv64.v)); memcpy(&guest_regs->regs_riscv64.ip, &state->insn_addr, sizeof(guest_regs->regs_riscv64.ip)); + return 0; } } // namespace berberis diff --git a/guest_state/riscv64/get_cpu_state_test.cc b/guest_state/riscv64/get_cpu_state_test.cc index 38264d1e..e28d9261 100644 --- a/guest_state/riscv64/get_cpu_state_test.cc +++ b/guest_state/riscv64/get_cpu_state_test.cc @@ -35,7 +35,7 @@ TEST(GetCpuStateTest, TestValuesSet) { memcpy(reinterpret_cast<char*>(&cpu_state) + off, &val, 1); } - GetCpuState(&guest_regs, &cpu_state); + EXPECT_EQ(GetCpuState(&guest_regs, &cpu_state), 0); for (std::size_t i = 0; i < 32; i++) { EXPECT_EQ(guest_regs.regs_riscv64.x[i], cpu_state.x[i]); @@ -49,6 +49,19 @@ TEST(GetCpuStateTest, TestValuesSet) { EXPECT_EQ(guest_regs.regs_riscv64.ip, cpu_state.insn_addr); } +TEST(GetArm64CpuStateTest, TestErrorSize) { + NativeBridgeGuestRegs guest_regs{.guest_arch = NATIVE_BRIDGE_ARCH_RISCV64}; + int res = LoadGuestStateRegisters(nullptr, sizeof(ThreadState) - 1, &guest_regs); + EXPECT_EQ(res, NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_INVALID_STATE); +} + +TEST(GetArm64CpuStateTest, TestErrorArch) { + NativeBridgeGuestRegs guest_regs{.guest_arch = NATIVE_BRIDGE_ARCH_ARM64}; + CPUState cpu_state; + int res = GetCpuState(&guest_regs, &cpu_state); + EXPECT_EQ(res, NATIVE_BRIDGE_GUEST_STATE_ACCESSOR_ERROR_UNSUPPORTED_ARCH); +} + } // namespace } // namespace berberis
\ No newline at end of file diff --git a/heavy_optimizer/riscv64/frontend.h b/heavy_optimizer/riscv64/frontend.h index 30c5e149..17de50c8 100644 --- a/heavy_optimizer/riscv64/frontend.h +++ b/heavy_optimizer/riscv64/frontend.h @@ -376,7 +376,7 @@ class HeavyOptimizerFrontend { Gen<x86_64::AndbRegImm>(tmp, kCsrMask<kName>, GetFlagsRegister()); Gen<x86_64::MovbMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<kName>, tmp); } else if constexpr (sizeof(CsrFieldType<kName>) == 8) { - Gen<x86_64::AndqRegImm>( + Gen<x86_64::AndqRegMemAbsolute>( tmp, constants_pool::kConst<uint64_t{kCsrMask<kName>}>, GetFlagsRegister()); Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<kName>, tmp); } else { diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h index 661e4d92..01cbcaad 100644 --- a/interpreter/riscv64/interpreter.h +++ b/interpreter/riscv64/interpreter.h @@ -1148,7 +1148,7 @@ class Interpreter { if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) { if (register_mask != full_mask) { auto [simd_mask] = - intrinsics::BitMaskToSimdMaskForTests<ElementType>(Int64{MaskType{register_mask}}); + intrinsics::BitMaskToSimdMask<ElementType>(Int64{MaskType{register_mask}}); for (size_t field = 0; field < kSegmentSize; ++field) { if constexpr (vma == InactiveProcessing::kAgnostic) { // vstart equal to zero is supposed to be exceptional. 
From RISC-V V manual (page 14): @@ -1301,6 +1301,34 @@ class Interpreter { vta, vma, kFrm>(args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfwmaccvf: + return OpVectorWidenvxw<intrinsics::Vfwmaccvf<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfwnmaccvf: + return OpVectorWidenvxw<intrinsics::Vfwnmaccvf<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfwmsacvf: + return OpVectorWidenvxw<intrinsics::Vfwmsacvf<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfwnmsacvf: + return OpVectorWidenvxw<intrinsics::Vfwnmsacvf<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, arg2); default: break; } @@ -1415,6 +1443,30 @@ class Interpreter { vta, vma, kFrm>(args.dst, Vec<SignedType{}>{args.src1}, arg2); + case Decoder::VOpFVfOpcode::kVfmaccvf: + return OpVectorvxv<intrinsics::Vfmaccvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfmsacvf: + return OpVectorvxv<intrinsics::Vfmsacvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfmaddvf: + return OpVectorvxv<intrinsics::Vfmaddvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfmsubvf: + return OpVectorvxv<intrinsics::Vfmsubvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfnmaccvf: + return OpVectorvxv<intrinsics::Vfnmaccvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfnmsacvf: + return OpVectorvxv<intrinsics::Vfnmsacvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfnmaddvf: + return
OpVectorvxv<intrinsics::Vfnmaddvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfnmsubvf: + return OpVectorvxv<intrinsics::Vfnmsubvf<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, arg2); default: return Undefined(); } @@ -1538,6 +1590,34 @@ class Interpreter { vta, vma, kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfwmaccvv: + return OpVectorWidenvvw<intrinsics::Vfwmaccvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfwnmaccvv: + return OpVectorWidenvvw<intrinsics::Vfwnmaccvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfwmsacvv: + return OpVectorWidenvvw<intrinsics::Vfwmsacvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfwnmsacvv: + return OpVectorWidenvvw<intrinsics::Vfwnmsacvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); case Decoder::VOpFVvOpcode::kVFUnary0: switch (args.vfunary0_opcode) { case Decoder::VFUnary0Opcode::kVfwcvtxufv: @@ -1763,6 +1843,14 @@ class Interpreter { break; case Decoder::VOpFVvOpcode::kVFUnary1: switch (args.vfunary1_opcode) { + case Decoder::VFUnary1Opcode::kVfsqrtv: + return OpVectorv<intrinsics::Vfsqrtv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1); + break; case Decoder::VFUnary1Opcode::kVfrsqrt7v: return OpVectorv<intrinsics::Vfrsqrt7v<ElementType>, ElementType, vlmul, vta, vma>( args.dst, args.src1); @@ -1827,6 +1915,46 @@ class Interpreter { vma, kFrm>( args.dst, Vec<SignedType{}>{args.src1}, Vec<SignedType{}>{args.src2}); + case Decoder::VOpFVvOpcode::kVfmaccvv: + return OpVectorvvv<intrinsics::Vfmaccvv<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, args.src2); + case 
Decoder::VOpFVvOpcode::kVfmsacvv: + return OpVectorvvv<intrinsics::Vfmsacvv<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfmaddvv: + return OpVectorvvv<intrinsics::Vfmaddvv<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfmsubvv: + return OpVectorvvv<intrinsics::Vfmsubvv<ElementType>, ElementType, vlmul, vta, vma, kFrm>( + args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfnmaccvv: + return OpVectorvvv<intrinsics::Vfnmaccvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfnmsacvv: + return OpVectorvvv<intrinsics::Vfnmsacvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfnmaddvv: + return OpVectorvvv<intrinsics::Vfnmaddvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfnmsubvv: + return OpVectorvvv<intrinsics::Vfnmsubvv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); default: break; // Make compiler happy. } @@ -3217,10 +3345,15 @@ class Interpreter { typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, - auto vma> + auto vma, + CsrName... kExtraCsrs> void OpVectorvvv(uint8_t dst, uint8_t src1, uint8_t src2) { - return OpVectorSameWidth<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>( - dst, Vec{src1}, Vec{src2}, Vec{dst}); + return OpVectorSameWidth<Intrinsic, + ElementType, + NumberOfRegistersInvolved(vlmul), + vta, + vma, + kExtraCsrs...>(dst, Vec{src1}, Vec{src2}, Vec{dst}); } template <auto Intrinsic, @@ -3649,10 +3782,15 @@ class Interpreter { typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta, - auto vma> + auto vma, + CsrName... 
kExtraCsrs> void OpVectorvxv(uint8_t dst, uint8_t src1, ElementType arg2) { - return OpVectorSameWidth<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>( - dst, Vec{src1}, arg2, Vec{dst}); + return OpVectorSameWidth<Intrinsic, + ElementType, + NumberOfRegistersInvolved(vlmul), + vta, + vma, + kExtraCsrs...>(dst, Vec{src1}, arg2, Vec{dst}); } template <auto Intrinsic, diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 1cbdfcb2..de987a8f 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -1143,21 +1143,6 @@ class Riscv64InterpreterTest : public ::testing::Test { expected_result_int64); } - void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes, - const uint32_t (&expected_result_int32)[4][4], - const __v2du (&source)[16]) { - TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>( - insn_bytes, source, expected_result_int32); - } - - void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes, - const uint16_t (&expected_result_int16)[4][8], - const uint32_t (&expected_result_int32)[4][4], - const __v2du (&source)[16]) { - TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>( - insn_bytes, source, expected_result_int16, expected_result_int32); - } - void TestNarrowingVectorInstruction(uint32_t insn_bytes, const uint8_t (&expected_result_int8)[4][16], const uint16_t (&expected_result_int16)[4][8], @@ -1170,9 +1155,11 @@ class Riscv64InterpreterTest : public ::testing::Test { void TestWideningVectorFloatInstruction(uint32_t insn_bytes, const uint64_t (&expected_result_int64)[8][2], - const __v2du (&source)[16]) { - TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kWidening>( - insn_bytes, source, expected_result_int64); + const __v2du (&source)[16], + __m128i dst_result = kUndisturbedResult) { + 
TestVectorInstructionInternal<TestVectorInstructionKind::kFloat, + TestVectorInstructionMode::kWidening>( + insn_bytes, dst_result, source, expected_result_int64); } void TestWideningVectorFloatInstruction(uint32_t insn_bytes, @@ -1205,11 +1192,25 @@ class Riscv64InterpreterTest : public ::testing::Test { uint32_t insn_bytes, const __v2du (&source)[16], const ElementType (&... expected_result)[kResultsCount][kElementCount]) { - auto Verify = [this, &source](uint32_t insn_bytes, - uint8_t vsew, - uint8_t vlmul_max, - const auto& expected_result, - auto mask) { + TestVectorInstructionInternal<kTestVectorInstructionKind, kTestVectorInstructionMode>( + insn_bytes, kUndisturbedResult, source, expected_result...); + } + + template <TestVectorInstructionKind kTestVectorInstructionKind, + TestVectorInstructionMode kTestVectorInstructionMode, + typename... ElementType, + size_t... kResultsCount, + size_t... kElementCount> + void TestVectorInstructionInternal( + uint32_t insn_bytes, + __m128i dst_result, + const __v2du (&source)[16], + const ElementType (&... expected_result)[kResultsCount][kElementCount]) { + auto Verify = [this, &source, dst_result](uint32_t insn_bytes, + uint8_t vsew, + uint8_t vlmul_max, + const auto& expected_result, + auto mask) { // Mask register is, unconditionally, v0, and we need 8, 16, or 24 to handle full 8-registers // inputs thus we use v8..v15 for destination and place sources into v16..v23 and v24..v31. state_.cpu.v[0] = SIMD128Register{kMask}.Get<__uint128_t>(); @@ -1268,7 +1269,7 @@ class Riscv64InterpreterTest : public ::testing::Test { // Set expected_result vector registers into 0b01010101… pattern. 
for (size_t index = 0; index < 8; ++index) { - state_.cpu.v[8 + index] = SIMD128Register{kUndisturbedResult}.Get<__uint128_t>(); + state_.cpu.v[8 + index] = SIMD128Register{dst_result}.Get<__uint128_t>(); } state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); @@ -1282,14 +1283,14 @@ class Riscv64InterpreterTest : public ::testing::Test { std::copy_n(source, 8, expected_inactive); } else { // For most instructions, follow basic inactive processing rules based on vma flag. - std::fill_n(expected_inactive, 8, (vma ? kAgnosticResult : kUndisturbedResult)); + std::fill_n(expected_inactive, 8, (vma ? kAgnosticResult : dst_result)); } if (emul < 4) { for (size_t index = 0; index < 1 << emul; ++index) { if (index == 0 && emul == 2) { EXPECT_EQ(state_.cpu.v[8 + index], - ((kUndisturbedResult & kFractionMaskInt8[3]) | + ((dst_result & kFractionMaskInt8[3]) | (SIMD128Register{expected_result[index]} & mask[index] & ~kFractionMaskInt8[3]) | (expected_inactive[index] & ~mask[index] & ~kFractionMaskInt8[3])) @@ -1299,12 +1300,12 @@ class Riscv64InterpreterTest : public ::testing::Test { ((SIMD128Register{expected_result[index]} & mask[index] & kFractionMaskInt8[3]) | (expected_inactive[index] & ~mask[index] & kFractionMaskInt8[3]) | - ((vta ? kAgnosticResult : kUndisturbedResult) & ~kFractionMaskInt8[3])) + ((vta ? 
kAgnosticResult : dst_result) & ~kFractionMaskInt8[3])) .template Get<__uint128_t>()); } else if (index == 3 && emul == 2 && vta) { EXPECT_EQ(state_.cpu.v[8 + index], SIMD128Register{kAgnosticResult}); } else if (index == 3 && emul == 2) { - EXPECT_EQ(state_.cpu.v[8 + index], SIMD128Register{kUndisturbedResult}); + EXPECT_EQ(state_.cpu.v[8 + index], SIMD128Register{dst_result}); } else { EXPECT_EQ(state_.cpu.v[8 + index], ((SIMD128Register{expected_result[index]} & mask[index]) | @@ -1317,7 +1318,7 @@ class Riscv64InterpreterTest : public ::testing::Test { state_.cpu.v[8], ((SIMD128Register{expected_result[0]} & mask[0] & kFractionMaskInt8[emul - 4]) | (expected_inactive[0] & ~mask[0] & kFractionMaskInt8[emul - 4]) | - ((vta ? kAgnosticResult : kUndisturbedResult) & ~kFractionMaskInt8[emul - 4])) + ((vta ? kAgnosticResult : dst_result) & ~kFractionMaskInt8[emul - 4])) .template Get<__uint128_t>()); } @@ -1531,136 +1532,6 @@ class Riscv64InterpreterTest : public ::testing::Test { } } - template <bool kIsMasked, typename ElementType> - auto MaskForElemIfMasked() { - if constexpr (!kIsMasked) { - return kNoMask; - } else { - return MaskForElem<ElementType>(); - } - } - - template <bool kIsMasked> - void TestVectorIota(uint32_t insn_bytes, - const uint8_t (&expected_result_int8)[8][16], - const uint16_t (&expected_result_int16)[8][8], - const uint32_t (&expected_result_int32)[8][4], - const uint64_t (&expected_result_int64)[8][2], - const __v2du(&source)) { - TestVectorIota<kIsMasked>(insn_bytes, - source, - expected_result_int8, - expected_result_int16, - expected_result_int32, - expected_result_int64); - } - - template <bool kIsMasked, - typename... ElementType, - size_t... kResultsCount, - size_t... kElementCount> - void TestVectorIota(uint32_t insn_bytes, - const __v2du& src1, - const ElementType (&... 
expected_result)[kResultsCount][kElementCount]) { - const __uint128_t mask = SIMD128Register{kMask}.Get<__uint128_t>(); - const __uint128_t src = SIMD128Register{src1}.Get<__uint128_t>(); - const __uint128_t undisturbed = SIMD128Register{kUndisturbedResult}.Get<__uint128_t>(); - - auto Verify = [this, &src, &mask, &undisturbed](uint32_t insn_bytes, - uint8_t vsew, - const auto& expected_result, - auto elem_mask) { - state_.cpu.v[0] = mask; - state_.cpu.v[16] = src; - for (uint8_t vlmul = 0; vlmul < 8; ++vlmul) { - for (uint8_t vta = 0; vta < 2; ++vta) { - for (uint8_t vma = 0; vma < 2; ++vma) { - auto [vlmax, vtype] = - intrinsics::Vsetvl(~0ULL, (vma << 7) | (vta << 6) | (vsew << 3) | vlmul); - // Incompatible vsew and vlmax. Skip it. - if (vlmax == 0) { - continue; - } - - for (uint8_t vl = 0; vl < vlmax; vl += vlmax) { - // To make tests quick enough we don't test vl change with small register sets. Only - // with vlmul == 2 (4 registers) we set vl to skip last register and half of next-to - // last register. - if (vlmul == 2 && vl == vlmax) { - state_.cpu.vl = 5 * vlmax / 8; - } else { - state_.cpu.vl = vl; - } - - state_.cpu.vstart = 0; - state_.cpu.vtype = vtype; - for (size_t index = 0; index < 8; ++index) { - state_.cpu.v[8 + index] = undisturbed; - } - - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); - - __m128i expected_inactive[8]; - std::fill_n(expected_inactive, 8, (vma ? 
kAgnosticResult : kUndisturbedResult)); - - // vl of 0 should never change dst registers - if (vl == 0) { - for (size_t index = 0; index < 8; ++index) { - EXPECT_EQ(state_.cpu.v[8 + index], undisturbed); - } - } else if (vlmul < 4) { - for (size_t index = 0; index < 1 << vlmul; ++index) { - for (size_t index = 0; index < 1 << vlmul; ++index) { - if (index == 2 && vlmul == 2) { - EXPECT_EQ( - state_.cpu.v[8 + index], - ((SIMD128Register{expected_result[index]} & elem_mask[index] & - kFractionMaskInt8[3]) | - (expected_inactive[index] & ~elem_mask[index] & kFractionMaskInt8[3]) | - ((vta ? kAgnosticResult : kUndisturbedResult) & ~kFractionMaskInt8[3])) - .template Get<__uint128_t>()); - } else if (index == 3 && vlmul == 2) { - EXPECT_EQ(state_.cpu.v[8 + index], - SIMD128Register{vta ? kAgnosticResult : kUndisturbedResult}); - } else { - EXPECT_EQ(state_.cpu.v[8 + index], - ((SIMD128Register{expected_result[index]} & elem_mask[index]) | - (expected_inactive[index] & ~elem_mask[index])) - .template Get<__uint128_t>()); - } - // Every vector instruction must set vstart to 0, but shouldn't touch vl. - EXPECT_EQ(state_.cpu.vstart, 0); - if (vlmul == 2) { - EXPECT_EQ(state_.cpu.vl, 5 * vlmax / 8); - } else { - EXPECT_EQ(state_.cpu.vl, vlmax); - } - } - } - } else { - // vlmul >= 4 only uses 1 register - EXPECT_EQ( - state_.cpu.v[8], - ((SIMD128Register{expected_result[0]} & elem_mask[0] & - kFractionMaskInt8[vlmul - 4]) | - (expected_inactive[0] & ~elem_mask[0] & kFractionMaskInt8[vlmul - 4]) | - ((vta ? 
kAgnosticResult : kUndisturbedResult) & ~kFractionMaskInt8[vlmul - 4])) - .template Get<__uint128_t>()); - } - } - } - } - } - }; - - (Verify(insn_bytes, - BitUtilLog2(sizeof(ElementType)), - expected_result, - MaskForElemIfMasked<kIsMasked, ElementType>()), - ...); - } - void TestVectorMaskTargetInstruction(uint32_t insn_bytes, const uint32_t expected_result_int32, const uint16_t expected_result_int64, @@ -3964,265 +3835,6 @@ TEST_F(Riscv64InterpreterTest, TestVfrsqrt7) { kVectorCalculationsSource); } -TEST_F(Riscv64InterpreterTest, TestVfcvtxfv) { - TestVectorFloatInstruction(0x49801457, // Vfcvt.xu.f.v v8, v24, v0.t - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0xffff'ffff, 0xffff'ffff, 0x0000'6a21, 0x6e25'6c00}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x49809457, // Vfcvt.x.f.v v8, v24, v0.t - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x8000'0000, 0x8000'0000, 0xffff'cacf, 0xc8cd'6a00}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x7fff'ffff, 0x7fff'ffff, 0x0000'6a21, 0x6e25'6c00}, - {0x7fff'ffff, 
0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}, - {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x49811457, // Vfcvt.f.xu.v v8, v24, v0.t - {{0x4f16'0492, 0x4f1e'0c9a, 0x4f06'1482, 0x4f0e'1c8a}, - {0x4f36'24b2, 0x4f3e'2cba, 0x4f26'34a2, 0x4f2e'3caa}, - {0x4f56'44d2, 0x4f5e'4cda, 0x4f46'54c2, 0x4f4e'5cca}, - {0x4f76'64f2, 0x4f7e'6cfa, 0x4f66'74e2, 0x4f6e'7cea}, - {0x4db4'2094, 0x4df4'60d4, 0x4cd2'8052, 0x4d69'c0aa}, - {0x4e5a'90ca, 0x4e7a'b0eb, 0x4e1a'd08b, 0x4e3a'f0ab}, - {0x4ead'88a6, 0x4ebd'98b6, 0x4e8d'a886, 0x4e9d'b896}, - {0x4eed'c8e6, 0x4efd'd8f6, 0x4ecd'e8c6, 0x4edd'f8d6}}, - {{0x43e3'c193'4132'c092, 0x43e1'c391'4310'c290}, - {0x43e7'c597'4536'c496, 0x43e5'c795'4714'c694}, - {0x43eb'c99b'493a'c89a, 0x43e9'cb99'4b18'ca98}, - {0x43ef'cd9f'4d3e'cc9e, 0x43ed'cf9d'4f1c'ce9c}, - {0x43be'8c1a'8916'8412, 0x43ad'3815'300d'2805}, - {0x43cf'561d'549b'5219, 0x43c7'5e15'5c13'5a11}, - {0x43d7'b316'b255'b115, 0x43d3'b712'b611'b511}, - {0x43df'bb1e'ba5d'b91d, 0x43db'bf1a'be19'bd19}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x49819457, // Vfcvt.f.x.v v8, v24, v0.t - {{0xced3'f6dc, 0xcec3'e6cc, 0xcef3'd6fc, 0xcee3'c6ec}, - {0xce93'b69c, 0xce83'a68c, 0xceb3'96bc, 0xcea3'86ac}, - {0xce26'ecb7, 0xce06'cc97, 0xce66'acf7, 0xce46'8cd7}, - {0xcd19'b0da, 0xcbc9'82cc, 0xcdcc'58ec, 0xcd8c'18ac}, - {0x4db4'2094, 0x4df4'60d4, 0x4cd2'8052, 0x4d69'c0aa}, - {0x4e5a'90ca, 0x4e7a'b0eb, 0x4e1a'd08b, 0x4e3a'f0ab}, - {0x4ead'88a6, 0x4ebd'98b6, 0x4e8d'a886, 0x4e9d'b896}, - {0x4eed'c8e6, 0x4efd'd8f6, 0x4ecd'e8c6, 0x4edd'f8d6}}, - {{0xc3d8'7cd9'7d9a'7edc, 0xc3dc'78dd'79de'7adf}, - 
{0xc3d0'74d1'7592'76d3, 0xc3d4'70d5'71d6'72d7}, - {0xc3c0'd992'db14'dd97, 0xc3c8'd19a'd39c'd59f}, - {0xc379'3059'6099'b0da, 0xc3b1'8315'8719'8b1e}, - {0x43be'8c1a'8916'8412, 0x43ad'3815'300d'2805}, - {0x43cf'561d'549b'5219, 0x43c7'5e15'5c13'5a11}, - {0x43d7'b316'b255'b115, 0x43d3'b712'b611'b511}, - {0x43df'bb1e'ba5d'b91d, 0x43db'bf1a'be19'bd19}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x49831457, // Vfcvt.rtz.xu.f.v v8, v24, v0.t - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0xffff'ffff, 0xffff'ffff, 0x0000'6a21, 0x6e25'6c00}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x49839457, // Vfcvt.rtz.x.f.v v8, v24, v0.t - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x8000'0000, 0x8000'0000, 0xffff'cad0, 0xc8cd'6a00}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x7fff'ffff, 0x7fff'ffff, 0x0000'6a21, 0x6e25'6c00}, - {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x8000'0000'0000'0000, 
0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}, - {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49c41457, // Vfwcvt.xu.f.v v8, v28, v0.t - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'6229'6000'0000, 0x662d'6480'0000'0000}, - {0x0000'0000'0000'6a21, 0x0000'0000'6e25'6c00}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49849457, // Vfwcvt.x.f.v v8, v24, v0.t - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0xffff'cecb'7000'0000, 0xccc9'6dc0'0000'0000}, - {0xffff'ffff'ffff'cacf, 0xffff'ffff'c8cd'6a00}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49861457, // Vfwcvt.f.f.v v8, v24, v0.t - {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000}, - {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000}, - {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, - {0xbcc6'9446'2000'0000, 0xbdc7'9547'0000'0000}, - {0xc2c8'9a48'0000'0000, 0xc3c9'9b49'2000'0000}, - {0xc0ca'984a'2000'0000, 0xc1cb'994b'0000'0000}, - {0xc6cc'9e4c'0000'0000, 0xc7cd'9f4d'2000'0000}, - {0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49851457, // Vfwcvt.f.xu.v v8, v24, v0.t - {{0x4712'0000, 0x4716'0400, 0x471a'0900, 0x471e'0c00}, - {0x4702'1100, 
0x4706'1400, 0x470a'1800, 0x470e'1c00}, - {0x4732'2000, 0x4736'2400, 0x473a'2900, 0x473e'2c00}, - {0x4722'3100, 0x4726'3400, 0x472a'3800, 0x472e'3c00}, - {0x4752'4000, 0x4756'4400, 0x475a'4900, 0x475e'4c00}, - {0x4742'5100, 0x4746'5400, 0x474a'5800, 0x474e'5c00}, - {0x4772'6000, 0x4776'6400, 0x477a'6900, 0x477e'6c00}, - {0x4762'7100, 0x4766'7400, 0x476a'7800, 0x476e'7c00}}, - {{0x41e2'c092'4000'0000, 0x41e3'c193'4120'0000}, - {0x41e0'c290'4220'0000, 0x41e1'c391'4300'0000}, - {0x41e6'c496'4400'0000, 0x41e7'c597'4520'0000}, - {0x41e4'c694'4620'0000, 0x41e5'c795'4700'0000}, - {0x41ea'c89a'4800'0000, 0x41eb'c99b'4920'0000}, - {0x41e8'ca98'4a20'0000, 0x41e9'cb99'4b00'0000}, - {0x41ee'cc9e'4c00'0000, 0x41ef'cd9f'4d20'0000}, - {0x41ec'ce9c'4e20'0000, 0x41ed'cf9d'4f00'0000}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49859457, // Vfwcvt.f.x.v v8, v24, v0.t - {{0xc6dc'0000, 0xc6d3'f800, 0xc6cb'ee00, 0xc6c3'e800}, - {0xc6fb'de00, 0xc6f3'd800, 0xc6eb'd000, 0xc6e3'c800}, - {0xc69b'c000, 0xc693'b800, 0xc68b'ae00, 0xc683'a800}, - {0xc6bb'9e00, 0xc6b3'9800, 0xc6ab'9000, 0xc6a3'8800}, - {0xc637'0000, 0xc626'f000, 0xc616'dc00, 0xc606'd000}, - {0xc676'bc00, 0xc666'b000, 0xc656'a000, 0xc646'9000}, - {0xc55a'0000, 0xc519'c000, 0xc4b2'e000, 0xc3ca'0000}, - {0xc5ec'7800, 0xc5cc'6000, 0xc5ac'4000, 0xc58c'2000}}, - {{0xc1da'7edb'8000'0000, 0xc1d8'7cd9'7dc0'0000}, - {0xc1de'7adf'7bc0'0000, 0xc1dc'78dd'7a00'0000}, - {0xc1d2'76d3'7800'0000, 0xc1d0'74d1'75c0'0000}, - {0xc1d6'72d7'73c0'0000, 0xc1d4'70d5'7200'0000}, - {0xc1c4'dd96'e000'0000, 0xc1c0'd992'db80'0000}, - {0xc1cc'd59e'd780'0000, 0xc1c8'd19a'd400'0000}, - {0xc1a3'361b'4000'0000, 0xc179'3059'7000'0000}, - {0xc1b9'8b1d'8f00'0000, 0xc1b1'8315'8800'0000}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49c71457, // Vfwcvt.rtz.xu.f.v v8, v28, v0.t - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 
0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'6229'6000'0000, 0x662d'6480'0000'0000}, - {0x0000'0000'0000'6a21, 0x0000'0000'6e25'6c00}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0x49879457, // Vfwcvt.rtz.x.f.v v8, v24, v0.t - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0xffff'cecb'7000'0000, 0xccc9'6dc0'0000'0000}, - {0xffff'ffff'ffff'cad0, 0xffff'ffff'c8cd'6a00}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction( - 0x49881457, // Vfncvt.xu.f.w v8, v24, v0.t - {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction( - 0x49889457, // Vfncvt.x.f.w v8, v24, v0.t - {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x8000, 0x8000, 0xcacf, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}}, - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x7fff'ffff, 0x7fff'ffff, 
0x7fff'ffff, 0x7fff'ffff}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction(0x498a1457, // Vfncvt.f.f.w v8, v24, v0.t - {{0x8000'0000, 0x8000'0000, 0xb165'd14e, 0x8000'0000}, - {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}, - {0x0000'0000, 0x0000'0000, 0x3561'd54a, 0x0000'0000}, - {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction(0x49891457, // Vfncvt.f.xu.w v8, v24, v0.t - {{0x5f1e'0c9a, 0x5f0e'1c8a, 0x5f3e'2cba, 0x5f2e'3caa}, - {0x5f5e'4cda, 0x5f4e'5cca, 0x5f7e'6cfa, 0x5f6e'7cea}, - {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab}, - {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction(0x49899457, // Vfncvt.f.x.w v8, v24, v0.t - {{0xdec3'e6cc, 0xdee3'c6ec, 0xde83'a68c, 0xdea3'86ac}, - {0xde06'cc97, 0xde46'8cd7, 0xdbc9'82cb, 0xdd8c'18ac}, - {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab}, - {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction( - 0x498b1457, // Vfncvt.rtz.xu.f.w v8, v24, v0.t - {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - kVectorCalculationsSource); - TestNarrowingVectorFloatInstruction( - 0x498b9457, // Vfncvt.rtz.x.f.w v8, v24, v0.t - {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x8000, 0x8000, 0xcad0, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x7fff, 0x7fff, 
0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}}, - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, - kVectorCalculationsSource); -} - TEST_F(Riscv64InterpreterTest, TestVfmvfs) { TestVfmvfs<intrinsics::Float32>(0x428010d7, 0xffff'ffff'8302'8100); // Vfmv.f.s f1, v8 TestVfmvfs<intrinsics::Float64>(0x428010d7, 0x8706'8504'8302'8100); // Vfmv.f.s f1, v8 @@ -7628,394 +7240,6 @@ TEST_F(Riscv64InterpreterTest, TestVsm) { {0, 129, 2, 131, 4, 133, 6, 135, 8, 137, 10, 139, 12, 141, 14, 143}); } -TEST_F(Riscv64InterpreterTest, TestVadd) { - TestVectorInstruction( - 0x10c0457, // Vadd.vv v8, v16, v24, v0.t - {{0, 131, 6, 137, 13, 143, 18, 149, 25, 155, 30, 161, 36, 167, 42, 173}, - {48, 179, 54, 185, 61, 191, 66, 197, 73, 203, 78, 209, 84, 215, 90, 221}, - {96, 227, 102, 233, 109, 239, 114, 245, 121, 251, 126, 1, 132, 7, 138, 13}, - {144, 19, 150, 25, 157, 31, 162, 37, 169, 43, 174, 49, 180, 55, 186, 61}, - {192, 67, 198, 73, 205, 79, 210, 85, 217, 91, 222, 97, 228, 103, 234, 109}, - {240, 115, 246, 121, 253, 127, 2, 133, 9, 139, 14, 145, 20, 151, 26, 157}, - {32, 163, 38, 169, 45, 175, 50, 181, 57, 187, 62, 193, 68, 199, 74, 205}, - {80, 211, 86, 217, 93, 223, 98, 229, 105, 235, 110, 241, 116, 247, 122, 253}}, - {{0x8300, 0x8906, 0x8f0d, 0x9512, 0x9b19, 0xa11e, 0xa724, 0xad2a}, - {0xb330, 0xb936, 0xbf3d, 0xc542, 0xcb49, 0xd14e, 0xd754, 0xdd5a}, - {0xe360, 0xe966, 0xef6d, 0xf572, 0xfb79, 0x017e, 0x0784, 0x0d8a}, - {0x1390, 0x1996, 0x1f9d, 0x25a2, 0x2ba9, 0x31ae, 0x37b4, 0x3dba}, - {0x43c0, 0x49c6, 0x4fcd, 0x55d2, 0x5bd9, 0x61de, 0x67e4, 0x6dea}, - {0x73f0, 0x79f6, 0x7ffd, 0x8602, 0x8c09, 0x920e, 0x9814, 0x9e1a}, - {0xa420, 0xaa26, 0xb02d, 0xb632, 0xbc39, 0xc23e, 0xc844, 0xce4a}, - {0xd450, 0xda56, 0xe05d, 0xe662, 0xec69, 0xf26e, 0xf874, 0xfe7a}}, - {{0x8906'8300, 0x9512'8f0d, 0xa11e'9b19, 0xad2a'a724}, 
- {0xb936'b330, 0xc542'bf3d, 0xd14e'cb49, 0xdd5a'd754}, - {0xe966'e360, 0xf572'ef6d, 0x017e'fb79, 0x0d8b'0784}, - {0x1997'1390, 0x25a3'1f9d, 0x31af'2ba9, 0x3dbb'37b4}, - {0x49c7'43c0, 0x55d3'4fcd, 0x61df'5bd9, 0x6deb'67e4}, - {0x79f7'73f0, 0x8603'7ffd, 0x920f'8c09, 0x9e1b'9814}, - {0xaa27'a420, 0xb633'b02d, 0xc23f'bc39, 0xce4b'c844}, - {0xda57'd450, 0xe663'e05d, 0xf26f'ec69, 0xfe7b'f874}}, - {{0x9512'8f0d'8906'8300, 0xad2a'a724'a11e'9b19}, - {0xc542'bf3d'b936'b330, 0xdd5a'd754'd14e'cb49}, - {0xf572'ef6d'e966'e360, 0x0d8b'0785'017e'fb79}, - {0x25a3'1f9e'1997'1390, 0x3dbb'37b5'31af'2ba9}, - {0x55d3'4fce'49c7'43c0, 0x6deb'67e5'61df'5bd9}, - {0x8603'7ffe'79f7'73f0, 0x9e1b'9815'920f'8c09}, - {0xb633'b02e'aa27'a420, 0xce4b'c845'c23f'bc39}, - {0xe663'e05e'da57'd450, 0xfe7b'f875'f26f'ec69}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x100c457, // Vadd.vx v8, v16, x1, v0.t - {{170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180, 53, 182, 55, 184, 57}, - {186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196, 69, 198, 71, 200, 73}, - {202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212, 85, 214, 87, 216, 89}, - {218, 91, 220, 93, 222, 95, 224, 97, 226, 99, 228, 101, 230, 103, 232, 105}, - {234, 107, 236, 109, 238, 111, 240, 113, 242, 115, 244, 117, 246, 119, 248, 121}, - {250, 123, 252, 125, 254, 127, 0, 129, 2, 131, 4, 133, 6, 135, 8, 137}, - {10, 139, 12, 141, 14, 143, 16, 145, 18, 147, 20, 149, 22, 151, 24, 153}, - {26, 155, 28, 157, 30, 159, 32, 161, 34, 163, 36, 165, 38, 167, 40, 169}}, - {{0x2baa, 0x2dac, 0x2fae, 0x31b0, 0x33b2, 0x35b4, 0x37b6, 0x39b8}, - {0x3bba, 0x3dbc, 0x3fbe, 0x41c0, 0x43c2, 0x45c4, 0x47c6, 0x49c8}, - {0x4bca, 0x4dcc, 0x4fce, 0x51d0, 0x53d2, 0x55d4, 0x57d6, 0x59d8}, - {0x5bda, 0x5ddc, 0x5fde, 0x61e0, 0x63e2, 0x65e4, 0x67e6, 0x69e8}, - {0x6bea, 0x6dec, 0x6fee, 0x71f0, 0x73f2, 0x75f4, 0x77f6, 0x79f8}, - {0x7bfa, 0x7dfc, 0x7ffe, 0x8200, 0x8402, 0x8604, 0x8806, 0x8a08}, - {0x8c0a, 0x8e0c, 0x900e, 0x9210, 0x9412, 0x9614, 0x9816, 0x9a18}, - 
{0x9c1a, 0x9e1c, 0xa01e, 0xa220, 0xa422, 0xa624, 0xa826, 0xaa28}}, - {{0x2dad'2baa, 0x31b1'2fae, 0x35b5'33b2, 0x39b9'37b6}, - {0x3dbd'3bba, 0x41c1'3fbe, 0x45c5'43c2, 0x49c9'47c6}, - {0x4dcd'4bca, 0x51d1'4fce, 0x55d5'53d2, 0x59d9'57d6}, - {0x5ddd'5bda, 0x61e1'5fde, 0x65e5'63e2, 0x69e9'67e6}, - {0x6ded'6bea, 0x71f1'6fee, 0x75f5'73f2, 0x79f9'77f6}, - {0x7dfd'7bfa, 0x8201'7ffe, 0x8605'8402, 0x8a09'8806}, - {0x8e0d'8c0a, 0x9211'900e, 0x9615'9412, 0x9a19'9816}, - {0x9e1d'9c1a, 0xa221'a01e, 0xa625'a422, 0xaa29'a826}}, - {{0x31b1'2faf'2dad'2baa, 0x39b9'37b7'35b5'33b2}, - {0x41c1'3fbf'3dbd'3bba, 0x49c9'47c7'45c5'43c2}, - {0x51d1'4fcf'4dcd'4bca, 0x59d9'57d7'55d5'53d2}, - {0x61e1'5fdf'5ddd'5bda, 0x69e9'67e7'65e5'63e2}, - {0x71f1'6fef'6ded'6bea, 0x79f9'77f7'75f5'73f2}, - {0x8201'7fff'7dfd'7bfa, 0x8a09'8807'8605'8402}, - {0x9211'900f'8e0d'8c0a, 0x9a19'9817'9615'9412}, - {0xa221'a01f'9e1d'9c1a, 0xaa29'a827'a625'a422}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x10ab457, // Vadd.vi v8, v16, -0xb, v0.t - {{245, 118, 247, 120, 249, 122, 251, 124, 253, 126, 255, 128, 1, 130, 3, 132}, - {5, 134, 7, 136, 9, 138, 11, 140, 13, 142, 15, 144, 17, 146, 19, 148}, - {21, 150, 23, 152, 25, 154, 27, 156, 29, 158, 31, 160, 33, 162, 35, 164}, - {37, 166, 39, 168, 41, 170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180}, - {53, 182, 55, 184, 57, 186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196}, - {69, 198, 71, 200, 73, 202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212}, - {85, 214, 87, 216, 89, 218, 91, 220, 93, 222, 95, 224, 97, 226, 99, 228}, - {101, 230, 103, 232, 105, 234, 107, 236, 109, 238, 111, 240, 113, 242, 115, 244}}, - {{0x80f5, 0x82f7, 0x84f9, 0x86fb, 0x88fd, 0x8aff, 0x8d01, 0x8f03}, - {0x9105, 0x9307, 0x9509, 0x970b, 0x990d, 0x9b0f, 0x9d11, 0x9f13}, - {0xa115, 0xa317, 0xa519, 0xa71b, 0xa91d, 0xab1f, 0xad21, 0xaf23}, - {0xb125, 0xb327, 0xb529, 0xb72b, 0xb92d, 0xbb2f, 0xbd31, 0xbf33}, - {0xc135, 0xc337, 0xc539, 0xc73b, 0xc93d, 0xcb3f, 0xcd41, 0xcf43}, - {0xd145, 
0xd347, 0xd549, 0xd74b, 0xd94d, 0xdb4f, 0xdd51, 0xdf53}, - {0xe155, 0xe357, 0xe559, 0xe75b, 0xe95d, 0xeb5f, 0xed61, 0xef63}, - {0xf165, 0xf367, 0xf569, 0xf76b, 0xf96d, 0xfb6f, 0xfd71, 0xff73}}, - {{0x8302'80f5, 0x8706'84f9, 0x8b0a'88fd, 0x8f0e'8d01}, - {0x9312'9105, 0x9716'9509, 0x9b1a'990d, 0x9f1e'9d11}, - {0xa322'a115, 0xa726'a519, 0xab2a'a91d, 0xaf2e'ad21}, - {0xb332'b125, 0xb736'b529, 0xbb3a'b92d, 0xbf3e'bd31}, - {0xc342'c135, 0xc746'c539, 0xcb4a'c93d, 0xcf4e'cd41}, - {0xd352'd145, 0xd756'd549, 0xdb5a'd94d, 0xdf5e'dd51}, - {0xe362'e155, 0xe766'e559, 0xeb6a'e95d, 0xef6e'ed61}, - {0xf372'f165, 0xf776'f569, 0xfb7a'f96d, 0xff7e'fd71}}, - {{0x8706'8504'8302'80f5, 0x8f0e'8d0c'8b0a'88fd}, - {0x9716'9514'9312'9105, 0x9f1e'9d1c'9b1a'990d}, - {0xa726'a524'a322'a115, 0xaf2e'ad2c'ab2a'a91d}, - {0xb736'b534'b332'b125, 0xbf3e'bd3c'bb3a'b92d}, - {0xc746'c544'c342'c135, 0xcf4e'cd4c'cb4a'c93d}, - {0xd756'd554'd352'd145, 0xdf5e'dd5c'db5a'd94d}, - {0xe766'e564'e362'e155, 0xef6e'ed6c'eb6a'e95d}, - {0xf776'f574'f372'f165, 0xff7e'fd7c'fb7a'f96d}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x810c0457, // Vsaddu.vv v8, v16, v24, v0.t - {{0, 255, 6, 255, 13, 255, 18, 255, 25, 255, 30, 255, 36, 255, 42, 255}, - {48, 255, 54, 255, 61, 255, 66, 255, 73, 255, 78, 255, 84, 255, 90, 255}, - {96, 255, 102, 255, 109, 255, 114, 255, 121, 255, 126, 255, 132, 255, 138, 255}, - {144, 255, 150, 255, 157, 255, 162, 255, 169, 255, 174, 255, 180, 255, 186, 255}, - {192, 211, 198, 217, 205, 223, 210, 229, 217, 203, 222, 209, 228, 215, 234, 221}, - {240, 255, 246, 255, 253, 255, 255, 255, 255, 251, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, - {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xd3c0, 0xd9c6, 0xdfcd, 0xe5d2, 0xcbd9, 0xd1de, 0xd7e4, 0xddea}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xfc09, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, - {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xd9c6'd3c0, 0xe5d2'dfcd, 0xd1de'cbd9, 0xddea'd7e4}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xe5d2'dfcd'd9c6'd3c0, 0xddea'd7e4'd1de'cbd9}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x8100c457, // Vsaddu.vx v8, v16, x1, v0.t - {{170, 255, 172, 255, 174, 255, 176, 255, 178, 255, 180, 255, 182, 255, 184, 255}, - {186, 255, 188, 255, 190, 255, 192, 255, 194, 255, 196, 255, 198, 255, 200, 255}, - {202, 255, 204, 255, 206, 255, 208, 255, 210, 255, 212, 255, 214, 255, 216, 255}, - {218, 255, 220, 255, 222, 255, 224, 255, 226, 255, 228, 255, 230, 255, 232, 255}, - {234, 255, 236, 255, 238, 255, 240, 255, 242, 255, 244, 255, 246, 255, 248, 255}, - {250, 255, 252, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, - {{0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, - {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x810ab457, // Vsaddu.vi v8, v16, -0xb, v0.t - {{245, 255, 247, 255, 249, 255, 251, 255, 253, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - 
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, - {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, - {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, - {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, - {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, - {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, - {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x850c0457, // Vsadd.vv v8, v16, v24, v0.t - {{0, 128, 6, 128, 13, 128, 18, 128, 25, 128, 30, 128, 36, 128, 42, 128}, - {48, 128, 54, 128, 61, 128, 66, 128, 73, 128, 78, 128, 84, 128, 90, 128}, - {96, 128, 102, 128, 109, 128, 114, 133, 121, 128, 126, 128, 127, 128, 127, 128}, - {127, 163, 127, 169, 127, 175, 127, 181, 127, 155, 127, 161, 127, 167, 127, 173}, - {192, 211, 
198, 217, 205, 223, 210, 229, 217, 203, 222, 209, 228, 215, 234, 221}, - {240, 3, 246, 9, 253, 15, 2, 21, 9, 251, 14, 1, 20, 7, 26, 13}, - {32, 51, 38, 57, 45, 63, 50, 69, 57, 43, 62, 49, 68, 55, 74, 61}, - {80, 99, 86, 105, 93, 111, 98, 117, 105, 91, 110, 97, 116, 103, 122, 109}}, - {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8572, 0x8000, 0x8000, 0x8000, 0x8000}, - {0xa390, 0xa996, 0xaf9d, 0xb5a2, 0x9ba9, 0xa1ae, 0xa7b4, 0xadba}, - {0xd3c0, 0xd9c6, 0xdfcd, 0xe5d2, 0xcbd9, 0xd1de, 0xd7e4, 0xddea}, - {0x03f0, 0x09f6, 0x0ffd, 0x1602, 0xfc09, 0x020e, 0x0814, 0x0e1a}, - {0x3420, 0x3a26, 0x402d, 0x4632, 0x2c39, 0x323e, 0x3844, 0x3e4a}, - {0x6450, 0x6a56, 0x705d, 0x7662, 0x5c69, 0x626e, 0x6874, 0x6e7a}}, - {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8573'7f6d, 0x8000'0000, 0x8000'0000}, - {0xa997'a390, 0xb5a3'af9d, 0xa1af'9ba9, 0xadbb'a7b4}, - {0xd9c6'd3c0, 0xe5d2'dfcd, 0xd1de'cbd9, 0xddea'd7e4}, - {0x09f7'03f0, 0x1603'0ffd, 0x020e'fc09, 0x0e1b'0814}, - {0x3a27'3420, 0x4633'402d, 0x323f'2c39, 0x3e4b'3844}, - {0x6a57'6450, 0x7663'705d, 0x626f'5c69, 0x6e7b'6874}}, - {{0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8573'7f6e'7967'7360, 0x8000'0000'0000'0000}, - {0xb5a3'af9e'a997'a390, 0xadbb'a7b5'a1af'9ba9}, - {0xe5d2'dfcd'd9c6'd3c0, 0xddea'd7e4'd1de'cbd9}, - {0x1603'0ffe'09f7'03f0, 0x0e1b'0815'020e'fc09}, - {0x4633'402e'3a27'3420, 0x3e4b'3845'323f'2c39}, - {0x7663'705e'6a57'6450, 0x6e7b'6875'626f'5c69}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x8500c457, // Vsadd.vx v8, v16, x1, v0.t - {{170, 128, 172, 128, 174, 128, 176, 128, 178, 128, 180, 128, 182, 128, 184, 128}, - {186, 128, 188, 128, 190, 128, 192, 128, 194, 128, 196, 128, 198, 128, 200, 128}, - {202, 128, 204, 128, 206, 128, 208, 128, 210, 
128, 212, 128, 214, 128, 216, 128}, - {218, 128, 220, 128, 222, 128, 224, 128, 226, 128, 228, 128, 230, 128, 232, 128}, - {234, 128, 236, 128, 238, 128, 240, 128, 242, 128, 244, 128, 246, 128, 248, 128}, - {250, 128, 252, 128, 254, 128, 0, 129, 2, 131, 4, 133, 6, 135, 8, 137}, - {10, 139, 12, 141, 14, 143, 16, 145, 18, 147, 20, 149, 22, 151, 24, 153}, - {26, 155, 28, 157, 30, 159, 32, 161, 34, 163, 36, 165, 38, 167, 40, 169}}, - {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, - {0x8000, 0x8000, 0x8000, 0x8200, 0x8402, 0x8604, 0x8806, 0x8a08}, - {0x8c0a, 0x8e0c, 0x900e, 0x9210, 0x9412, 0x9614, 0x9816, 0x9a18}, - {0x9c1a, 0x9e1c, 0xa01e, 0xa220, 0xa422, 0xa624, 0xa826, 0xaa28}}, - {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, - {0x8000'0000, 0x8201'7ffe, 0x8605'8402, 0x8a09'8806}, - {0x8e0d'8c0a, 0x9211'900e, 0x9615'9412, 0x9a19'9816}, - {0x9e1d'9c1a, 0xa221'a01e, 0xa625'a422, 0xaa29'a826}}, - {{0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, - {0x8201'7fff'7dfd'7bfa, 0x8a09'8807'8605'8402}, - {0x9211'900f'8e0d'8c0a, 0x9a19'9817'9615'9412}, - {0xa221'a01f'9e1d'9c1a, 0xaa29'a827'a625'a422}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x850ab457, // Vsadd.vi v8, v16, -0xb, v0.t - {{245, 128, 247, 128, 249, 128, 251, 128, 253, 128, 255, 128, 1, 
130, 3, 132}, - {5, 134, 7, 136, 9, 138, 11, 140, 13, 142, 15, 144, 17, 146, 19, 148}, - {21, 150, 23, 152, 25, 154, 27, 156, 29, 158, 31, 160, 33, 162, 35, 164}, - {37, 166, 39, 168, 41, 170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180}, - {53, 182, 55, 184, 57, 186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196}, - {69, 198, 71, 200, 73, 202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212}, - {85, 214, 87, 216, 89, 218, 91, 220, 93, 222, 95, 224, 97, 226, 99, 228}, - {101, 230, 103, 232, 105, 234, 107, 236, 109, 238, 111, 240, 113, 242, 115, 244}}, - {{0x80f5, 0x82f7, 0x84f9, 0x86fb, 0x88fd, 0x8aff, 0x8d01, 0x8f03}, - {0x9105, 0x9307, 0x9509, 0x970b, 0x990d, 0x9b0f, 0x9d11, 0x9f13}, - {0xa115, 0xa317, 0xa519, 0xa71b, 0xa91d, 0xab1f, 0xad21, 0xaf23}, - {0xb125, 0xb327, 0xb529, 0xb72b, 0xb92d, 0xbb2f, 0xbd31, 0xbf33}, - {0xc135, 0xc337, 0xc539, 0xc73b, 0xc93d, 0xcb3f, 0xcd41, 0xcf43}, - {0xd145, 0xd347, 0xd549, 0xd74b, 0xd94d, 0xdb4f, 0xdd51, 0xdf53}, - {0xe155, 0xe357, 0xe559, 0xe75b, 0xe95d, 0xeb5f, 0xed61, 0xef63}, - {0xf165, 0xf367, 0xf569, 0xf76b, 0xf96d, 0xfb6f, 0xfd71, 0xff73}}, - {{0x8302'80f5, 0x8706'84f9, 0x8b0a'88fd, 0x8f0e'8d01}, - {0x9312'9105, 0x9716'9509, 0x9b1a'990d, 0x9f1e'9d11}, - {0xa322'a115, 0xa726'a519, 0xab2a'a91d, 0xaf2e'ad21}, - {0xb332'b125, 0xb736'b529, 0xbb3a'b92d, 0xbf3e'bd31}, - {0xc342'c135, 0xc746'c539, 0xcb4a'c93d, 0xcf4e'cd41}, - {0xd352'd145, 0xd756'd549, 0xdb5a'd94d, 0xdf5e'dd51}, - {0xe362'e155, 0xe766'e559, 0xeb6a'e95d, 0xef6e'ed61}, - {0xf372'f165, 0xf776'f569, 0xfb7a'f96d, 0xff7e'fd71}}, - {{0x8706'8504'8302'80f5, 0x8f0e'8d0c'8b0a'88fd}, - {0x9716'9514'9312'9105, 0x9f1e'9d1c'9b1a'990d}, - {0xa726'a524'a322'a115, 0xaf2e'ad2c'ab2a'a91d}, - {0xb736'b534'b332'b125, 0xbf3e'bd3c'bb3a'b92d}, - {0xc746'c544'c342'c135, 0xcf4e'cd4c'cb4a'c93d}, - {0xd756'd554'd352'd145, 0xdf5e'dd5c'db5a'd94d}, - {0xe766'e564'e362'e155, 0xef6e'ed6c'eb6a'e95d}, - {0xf776'f574'f372'f165, 0xff7e'fd7c'fb7a'f96d}}, - kVectorCalculationsSource); - - 
TestVectorFloatInstruction(0x010c1457, // vfadd.vv v8, v16, v24, v0.t - {{0x9604'9200, 0x9e0c'9a09, 0x8b0a'ae29, 0x8f35'af92}, - {0xb624'b220, 0xbe2c'ba29, 0xa634'a233, 0xae3c'aa38}, - {0xd644'd240, 0xde4c'da49, 0xc654'c251, 0xce5c'ca58}, - {0xf664'f260, 0xfe6c'fa69, 0xe674'e271, 0xee7c'ea78}, - {0xc342'c140, 0xc746'c544, 0xcb4a'c948, 0xcf4e'cd4c}, - {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, - {0xe362'e160, 0xe766'e4fe, 0xeb6a'e968, 0xef6e'ed6c}, - {0x76e2'8cfd, 0x7eec'78fb, 0xfb7a'f978, 0xff7e'fd7c}}, - {{0x9e0c'9a09'9604'9200, 0x8f0e'8d45'9f3b'9531}, - {0xbe2c'ba29'b624'b220, 0xae3c'aa38'a634'a231}, - {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251}, - {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0x7eec'7ae9'76e4'72e0, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x0100d457, // vfadd.vf v8, v16, f1, v0.t - {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x40b4'0000, 0x40b3'ffe9, 0x40b3'e8a9, 0x409c'2858}, - {0xc33d'2140, 0xc746'bfa4, 0xcb4a'c942, 0xcf4e'cd4c}, - {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, - {0xe362'e160, 0xe766'e564, 0xeb6a'e968, 0xef6e'ed6c}, - {0xf372'f170, 0xf776'f574, 0xfb7a'f978, 0xff7e'fd7c}}, - {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'7f85'0b0d'1315}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); - - TestWideningVectorFloatInstruction(0xc10c1457, // vfwadd.vv v8, v16, v24, v0.t - 
{{0xbac0'9240'0000'4140, 0xbbc1'9341'2000'0043}, - {0xb961'55c5'1088'0000, 0xb9e6'b5f2'4000'0000}, - {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, - {0xbcc6'9446'6d4c'8c00, 0xbdc7'9547'004f'4e8e}, - {0xc2c8'9a48'0000'0000, 0xc3c9'9b49'2000'0000}, - {0xc0ca'984a'2000'0000, 0xc1cb'994b'0000'0000}, - {0xc6cc'9e4c'0000'0000, 0xc7cd'9f4d'2000'0000}, - {0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}}, - kVectorCalculationsSource); - - TestWideningVectorFloatInstruction(0xd10c1457, // vfwadd.wv v8, v16, v24, v0.t - {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000}, - {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000}, - {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, - {0xbcc6'9446'2000'0000, 0xbf3e'bd3c'ea65'4738}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); - - TestWideningVectorFloatInstruction(0xd100d457, // vfwadd.wf v8, v16, f1, v0.t - {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'7f85'0b0d'1315}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); -} - TEST_F(Riscv64InterpreterTest, TestVectorMaskInstructions) { TestVectorMaskInstruction(128, intrinsics::InactiveProcessing::kAgnostic, @@ -8087,813 +7311,6 @@ TEST_F(Riscv64InterpreterTest, TestVectorMaskInstructions) { {0x5505'5415'07d5'5f57, 0x4055'5511'5445'5115}); } -TEST_F(Riscv64InterpreterTest, TestIota) { - TestVectorIota<false>(0x53082457, // viota.m v8, v16 - {{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1}, - {2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5}, - {6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9}, - {10, 
10, 11, 12, 12, 12, 12, 12, 12, 13, 14, 15, 15, 15, 15, 15}, - {16, 16, 16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19}, - {20, 20, 21, 21, 22, 22, 22, 22, 22, 23, 24, 24, 25, 25, 25, 25}, - {26, 26, 26, 27, 28, 28, 28, 28, 28, 29, 29, 30, 31, 31, 31, 31}, - {32, 32, 33, 34, 35, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39}}, - {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001}, - {0x0002, 0x0002, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003}, - {0x0003, 0x0004, 0x0005, 0x0005, 0x0005, 0x0005, 0x0005, 0x0005}, - {0x0006, 0x0006, 0x0006, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007}, - {0x0007, 0x0008, 0x0008, 0x0009, 0x0009, 0x0009, 0x0009, 0x0009}, - {0x000a, 0x000a, 0x000b, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c}, - {0x000c, 0x000d, 0x000e, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f}}, - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0001, 0x0000'0001, 0x0000'0001}, - {0x0000'0001, 0x0000'0001, 0x0000'0001, 0x0000'0001}, - {0x0000'0002, 0x0000'0002, 0x0000'0003, 0x0000'0003}, - {0x0000'0003, 0x0000'0003, 0x0000'0003, 0x0000'0003}, - {0x0000'0003, 0x0000'0004, 0x0000'0005, 0x0000'0005}, - {0x0000'0005, 0x0000'0005, 0x0000'0005, 0x0000'0005}}, - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0001}, - {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}, - {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}, - {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}}, - kVectorCalculationsSource[0]); - TestVectorIota<true>(0x51082457, // viota.m v8, v16, v0.t - {{0, 0x55, 0, 0, 0x55, 0, 0x55, 0, 0, 0x55, 1, 0x55, 1, 1, 0x55, 1}, - {2, 2, 0x55, 3, 0x55, 3, 3, 0x55, 3, 0x55, 4, 4, 0x55, 4, 0x55, 4}, - {5, 0x55, 5, 0x55, 6, 6, 0x55, 
6, 0x55, 6, 6, 0x55, 7, 0x55, 7, 7}, - {8, 0x55, 8, 9, 0x55, 9, 0x55, 9, 9, 0x55, 10, 0x55, 11, 0x55, 11, 11}, - {12, 0x55, 12, 0x55, 12, 12, 0x55, 12, 12, 13, 0x55, 13, 14, 14, 14, 0x55}, - {14, 0x55, 14, 14, 0x55, 15, 15, 15, 0x55, 15, 16, 16, 17, 0x55, 17, 17}, - {18, 18, 0x55, 18, 19, 19, 0x55, 19, 19, 20, 20, 0x55, 21, 0x55, 21, 0x55}, - {21, 21, 22, 0x55, 23, 23, 23, 23, 0x55, 23, 0x55, 24, 0x55, 25, 25, 0x55}}, - {{0x0000, 0x5555, 0x0000, 0x0000, 0x5555, 0x0000, 0x5555, 0x0000}, - {0x0000, 0x5555, 0x0001, 0x5555, 0x0001, 0x0001, 0x5555, 0x0001}, - {0x0002, 0x0002, 0x5555, 0x0003, 0x5555, 0x0003, 0x0003, 0x5555}, - {0x0003, 0x5555, 0x0004, 0x0004, 0x5555, 0x0004, 0x5555, 0x0004}, - {0x0005, 0x5555, 0x0005, 0x5555, 0x0006, 0x0006, 0x5555, 0x0006}, - {0x5555, 0x0006, 0x0006, 0x5555, 0x0007, 0x5555, 0x0007, 0x0007}, - {0x0008, 0x5555, 0x0008, 0x0009, 0x5555, 0x0009, 0x5555, 0x0009}, - {0x0009, 0x5555, 0x000a, 0x5555, 0x000b, 0x5555, 0x000b, 0x000b}}, - {{0x0000'0000, 0x5555'5555, 0x0000'0000, 0x0000'0000}, - {0x5555'5555, 0x0000'0000, 0x5555'5555, 0x0000'0000}, - {0x0000'0000, 0x5555'5555, 0x0000'0001, 0x5555'5555}, - {0x0000'0001, 0x0000'0001, 0x5555'5555, 0x0000'0001}, - {0x0000'0002, 0x0000'0002, 0x5555'5555, 0x0000'0003}, - {0x5555'5555, 0x0000'0003, 0x0000'0003, 0x5555'5555}, - {0x0000'0003, 0x5555'5555, 0x0000'0004, 0x0000'0004}, - {0x5555'5555, 0x0000'0004, 0x5555'5555, 0x0000'0004}}, - {{0x0000'0000'0000'0000, 0x5555'5555'5555'5555}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x5555'5555'5555'5555, 0x0000'0000'0000'0000}, - {0x5555'5555'5555'5555, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x5555'5555'5555'5555}, - {0x0000'0000'0000'0001, 0x5555'5555'5555'5555}, - {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}, - {0x5555'5555'5555'5555, 0x0000'0000'0000'0001}}, - kVectorCalculationsSource[0]); -} - -TEST_F(Riscv64InterpreterTest, TestVid) { - TestVectorInstruction( - 0x5008a457, // Vid.v v8, v0.t - {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15}, - {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, - {32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}, - {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}, - {64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}, - {80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}, - {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111}, - {112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127}}, - {{0, 1, 2, 3, 4, 5, 6, 7}, - {8, 9, 10, 11, 12, 13, 14, 15}, - {16, 17, 18, 19, 20, 21, 22, 23}, - {24, 25, 26, 27, 28, 29, 30, 31}, - {32, 33, 34, 35, 36, 37, 38, 39}, - {40, 41, 42, 43, 44, 45, 46, 47}, - {48, 49, 50, 51, 52, 53, 54, 55}, - {56, 57, 58, 59, 60, 61, 62, 63}}, - {{0, 1, 2, 3}, - {4, 5, 6, 7}, - {8, 9, 10, 11}, - {12, 13, 14, 15}, - {16, 17, 18, 19}, - {20, 21, 22, 23}, - {24, 25, 26, 27}, - {28, 29, 30, 31}}, - {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}, {12, 13}, {14, 15}}, - kVectorCalculationsSourceLegacy); -} - -TEST_F(Riscv64InterpreterTest, TestVrsub) { - TestVectorInstruction( - 0xd00c457, // Vrsub.vi v8, v16, x1, v0.t - {{170, 41, 168, 39, 166, 37, 164, 35, 162, 33, 160, 31, 158, 29, 156, 27}, - {154, 25, 152, 23, 150, 21, 148, 19, 146, 17, 144, 15, 142, 13, 140, 11}, - {138, 9, 136, 7, 134, 5, 132, 3, 130, 1, 128, 255, 126, 253, 124, 251}, - {122, 249, 120, 247, 118, 245, 116, 243, 114, 241, 112, 239, 110, 237, 108, 235}, - {106, 233, 104, 231, 102, 229, 100, 227, 98, 225, 96, 223, 94, 221, 92, 219}, - {90, 217, 88, 215, 86, 213, 84, 211, 82, 209, 80, 207, 78, 205, 76, 203}, - {74, 201, 72, 199, 70, 197, 68, 195, 66, 193, 64, 191, 62, 189, 60, 187}, - {58, 185, 56, 183, 54, 181, 52, 179, 50, 177, 48, 175, 46, 173, 44, 171}}, - {{0x29aa, 0x27a8, 0x25a6, 0x23a4, 0x21a2, 0x1fa0, 0x1d9e, 0x1b9c}, - {0x199a, 0x1798, 0x1596, 0x1394, 0x1192, 0x0f90, 0x0d8e, 0x0b8c}, - {0x098a, 0x0788, 0x0586, 0x0384, 0x0182, 0xff80, 0xfd7e, 
0xfb7c}, - {0xf97a, 0xf778, 0xf576, 0xf374, 0xf172, 0xef70, 0xed6e, 0xeb6c}, - {0xe96a, 0xe768, 0xe566, 0xe364, 0xe162, 0xdf60, 0xdd5e, 0xdb5c}, - {0xd95a, 0xd758, 0xd556, 0xd354, 0xd152, 0xcf50, 0xcd4e, 0xcb4c}, - {0xc94a, 0xc748, 0xc546, 0xc344, 0xc142, 0xbf40, 0xbd3e, 0xbb3c}, - {0xb93a, 0xb738, 0xb536, 0xb334, 0xb132, 0xaf30, 0xad2e, 0xab2c}}, - {{0x27a8'29aa, 0x23a4'25a6, 0x1fa0'21a2, 0x1b9c'1d9e}, - {0x1798'199a, 0x1394'1596, 0x0f90'1192, 0x0b8c'0d8e}, - {0x0788'098a, 0x0384'0586, 0xff80'0182, 0xfb7b'fd7e}, - {0xf777'f97a, 0xf373'f576, 0xef6f'f172, 0xeb6b'ed6e}, - {0xe767'e96a, 0xe363'e566, 0xdf5f'e162, 0xdb5b'dd5e}, - {0xd757'd95a, 0xd353'd556, 0xcf4f'd152, 0xcb4b'cd4e}, - {0xc747'c94a, 0xc343'c546, 0xbf3f'c142, 0xbb3b'bd3e}, - {0xb737'b93a, 0xb333'b536, 0xaf2f'b132, 0xab2b'ad2e}}, - {{0x23a4'25a6'27a8'29aa, 0x1b9c'1d9e'1fa0'21a2}, - {0x1394'1596'1798'199a, 0x0b8c'0d8e'0f90'1192}, - {0x0384'0586'0788'098a, 0xfb7b'fd7d'ff80'0182}, - {0xf373'f575'f777'f97a, 0xeb6b'ed6d'ef6f'f172}, - {0xe363'e565'e767'e96a, 0xdb5b'dd5d'df5f'e162}, - {0xd353'd555'd757'd95a, 0xcb4b'cd4d'cf4f'd152}, - {0xc343'c545'c747'c94a, 0xbb3b'bd3d'bf3f'c142}, - {0xb333'b535'b737'b93a, 0xab2b'ad2d'af2f'b132}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0xd0ab457, // Vrsub.vi v8, v16, -0xb, v0.t - {{245, 116, 243, 114, 241, 112, 239, 110, 237, 108, 235, 106, 233, 104, 231, 102}, - {229, 100, 227, 98, 225, 96, 223, 94, 221, 92, 219, 90, 217, 88, 215, 86}, - {213, 84, 211, 82, 209, 80, 207, 78, 205, 76, 203, 74, 201, 72, 199, 70}, - {197, 68, 195, 66, 193, 64, 191, 62, 189, 60, 187, 58, 185, 56, 183, 54}, - {181, 52, 179, 50, 177, 48, 175, 46, 173, 44, 171, 42, 169, 40, 167, 38}, - {165, 36, 163, 34, 161, 32, 159, 30, 157, 28, 155, 26, 153, 24, 151, 22}, - {149, 20, 147, 18, 145, 16, 143, 14, 141, 12, 139, 10, 137, 8, 135, 6}, - {133, 4, 131, 2, 129, 0, 127, 254, 125, 252, 123, 250, 121, 248, 119, 246}}, - {{0x7ef5, 0x7cf3, 0x7af1, 0x78ef, 0x76ed, 0x74eb, 0x72e9, 0x70e7}, - 
{0x6ee5, 0x6ce3, 0x6ae1, 0x68df, 0x66dd, 0x64db, 0x62d9, 0x60d7}, - {0x5ed5, 0x5cd3, 0x5ad1, 0x58cf, 0x56cd, 0x54cb, 0x52c9, 0x50c7}, - {0x4ec5, 0x4cc3, 0x4ac1, 0x48bf, 0x46bd, 0x44bb, 0x42b9, 0x40b7}, - {0x3eb5, 0x3cb3, 0x3ab1, 0x38af, 0x36ad, 0x34ab, 0x32a9, 0x30a7}, - {0x2ea5, 0x2ca3, 0x2aa1, 0x289f, 0x269d, 0x249b, 0x2299, 0x2097}, - {0x1e95, 0x1c93, 0x1a91, 0x188f, 0x168d, 0x148b, 0x1289, 0x1087}, - {0x0e85, 0x0c83, 0x0a81, 0x087f, 0x067d, 0x047b, 0x0279, 0x0077}}, - {{0x7cfd'7ef5, 0x78f9'7af1, 0x74f5'76ed, 0x70f1'72e9}, - {0x6ced'6ee5, 0x68e9'6ae1, 0x64e5'66dd, 0x60e1'62d9}, - {0x5cdd'5ed5, 0x58d9'5ad1, 0x54d5'56cd, 0x50d1'52c9}, - {0x4ccd'4ec5, 0x48c9'4ac1, 0x44c5'46bd, 0x40c1'42b9}, - {0x3cbd'3eb5, 0x38b9'3ab1, 0x34b5'36ad, 0x30b1'32a9}, - {0x2cad'2ea5, 0x28a9'2aa1, 0x24a5'269d, 0x20a1'2299}, - {0x1c9d'1e95, 0x1899'1a91, 0x1495'168d, 0x1091'1289}, - {0x0c8d'0e85, 0x0889'0a81, 0x0485'067d, 0x0081'0279}}, - {{0x78f9'7afb'7cfd'7ef5, 0x70f1'72f3'74f5'76ed}, - {0x68e9'6aeb'6ced'6ee5, 0x60e1'62e3'64e5'66dd}, - {0x58d9'5adb'5cdd'5ed5, 0x50d1'52d3'54d5'56cd}, - {0x48c9'4acb'4ccd'4ec5, 0x40c1'42c3'44c5'46bd}, - {0x38b9'3abb'3cbd'3eb5, 0x30b1'32b3'34b5'36ad}, - {0x28a9'2aab'2cad'2ea5, 0x20a1'22a3'24a5'269d}, - {0x1899'1a9b'1c9d'1e95, 0x1091'1293'1495'168d}, - {0x0889'0a8b'0c8d'0e85, 0x0081'0283'0485'067d}}, - kVectorCalculationsSourceLegacy); - - TestVectorFloatInstruction(0x9d00d457, // vfrsub.vf v8, v16, f1, v0.t - {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x40b4'0000, 0x40b4'0017, 0x40b4'1757, 0x40cb'd7a8}, - {0x4348'6140, 0x4746'cae4, 0x4b4a'c94e, 0x4f4e'cd4c}, - {0x5352'd150, 0x5756'd554, 0x5b5a'd958, 0x5f5e'dd5c}, - {0x6362'e160, 0x6766'e564, 0x6b6a'e968, 0x6f6e'ed6c}, - {0x7372'f170, 0x7776'f574, 0x7b7a'f978, 0x7f7e'fd7c}}, - {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - 
{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'807a'f4f2'eceb}, - {0x4746'c544'c342'c140, 0x4f4e'cd4c'cb4a'c948}, - {0x5756'd554'd352'd150, 0x5f5e'dd5c'db5a'd958}, - {0x6766'e564'e362'e160, 0x6f6e'ed6c'eb6a'e968}, - {0x7776'f574'f372'f170, 0x7f7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); -} - -TEST_F(Riscv64InterpreterTest, TestVsub) { - TestVectorInstruction( - 0x90c0457, // Vsub.vv v8, v16, v24, v0.t - {{0, 127, 254, 125, 251, 123, 250, 121, 247, 119, 246, 117, 244, 115, 242, 113}, - {240, 111, 238, 109, 235, 107, 234, 105, 231, 103, 230, 101, 228, 99, 226, 97}, - {224, 95, 222, 93, 219, 91, 218, 89, 215, 87, 214, 85, 212, 83, 210, 81}, - {208, 79, 206, 77, 203, 75, 202, 73, 199, 71, 198, 69, 196, 67, 194, 65}, - {192, 63, 190, 61, 187, 59, 186, 57, 183, 55, 182, 53, 180, 51, 178, 49}, - {176, 47, 174, 45, 171, 43, 170, 41, 167, 39, 166, 37, 164, 35, 162, 33}, - {160, 31, 158, 29, 155, 27, 154, 25, 151, 23, 150, 21, 148, 19, 146, 17}, - {144, 15, 142, 13, 139, 11, 138, 9, 135, 7, 134, 5, 132, 3, 130, 1}}, - {{0x7f00, 0x7cfe, 0x7afb, 0x78fa, 0x76f7, 0x74f6, 0x72f4, 0x70f2}, - {0x6ef0, 0x6cee, 0x6aeb, 0x68ea, 0x66e7, 0x64e6, 0x62e4, 0x60e2}, - {0x5ee0, 0x5cde, 0x5adb, 0x58da, 0x56d7, 0x54d6, 0x52d4, 0x50d2}, - {0x4ed0, 0x4cce, 0x4acb, 0x48ca, 0x46c7, 0x44c6, 0x42c4, 0x40c2}, - {0x3ec0, 0x3cbe, 0x3abb, 0x38ba, 0x36b7, 0x34b6, 0x32b4, 0x30b2}, - {0x2eb0, 0x2cae, 0x2aab, 0x28aa, 0x26a7, 0x24a6, 0x22a4, 0x20a2}, - {0x1ea0, 0x1c9e, 0x1a9b, 0x189a, 0x1697, 0x1496, 0x1294, 0x1092}, - {0x0e90, 0x0c8e, 0x0a8b, 0x088a, 0x0687, 0x0486, 0x0284, 0x0082}}, - {{0x7cfe'7f00, 0x78fa'7afb, 0x74f6'76f7, 0x70f2'72f4}, - {0x6cee'6ef0, 0x68ea'6aeb, 0x64e6'66e7, 0x60e2'62e4}, - {0x5cde'5ee0, 0x58da'5adb, 0x54d6'56d7, 0x50d2'52d4}, - {0x4cce'4ed0, 0x48ca'4acb, 0x44c6'46c7, 0x40c2'42c4}, - {0x3cbe'3ec0, 0x38ba'3abb, 0x34b6'36b7, 0x30b2'32b4}, - {0x2cae'2eb0, 0x28aa'2aab, 0x24a6'26a7, 0x20a2'22a4}, - {0x1c9e'1ea0, 0x189a'1a9b, 0x1496'1697, 
0x1092'1294}, - {0x0c8e'0e90, 0x088a'0a8b, 0x0486'0687, 0x0082'0284}}, - {{0x78fa'7afb'7cfe'7f00, 0x70f2'72f4'74f6'76f7}, - {0x68ea'6aeb'6cee'6ef0, 0x60e2'62e4'64e6'66e7}, - {0x58da'5adb'5cde'5ee0, 0x50d2'52d4'54d6'56d7}, - {0x48ca'4acb'4cce'4ed0, 0x40c2'42c4'44c6'46c7}, - {0x38ba'3abb'3cbe'3ec0, 0x30b2'32b4'34b6'36b7}, - {0x28aa'2aab'2cae'2eb0, 0x20a2'22a4'24a6'26a7}, - {0x189a'1a9b'1c9e'1ea0, 0x1092'1294'1496'1697}, - {0x088a'0a8b'0c8e'0e90, 0x0082'0284'0486'0687}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x900c457, // Vsub.vx v8, v16, x1, v0.t - {{86, 215, 88, 217, 90, 219, 92, 221, 94, 223, 96, 225, 98, 227, 100, 229}, - {102, 231, 104, 233, 106, 235, 108, 237, 110, 239, 112, 241, 114, 243, 116, 245}, - {118, 247, 120, 249, 122, 251, 124, 253, 126, 255, 128, 1, 130, 3, 132, 5}, - {134, 7, 136, 9, 138, 11, 140, 13, 142, 15, 144, 17, 146, 19, 148, 21}, - {150, 23, 152, 25, 154, 27, 156, 29, 158, 31, 160, 33, 162, 35, 164, 37}, - {166, 39, 168, 41, 170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180, 53}, - {182, 55, 184, 57, 186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196, 69}, - {198, 71, 200, 73, 202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212, 85}}, - {{0xd656, 0xd858, 0xda5a, 0xdc5c, 0xde5e, 0xe060, 0xe262, 0xe464}, - {0xe666, 0xe868, 0xea6a, 0xec6c, 0xee6e, 0xf070, 0xf272, 0xf474}, - {0xf676, 0xf878, 0xfa7a, 0xfc7c, 0xfe7e, 0x0080, 0x0282, 0x0484}, - {0x0686, 0x0888, 0x0a8a, 0x0c8c, 0x0e8e, 0x1090, 0x1292, 0x1494}, - {0x1696, 0x1898, 0x1a9a, 0x1c9c, 0x1e9e, 0x20a0, 0x22a2, 0x24a4}, - {0x26a6, 0x28a8, 0x2aaa, 0x2cac, 0x2eae, 0x30b0, 0x32b2, 0x34b4}, - {0x36b6, 0x38b8, 0x3aba, 0x3cbc, 0x3ebe, 0x40c0, 0x42c2, 0x44c4}, - {0x46c6, 0x48c8, 0x4aca, 0x4ccc, 0x4ece, 0x50d0, 0x52d2, 0x54d4}}, - {{0xd857'd656, 0xdc5b'da5a, 0xe05f'de5e, 0xe463'e262}, - {0xe867'e666, 0xec6b'ea6a, 0xf06f'ee6e, 0xf473'f272}, - {0xf877'f676, 0xfc7b'fa7a, 0x007f'fe7e, 0x0484'0282}, - {0x0888'0686, 0x0c8c'0a8a, 0x1090'0e8e, 0x1494'1292}, - {0x1898'1696, 0x1c9c'1a9a, 
0x20a0'1e9e, 0x24a4'22a2}, - {0x28a8'26a6, 0x2cac'2aaa, 0x30b0'2eae, 0x34b4'32b2}, - {0x38b8'36b6, 0x3cbc'3aba, 0x40c0'3ebe, 0x44c4'42c2}, - {0x48c8'46c6, 0x4ccc'4aca, 0x50d0'4ece, 0x54d4'52d2}}, - {{0xdc5b'da59'd857'd656, 0xe463'e261'e05f'de5e}, - {0xec6b'ea69'e867'e666, 0xf473'f271'f06f'ee6e}, - {0xfc7b'fa79'f877'f676, 0x0484'0282'007f'fe7e}, - {0x0c8c'0a8a'0888'0686, 0x1494'1292'1090'0e8e}, - {0x1c9c'1a9a'1898'1696, 0x24a4'22a2'20a0'1e9e}, - {0x2cac'2aaa'28a8'26a6, 0x34b4'32b2'30b0'2eae}, - {0x3cbc'3aba'38b8'36b6, 0x44c4'42c2'40c0'3ebe}, - {0x4ccc'4aca'48c8'46c6, 0x54d4'52d2'50d0'4ece}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction(0x890c0457, // Vssubu.vv v8, v16, v24, v0.t - {{0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 5, 0, 3, 0, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 175, 0, 173, 0, 171, 0, 169, 0, 199, 0, 197, 0, 195, 0, 193}, - {0, 159, 0, 157, 0, 155, 0, 153, 0, 183, 0, 181, 0, 179, 0, 177}, - {0, 143, 0, 141, 0, 139, 0, 137, 0, 167, 0, 165, 0, 163, 0, 161}, - {0, 127, 0, 125, 0, 123, 0, 121, 0, 151, 0, 149, 0, 147, 0, 145}}, - {{0x0000, 0x0000, 0x0000, 0x0000, 0x06f7, 0x04f6, 0x02f4, 0x00f2}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0xaec0, 0xacbe, 0xaabb, 0xa8ba, 0xc6b7, 0xc4b6, 0xc2b4, 0xc0b2}, - {0x9eb0, 0x9cae, 0x9aab, 0x98aa, 0xb6a7, 0xb4a6, 0xb2a4, 0xb0a2}, - {0x8ea0, 0x8c9e, 0x8a9b, 0x889a, 0xa697, 0xa496, 0xa294, 0xa092}, - {0x7e90, 0x7c8e, 0x7a8b, 0x788a, 0x9687, 0x9486, 0x9284, 0x9082}}, - {{0x0000'0000, 0x0000'0000, 0x04f6'06f7, 0x00f2'02f4}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0xacbe'aec0, 0xa8ba'aabb, 0xc4b6'c6b7, 
0xc0b2'c2b4}, - {0x9cae'9eb0, 0x98aa'9aab, 0xb4a6'b6a7, 0xb0a2'b2a4}, - {0x8c9e'8ea0, 0x889a'8a9b, 0xa496'a697, 0xa092'a294}, - {0x7c8e'7e90, 0x788a'7a8b, 0x9486'9687, 0x9082'9284}}, - {{0x0000'0000'0000'0000, 0x00f2'02f4'04f6'06f7}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0xa8ba'aabb'acbe'aec0, 0xc0b2'c2b4'c4b6'c6b7}, - {0x98aa'9aab'9cae'9eb0, 0xb0a2'b2a4'b4a6'b6a7}, - {0x889a'8a9b'8c9e'8ea0, 0xa092'a294'a496'a697}, - {0x788a'7a8b'7c8e'7e90, 0x9082'9284'9486'9687}}, - kVectorCalculationsSource); - TestVectorInstruction(0x8900c457, // Vssubu.vx v8, v16, x1, v0.t - {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 5}, - {0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 19, 0, 21}, - {0, 23, 0, 25, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 37}, - {0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53}, - {0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69}, - {0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85}}, - {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, - {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0080, 0x0282, 0x0484}, - {0x0686, 0x0888, 0x0a8a, 0x0c8c, 0x0e8e, 0x1090, 0x1292, 0x1494}, - {0x1696, 0x1898, 0x1a9a, 0x1c9c, 0x1e9e, 0x20a0, 0x22a2, 0x24a4}, - {0x26a6, 0x28a8, 0x2aaa, 0x2cac, 0x2eae, 0x30b0, 0x32b2, 0x34b4}, - {0x36b6, 0x38b8, 0x3aba, 0x3cbc, 0x3ebe, 0x40c0, 0x42c2, 0x44c4}, - {0x46c6, 0x48c8, 0x4aca, 0x4ccc, 0x4ece, 0x50d0, 0x52d2, 0x54d4}}, - {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, - {0x0000'0000, 0x0000'0000, 0x007f'fe7e, 0x0484'0282}, - {0x0888'0686, 0x0c8c'0a8a, 0x1090'0e8e, 0x1494'1292}, - {0x1898'1696, 0x1c9c'1a9a, 0x20a0'1e9e, 0x24a4'22a2}, - {0x28a8'26a6, 0x2cac'2aaa, 
0x30b0'2eae, 0x34b4'32b2}, - {0x38b8'36b6, 0x3cbc'3aba, 0x40c0'3ebe, 0x44c4'42c2}, - {0x48c8'46c6, 0x4ccc'4aca, 0x50d0'4ece, 0x54d4'52d2}}, - {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, - {0x0000'0000'0000'0000, 0x0484'0282'007f'fe7e}, - {0x0c8c'0a8a'0888'0686, 0x1494'1292'1090'0e8e}, - {0x1c9c'1a9a'1898'1696, 0x24a4'22a2'20a0'1e9e}, - {0x2cac'2aaa'28a8'26a6, 0x34b4'32b2'30b0'2eae}, - {0x3cbc'3aba'38b8'36b6, 0x44c4'42c2'40c0'3ebe}, - {0x4ccc'4aca'48c8'46c6, 0x54d4'52d2'50d0'4ece}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x8d0c0457, // Vssub.vv v8, v16, v24, v0.t - {{0, 239, 254, 237, 251, 235, 250, 233, 247, 7, 246, 5, 244, 3, 242, 1}, - {240, 223, 238, 221, 235, 219, 234, 217, 231, 247, 230, 245, 228, 243, 226, 241}, - {224, 207, 222, 205, 219, 203, 218, 201, 215, 231, 214, 229, 212, 227, 210, 225}, - {208, 191, 206, 189, 203, 187, 202, 185, 199, 215, 198, 213, 196, 211, 194, 209}, - {127, 175, 127, 173, 127, 171, 127, 169, 127, 199, 127, 197, 127, 195, 127, 193}, - {127, 159, 127, 157, 127, 155, 127, 153, 127, 183, 127, 181, 127, 179, 127, 177}, - {127, 143, 127, 141, 127, 139, 127, 137, 127, 167, 127, 165, 127, 163, 127, 161}, - {127, 128, 127, 128, 127, 128, 127, 128, 127, 151, 127, 149, 127, 147, 127, 145}}, - {{0xef00, 0xecfe, 0xeafb, 0xe8fa, 0x06f7, 0x04f6, 0x02f4, 0x00f2}, - {0xdef0, 0xdcee, 0xdaeb, 0xd8ea, 0xf6e7, 0xf4e6, 0xf2e4, 0xf0e2}, - {0xcee0, 0xccde, 0xcadb, 0xc8da, 0xe6d7, 0xe4d6, 0xe2d4, 0xe0d2}, - {0xbed0, 0xbcce, 0xbacb, 0xb8ca, 0xd6c7, 0xd4c6, 0xd2c4, 0xd0c2}, - {0xaec0, 0xacbe, 0xaabb, 0xa8ba, 0xc6b7, 0xc4b6, 0xc2b4, 0xc0b2}, - {0x9eb0, 0x9cae, 0x9aab, 0x98aa, 0xb6a7, 0xb4a6, 0xb2a4, 0xb0a2}, - {0x8ea0, 0x8c9e, 0x8a9b, 0x889a, 0xa697, 0xa496, 0xa294, 0xa092}, - {0x8000, 0x8000, 0x8000, 0x8000, 0x9687, 0x9486, 0x9284, 0x9082}}, - {{0xecfd'ef00, 0xe8f9'eafb, 0x04f6'06f7, 0x00f2'02f4}, - {0xdced'def0, 0xd8e9'daeb, 0xf4e5'f6e7, 0xf0e1'f2e4}, - {0xccdd'cee0, 0xc8d9'cadb, 
0xe4d5'e6d7, 0xe0d1'e2d4}, - {0xbccd'bed0, 0xb8c9'bacb, 0xd4c5'd6c7, 0xd0c1'd2c4}, - {0xacbe'aec0, 0xa8ba'aabb, 0xc4b6'c6b7, 0xc0b2'c2b4}, - {0x9cae'9eb0, 0x98aa'9aab, 0xb4a6'b6a7, 0xb0a2'b2a4}, - {0x8c9e'8ea0, 0x889a'8a9b, 0xa496'a697, 0xa092'a294}, - {0x8000'0000, 0x8000'0000, 0x9486'9687, 0x9082'9284}}, - {{0xe8f9'eafa'ecfd'ef00, 0x00f2'02f4'04f6'06f7}, - {0xd8e9'daea'dced'def0, 0xf0e1'f2e3'f4e5'f6e7}, - {0xc8d9'cada'ccdd'cee0, 0xe0d1'e2d3'e4d5'e6d7}, - {0xb8c9'baca'bccd'bed0, 0xd0c1'd2c3'd4c5'd6c7}, - {0xa8ba'aabb'acbe'aec0, 0xc0b2'c2b4'c4b6'c6b7}, - {0x98aa'9aab'9cae'9eb0, 0xb0a2'b2a4'b4a6'b6a7}, - {0x889a'8a9b'8c9e'8ea0, 0xa092'a294'a496'a697}, - {0x8000'0000'0000'0000, 0x9082'9284'9486'9687}}, - kVectorCalculationsSource); - TestVectorInstruction( - 0x8d00c457, // Vssub.vx v8, v16, x1, v0.t - {{86, 215, 88, 217, 90, 219, 92, 221, 94, 223, 96, 225, 98, 227, 100, 229}, - {102, 231, 104, 233, 106, 235, 108, 237, 110, 239, 112, 241, 114, 243, 116, 245}, - {118, 247, 120, 249, 122, 251, 124, 253, 126, 255, 127, 1, 127, 3, 127, 5}, - {127, 7, 127, 9, 127, 11, 127, 13, 127, 15, 127, 17, 127, 19, 127, 21}, - {127, 23, 127, 25, 127, 27, 127, 29, 127, 31, 127, 33, 127, 35, 127, 37}, - {127, 39, 127, 41, 127, 43, 127, 45, 127, 47, 127, 49, 127, 51, 127, 53}, - {127, 55, 127, 57, 127, 59, 127, 61, 127, 63, 127, 65, 127, 67, 127, 69}, - {127, 71, 127, 73, 127, 75, 127, 77, 127, 79, 127, 81, 127, 83, 127, 85}}, - {{0xd656, 0xd858, 0xda5a, 0xdc5c, 0xde5e, 0xe060, 0xe262, 0xe464}, - {0xe666, 0xe868, 0xea6a, 0xec6c, 0xee6e, 0xf070, 0xf272, 0xf474}, - {0xf676, 0xf878, 0xfa7a, 0xfc7c, 0xfe7e, 0x0080, 0x0282, 0x0484}, - {0x0686, 0x0888, 0x0a8a, 0x0c8c, 0x0e8e, 0x1090, 0x1292, 0x1494}, - {0x1696, 0x1898, 0x1a9a, 0x1c9c, 0x1e9e, 0x20a0, 0x22a2, 0x24a4}, - {0x26a6, 0x28a8, 0x2aaa, 0x2cac, 0x2eae, 0x30b0, 0x32b2, 0x34b4}, - {0x36b6, 0x38b8, 0x3aba, 0x3cbc, 0x3ebe, 0x40c0, 0x42c2, 0x44c4}, - {0x46c6, 0x48c8, 0x4aca, 0x4ccc, 0x4ece, 0x50d0, 0x52d2, 0x54d4}}, - {{0xd857'd656, 
0xdc5b'da5a, 0xe05f'de5e, 0xe463'e262}, - {0xe867'e666, 0xec6b'ea6a, 0xf06f'ee6e, 0xf473'f272}, - {0xf877'f676, 0xfc7b'fa7a, 0x007f'fe7e, 0x0484'0282}, - {0x0888'0686, 0x0c8c'0a8a, 0x1090'0e8e, 0x1494'1292}, - {0x1898'1696, 0x1c9c'1a9a, 0x20a0'1e9e, 0x24a4'22a2}, - {0x28a8'26a6, 0x2cac'2aaa, 0x30b0'2eae, 0x34b4'32b2}, - {0x38b8'36b6, 0x3cbc'3aba, 0x40c0'3ebe, 0x44c4'42c2}, - {0x48c8'46c6, 0x4ccc'4aca, 0x50d0'4ece, 0x54d4'52d2}}, - {{0xdc5b'da59'd857'd656, 0xe463'e261'e05f'de5e}, - {0xec6b'ea69'e867'e666, 0xf473'f271'f06f'ee6e}, - {0xfc7b'fa79'f877'f676, 0x0484'0282'007f'fe7e}, - {0x0c8c'0a8a'0888'0686, 0x1494'1292'1090'0e8e}, - {0x1c9c'1a9a'1898'1696, 0x24a4'22a2'20a0'1e9e}, - {0x2cac'2aaa'28a8'26a6, 0x34b4'32b2'30b0'2eae}, - {0x3cbc'3aba'38b8'36b6, 0x44c4'42c2'40c0'3ebe}, - {0x4ccc'4aca'48c8'46c6, 0x54d4'52d2'50d0'4ece}}, - kVectorCalculationsSource); - - TestVectorFloatInstruction(0x090c1457, // vfsub.vv v8,v16,v24,v0.t - {{0x1604'9200, 0x1e0c'9a09, 0x8b0a'63e7, 0x8ece'd50c}, - {0x3624'b220, 0x3e2c'ba29, 0x2634'a22f, 0x2e3c'aa38}, - {0x5644'd240, 0x5e4c'da49, 0x4654'c251, 0x4e5c'ca58}, - {0x7664'f260, 0x7e6c'fa69, 0x6674'e271, 0x6e7c'ea78}, - {0xc342'c140, 0xc746'c544, 0xcb4a'c948, 0xcf4e'cd4c}, - {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, - {0xe362'e160, 0xe766'e5ca, 0xeb6a'e968, 0xef6e'ed6c}, - {0xf6e6'58c3, 0xfeec'7cd7, 0xfb7a'f978, 0xff7e'fd7c}}, - {{0x1e0c'9a09'9604'9200, 0x8f0e'8cd3'76d9'7cdf}, - {0x3e2c'ba29'b624'b220, 0x2e3c'aa38'a634'a231}, - {0x5e4c'da49'd644'd240, 0x4e5c'ca58'c654'c251}, - {0x7e6c'fa69'f664'f260, 0x6e7c'ea78'e674'e271}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xfeec'7ae9'76e4'72e0, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); - TestVectorFloatInstruction(0x0900d457, // vfsub.vf v8,v16,f1,v0.t - {{0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000}, - {0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000}, 
- {0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000}, - {0xc0b4'0000, 0xc0b4'0017, 0xc0b4'1757, 0xc0cb'd7a8}, - {0xc348'6140, 0xc746'cae4, 0xcb4a'c94e, 0xcf4e'cd4c}, - {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, - {0xe362'e160, 0xe766'e564, 0xeb6a'e968, 0xef6e'ed6c}, - {0xf372'f170, 0xf776'f574, 0xfb7a'f978, 0xff7e'fd7c}}, - {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'807a'f4f2'eceb}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); - - TestWideningVectorFloatInstruction(0xc90c1457, // vfwsub.vv v8, v16, v24, v0.t - {{0x3ac0'923f'ffff'bec0, 0x3bc1'9341'1fff'ffbd}, - {0xb961'4c7c'ef78'0000, 0xb9d9'daa1'8000'0000}, - {0x3ec4'9644'0000'0000, 0x3fc5'9745'2000'0000}, - {0x3cc6'9445'd2b3'7400, 0x3dc7'9546'ffb0'b172}, - {0x42c8'9a48'0000'0000, 0x43c9'9b49'2000'0000}, - {0x40ca'984a'2000'0000, 0x41cb'994b'0000'0000}, - {0x46cc'9e4c'0000'0000, 0x47cd'9f4d'2000'0000}, - {0x44ce'9c4e'2000'0000, 0x45cf'9d4f'0000'0000}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0xc900d457, // vfwsub.vf v8, v16, f1, v0.t - {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0003}, - {0xc016'8000'0000'02ab, 0xc016'8000'0002'bab5}, - {0xc016'8000'02ca'c4c0, 0xc016'8002'dad4'd000}, - {0xc016'82ea'e4e0'0000, 0xc019'7af4'f000'0000}}, - kVectorCalculationsSource); - - TestWideningVectorFloatInstruction(0xd90c1457, // vfwsub.wv v8, v16, v24, v0.t - {{0x3ac0'9240'0000'0000, 0x3bc1'9341'2000'0000}, - {0x38c2'9042'2000'0000, 0x39c3'9143'0000'0000}, - 
{0x3ec4'9644'0000'0000, 0x3fc5'9745'2000'0000}, - {0x3cc6'9446'2000'0000, 0xbf3e'bd3c'8c10'2b38}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0xd900d457, // vfwsub.wf v8, v16, f1, v0.t - {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, - {0xc016'8000'0000'0000, 0xc016'807a'f4f2'eceb}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, - kVectorCalculationsSource); -} - -TEST_F(Riscv64InterpreterTest, TestVand) { - TestVectorInstruction( - 0x250c0457, // Vand.vv v8, v16, v24, v0.t - {{0, 0, 0, 2, 0, 0, 4, 6, 0, 0, 0, 2, 8, 8, 12, 14}, - {0, 0, 0, 2, 0, 0, 4, 6, 16, 16, 16, 18, 24, 24, 28, 30}, - {0, 0, 0, 2, 0, 0, 4, 6, 0, 0, 0, 2, 8, 8, 12, 14}, - {32, 32, 32, 34, 32, 32, 36, 38, 48, 48, 48, 50, 56, 56, 60, 62}, - {0, 128, 0, 130, 0, 128, 4, 134, 0, 128, 0, 130, 8, 136, 12, 142}, - {0, 128, 0, 130, 0, 128, 4, 134, 16, 144, 16, 146, 24, 152, 28, 158}, - {64, 192, 64, 194, 64, 192, 68, 198, 64, 192, 64, 194, 72, 200, 76, 206}, - {96, 224, 96, 226, 96, 224, 100, 230, 112, 240, 112, 242, 120, 248, 124, 254}}, - {{0x0000, 0x0200, 0x0000, 0x0604, 0x0000, 0x0200, 0x0808, 0x0e0c}, - {0x0000, 0x0200, 0x0000, 0x0604, 0x1010, 0x1210, 0x1818, 0x1e1c}, - {0x0000, 0x0200, 0x0000, 0x0604, 0x0000, 0x0200, 0x0808, 0x0e0c}, - {0x2020, 0x2220, 0x2020, 0x2624, 0x3030, 0x3230, 0x3838, 0x3e3c}, - {0x8000, 0x8200, 0x8000, 0x8604, 0x8000, 0x8200, 0x8808, 0x8e0c}, - {0x8000, 0x8200, 0x8000, 0x8604, 0x9010, 0x9210, 0x9818, 0x9e1c}, - {0xc040, 0xc240, 0xc040, 0xc644, 0xc040, 0xc240, 0xc848, 0xce4c}, - {0xe060, 0xe260, 0xe060, 
0xe664, 0xf070, 0xf270, 0xf878, 0xfe7c}}, - {{0x0200'0000, 0x0604'0000, 0x0200'0000, 0x0e0c'0808}, - {0x0200'0000, 0x0604'0000, 0x1210'1010, 0x1e1c'1818}, - {0x0200'0000, 0x0604'0000, 0x0200'0000, 0x0e0c'0808}, - {0x2220'2020, 0x2624'2020, 0x3230'3030, 0x3e3c'3838}, - {0x8200'8000, 0x8604'8000, 0x8200'8000, 0x8e0c'8808}, - {0x8200'8000, 0x8604'8000, 0x9210'9010, 0x9e1c'9818}, - {0xc240'c040, 0xc644'c040, 0xc240'c040, 0xce4c'c848}, - {0xe260'e060, 0xe664'e060, 0xf270'f070, 0xfe7c'f878}}, - {{0x0604'0000'0200'0000, 0x0e0c'0808'0200'0000}, - {0x0604'0000'0200'0000, 0x1e1c'1818'1210'1010}, - {0x0604'0000'0200'0000, 0x0e0c'0808'0200'0000}, - {0x2624'2020'2220'2020, 0x3e3c'3838'3230'3030}, - {0x8604'8000'8200'8000, 0x8e0c'8808'8200'8000}, - {0x8604'8000'8200'8000, 0x9e1c'9818'9210'9010}, - {0xc644'c040'c240'c040, 0xce4c'c848'c240'c040}, - {0xe664'e060'e260'e060, 0xfe7c'f878'f270'f070}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction(0x2500c457, // Vand.vx v8, v16, x1, v0.t - {{0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, - {0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, - {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}, - {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}, - {0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, - {0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, - {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}, - {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}}, - {{0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, - {0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, - {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}, - {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}, - {0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, - {0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, - 
{0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}, - {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}}, - {{0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, - {0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, - {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}, - {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}, - {0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, - {0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, - {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}, - {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}}, - {{0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, - {0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, - {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}, - {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}, - {0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, - {0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, - {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}, - {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x250ab457, // Vand.vi v8, v16, -0xb, v0.t - {{0, 129, 0, 129, 4, 133, 4, 133, 0, 129, 0, 129, 4, 133, 4, 133}, - {16, 145, 16, 145, 20, 149, 20, 149, 16, 145, 16, 145, 20, 149, 20, 149}, - {32, 161, 32, 161, 36, 165, 36, 165, 32, 161, 32, 161, 36, 165, 36, 165}, - {48, 177, 48, 177, 52, 181, 52, 181, 48, 177, 48, 177, 52, 181, 52, 181}, - {64, 193, 64, 193, 68, 197, 68, 197, 64, 193, 64, 193, 68, 197, 68, 197}, - {80, 209, 80, 209, 84, 213, 84, 213, 80, 209, 80, 209, 84, 213, 84, 213}, - {96, 225, 96, 225, 100, 229, 100, 229, 96, 225, 96, 225, 100, 229, 100, 229}, - {112, 241, 112, 241, 116, 245, 116, 245, 112, 241, 112, 241, 116, 245, 116, 245}}, - {{0x8100, 0x8300, 0x8504, 0x8704, 0x8900, 0x8b00, 0x8d04, 0x8f04}, - {0x9110, 0x9310, 0x9514, 0x9714, 0x9910, 0x9b10, 0x9d14, 0x9f14}, - {0xa120, 0xa320, 0xa524, 0xa724, 0xa920, 0xab20, 0xad24, 0xaf24}, - {0xb130, 0xb330, 0xb534, 0xb734, 0xb930, 0xbb30, 0xbd34, 0xbf34}, - {0xc140, 0xc340, 
0xc544, 0xc744, 0xc940, 0xcb40, 0xcd44, 0xcf44}, - {0xd150, 0xd350, 0xd554, 0xd754, 0xd950, 0xdb50, 0xdd54, 0xdf54}, - {0xe160, 0xe360, 0xe564, 0xe764, 0xe960, 0xeb60, 0xed64, 0xef64}, - {0xf170, 0xf370, 0xf574, 0xf774, 0xf970, 0xfb70, 0xfd74, 0xff74}}, - {{0x8302'8100, 0x8706'8504, 0x8b0a'8900, 0x8f0e'8d04}, - {0x9312'9110, 0x9716'9514, 0x9b1a'9910, 0x9f1e'9d14}, - {0xa322'a120, 0xa726'a524, 0xab2a'a920, 0xaf2e'ad24}, - {0xb332'b130, 0xb736'b534, 0xbb3a'b930, 0xbf3e'bd34}, - {0xc342'c140, 0xc746'c544, 0xcb4a'c940, 0xcf4e'cd44}, - {0xd352'd150, 0xd756'd554, 0xdb5a'd950, 0xdf5e'dd54}, - {0xe362'e160, 0xe766'e564, 0xeb6a'e960, 0xef6e'ed64}, - {0xf372'f170, 0xf776'f574, 0xfb7a'f970, 0xff7e'fd74}}, - {{0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8900}, - {0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9910}, - {0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a920}, - {0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b930}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c940}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd950}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e960}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f970}}, - kVectorCalculationsSourceLegacy); -} - -TEST_F(Riscv64InterpreterTest, TestVor) { - TestVectorInstruction( - 0x290c0457, // Vor.vv v8, v16, v24, v0.t - {{0, 131, 6, 135, 13, 143, 14, 143, 25, 155, 30, 159, 28, 159, 30, 159}, - {48, 179, 54, 183, 61, 191, 62, 191, 57, 187, 62, 191, 60, 191, 62, 191}, - {96, 227, 102, 231, 109, 239, 110, 239, 121, 251, 126, 255, 124, 255, 126, 255}, - {112, 243, 118, 247, 125, 255, 126, 255, 121, 251, 126, 255, 124, 255, 126, 255}, - {192, 195, 198, 199, 205, 207, 206, 207, 217, 219, 222, 223, 220, 223, 222, 223}, - {240, 243, 246, 247, 253, 255, 254, 255, 249, 251, 254, 255, 252, 255, 254, 255}, - {224, 227, 230, 231, 237, 239, 238, 239, 249, 251, 254, 255, 252, 255, 254, 255}, - {240, 243, 246, 247, 253, 255, 254, 255, 249, 251, 254, 255, 252, 255, 254, 255}}, - {{0x8300, 0x8706, 0x8f0d, 0x8f0e, 0x9b19, 0x9f1e, 0x9f1c, 0x9f1e}, - {0xb330, 
0xb736, 0xbf3d, 0xbf3e, 0xbb39, 0xbf3e, 0xbf3c, 0xbf3e}, - {0xe360, 0xe766, 0xef6d, 0xef6e, 0xfb79, 0xff7e, 0xff7c, 0xff7e}, - {0xf370, 0xf776, 0xff7d, 0xff7e, 0xfb79, 0xff7e, 0xff7c, 0xff7e}, - {0xc3c0, 0xc7c6, 0xcfcd, 0xcfce, 0xdbd9, 0xdfde, 0xdfdc, 0xdfde}, - {0xf3f0, 0xf7f6, 0xfffd, 0xfffe, 0xfbf9, 0xfffe, 0xfffc, 0xfffe}, - {0xe3e0, 0xe7e6, 0xefed, 0xefee, 0xfbf9, 0xfffe, 0xfffc, 0xfffe}, - {0xf3f0, 0xf7f6, 0xfffd, 0xfffe, 0xfbf9, 0xfffe, 0xfffc, 0xfffe}}, - {{0x8706'8300, 0x8f0e'8f0d, 0x9f1e'9b19, 0x9f1e'9f1c}, - {0xb736'b330, 0xbf3e'bf3d, 0xbf3e'bb39, 0xbf3e'bf3c}, - {0xe766'e360, 0xef6e'ef6d, 0xff7e'fb79, 0xff7e'ff7c}, - {0xf776'f370, 0xff7e'ff7d, 0xff7e'fb79, 0xff7e'ff7c}, - {0xc7c6'c3c0, 0xcfce'cfcd, 0xdfde'dbd9, 0xdfde'dfdc}, - {0xf7f6'f3f0, 0xfffe'fffd, 0xfffe'fbf9, 0xfffe'fffc}, - {0xe7e6'e3e0, 0xefee'efed, 0xfffe'fbf9, 0xfffe'fffc}, - {0xf7f6'f3f0, 0xfffe'fffd, 0xfffe'fbf9, 0xfffe'fffc}}, - {{0x8f0e'8f0d'8706'8300, 0x9f1e'9f1c'9f1e'9b19}, - {0xbf3e'bf3d'b736'b330, 0xbf3e'bf3c'bf3e'bb39}, - {0xef6e'ef6d'e766'e360, 0xff7e'ff7c'ff7e'fb79}, - {0xff7e'ff7d'f776'f370, 0xff7e'ff7c'ff7e'fb79}, - {0xcfce'cfcd'c7c6'c3c0, 0xdfde'dfdc'dfde'dbd9}, - {0xfffe'fffd'f7f6'f3f0, 0xfffe'fffc'fffe'fbf9}, - {0xefee'efed'e7e6'e3e0, 0xfffe'fffc'fffe'fbf9}, - {0xfffe'fffd'f7f6'f3f0, 0xfffe'fffc'fffe'fbf9}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x2900c457, // Vor.vx v8, v16, x1, v0.t - {{170, 171, 170, 171, 174, 175, 174, 175, 170, 171, 170, 171, 174, 175, 174, 175}, - {186, 187, 186, 187, 190, 191, 190, 191, 186, 187, 186, 187, 190, 191, 190, 191}, - {170, 171, 170, 171, 174, 175, 174, 175, 170, 171, 170, 171, 174, 175, 174, 175}, - {186, 187, 186, 187, 190, 191, 190, 191, 186, 187, 186, 187, 190, 191, 190, 191}, - {234, 235, 234, 235, 238, 239, 238, 239, 234, 235, 234, 235, 238, 239, 238, 239}, - {250, 251, 250, 251, 254, 255, 254, 255, 250, 251, 250, 251, 254, 255, 254, 255}, - {234, 235, 234, 235, 238, 239, 238, 239, 234, 235, 234, 235, 238, 239, 
238, 239}, - {250, 251, 250, 251, 254, 255, 254, 255, 250, 251, 250, 251, 254, 255, 254, 255}}, - {{0xabaa, 0xabaa, 0xafae, 0xafae, 0xabaa, 0xabaa, 0xafae, 0xafae}, - {0xbbba, 0xbbba, 0xbfbe, 0xbfbe, 0xbbba, 0xbbba, 0xbfbe, 0xbfbe}, - {0xabaa, 0xabaa, 0xafae, 0xafae, 0xabaa, 0xabaa, 0xafae, 0xafae}, - {0xbbba, 0xbbba, 0xbfbe, 0xbfbe, 0xbbba, 0xbbba, 0xbfbe, 0xbfbe}, - {0xebea, 0xebea, 0xefee, 0xefee, 0xebea, 0xebea, 0xefee, 0xefee}, - {0xfbfa, 0xfbfa, 0xfffe, 0xfffe, 0xfbfa, 0xfbfa, 0xfffe, 0xfffe}, - {0xebea, 0xebea, 0xefee, 0xefee, 0xebea, 0xebea, 0xefee, 0xefee}, - {0xfbfa, 0xfbfa, 0xfffe, 0xfffe, 0xfbfa, 0xfbfa, 0xfffe, 0xfffe}}, - {{0xabaa'abaa, 0xafae'afae, 0xabaa'abaa, 0xafae'afae}, - {0xbbba'bbba, 0xbfbe'bfbe, 0xbbba'bbba, 0xbfbe'bfbe}, - {0xabaa'abaa, 0xafae'afae, 0xabaa'abaa, 0xafae'afae}, - {0xbbba'bbba, 0xbfbe'bfbe, 0xbbba'bbba, 0xbfbe'bfbe}, - {0xebea'ebea, 0xefee'efee, 0xebea'ebea, 0xefee'efee}, - {0xfbfa'fbfa, 0xfffe'fffe, 0xfbfa'fbfa, 0xfffe'fffe}, - {0xebea'ebea, 0xefee'efee, 0xebea'ebea, 0xefee'efee}, - {0xfbfa'fbfa, 0xfffe'fffe, 0xfbfa'fbfa, 0xfffe'fffe}}, - {{0xafae'afae'abaa'abaa, 0xafae'afae'abaa'abaa}, - {0xbfbe'bfbe'bbba'bbba, 0xbfbe'bfbe'bbba'bbba}, - {0xafae'afae'abaa'abaa, 0xafae'afae'abaa'abaa}, - {0xbfbe'bfbe'bbba'bbba, 0xbfbe'bfbe'bbba'bbba}, - {0xefee'efee'ebea'ebea, 0xefee'efee'ebea'ebea}, - {0xfffe'fffe'fbfa'fbfa, 0xfffe'fffe'fbfa'fbfa}, - {0xefee'efee'ebea'ebea, 0xefee'efee'ebea'ebea}, - {0xfffe'fffe'fbfa'fbfa, 0xfffe'fffe'fbfa'fbfa}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x290ab457, // Vor.vi v8, v16, -0xb, v0.t - {{245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 
253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, - {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}}, - {{0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, - {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}}, - {{0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, - {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}}, - {{0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, - {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}}, - kVectorCalculationsSourceLegacy); -} - -TEST_F(Riscv64InterpreterTest, TestVxor) { - TestVectorInstruction( - 0x2d0c0457, // Vxor.vv v8, v16, v24, v0.t - {{0, 131, 6, 133, 13, 143, 10, 137, 25, 155, 30, 157, 20, 151, 18, 145}, - {48, 179, 54, 181, 61, 191, 58, 185, 41, 171, 46, 173, 36, 167, 34, 161}, - {96, 227, 102, 229, 
109, 239, 106, 233, 121, 251, 126, 253, 116, 247, 114, 241}, - {80, 211, 86, 213, 93, 223, 90, 217, 73, 203, 78, 205, 68, 199, 66, 193}, - {192, 67, 198, 69, 205, 79, 202, 73, 217, 91, 222, 93, 212, 87, 210, 81}, - {240, 115, 246, 117, 253, 127, 250, 121, 233, 107, 238, 109, 228, 103, 226, 97}, - {160, 35, 166, 37, 173, 47, 170, 41, 185, 59, 190, 61, 180, 55, 178, 49}, - {144, 19, 150, 21, 157, 31, 154, 25, 137, 11, 142, 13, 132, 7, 130, 1}}, - {{0x8300, 0x8506, 0x8f0d, 0x890a, 0x9b19, 0x9d1e, 0x9714, 0x9112}, - {0xb330, 0xb536, 0xbf3d, 0xb93a, 0xab29, 0xad2e, 0xa724, 0xa122}, - {0xe360, 0xe566, 0xef6d, 0xe96a, 0xfb79, 0xfd7e, 0xf774, 0xf172}, - {0xd350, 0xd556, 0xdf5d, 0xd95a, 0xcb49, 0xcd4e, 0xc744, 0xc142}, - {0x43c0, 0x45c6, 0x4fcd, 0x49ca, 0x5bd9, 0x5dde, 0x57d4, 0x51d2}, - {0x73f0, 0x75f6, 0x7ffd, 0x79fa, 0x6be9, 0x6dee, 0x67e4, 0x61e2}, - {0x23a0, 0x25a6, 0x2fad, 0x29aa, 0x3bb9, 0x3dbe, 0x37b4, 0x31b2}, - {0x1390, 0x1596, 0x1f9d, 0x199a, 0x0b89, 0x0d8e, 0x0784, 0x0182}}, - {{0x8506'8300, 0x890a'8f0d, 0x9d1e'9b19, 0x9112'9714}, - {0xb536'b330, 0xb93a'bf3d, 0xad2e'ab29, 0xa122'a724}, - {0xe566'e360, 0xe96a'ef6d, 0xfd7e'fb79, 0xf172'f774}, - {0xd556'd350, 0xd95a'df5d, 0xcd4e'cb49, 0xc142'c744}, - {0x45c6'43c0, 0x49ca'4fcd, 0x5dde'5bd9, 0x51d2'57d4}, - {0x75f6'73f0, 0x79fa'7ffd, 0x6dee'6be9, 0x61e2'67e4}, - {0x25a6'23a0, 0x29aa'2fad, 0x3dbe'3bb9, 0x31b2'37b4}, - {0x1596'1390, 0x199a'1f9d, 0x0d8e'0b89, 0x0182'0784}}, - {{0x890a'8f0d'8506'8300, 0x9112'9714'9d1e'9b19}, - {0xb93a'bf3d'b536'b330, 0xa122'a724'ad2e'ab29}, - {0xe96a'ef6d'e566'e360, 0xf172'f774'fd7e'fb79}, - {0xd95a'df5d'd556'd350, 0xc142'c744'cd4e'cb49}, - {0x49ca'4fcd'45c6'43c0, 0x51d2'57d4'5dde'5bd9}, - {0x79fa'7ffd'75f6'73f0, 0x61e2'67e4'6dee'6be9}, - {0x29aa'2fad'25a6'23a0, 0x31b2'37b4'3dbe'3bb9}, - {0x199a'1f9d'1596'1390, 0x0182'0784'0d8e'0b89}}, - kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x2d00c457, // Vxor.vx v8, v16, x1, v0.t - {{170, 43, 168, 41, 174, 47, 172, 45, 162, 35, 
160, 33, 166, 39, 164, 37}, - {186, 59, 184, 57, 190, 63, 188, 61, 178, 51, 176, 49, 182, 55, 180, 53}, - {138, 11, 136, 9, 142, 15, 140, 13, 130, 3, 128, 1, 134, 7, 132, 5}, - {154, 27, 152, 25, 158, 31, 156, 29, 146, 19, 144, 17, 150, 23, 148, 21}, - {234, 107, 232, 105, 238, 111, 236, 109, 226, 99, 224, 97, 230, 103, 228, 101}, - {250, 123, 248, 121, 254, 127, 252, 125, 242, 115, 240, 113, 246, 119, 244, 117}, - {202, 75, 200, 73, 206, 79, 204, 77, 194, 67, 192, 65, 198, 71, 196, 69}, - {218, 91, 216, 89, 222, 95, 220, 93, 210, 83, 208, 81, 214, 87, 212, 85}}, - {{0x2baa, 0x29a8, 0x2fae, 0x2dac, 0x23a2, 0x21a0, 0x27a6, 0x25a4}, - {0x3bba, 0x39b8, 0x3fbe, 0x3dbc, 0x33b2, 0x31b0, 0x37b6, 0x35b4}, - {0x0b8a, 0x0988, 0x0f8e, 0x0d8c, 0x0382, 0x0180, 0x0786, 0x0584}, - {0x1b9a, 0x1998, 0x1f9e, 0x1d9c, 0x1392, 0x1190, 0x1796, 0x1594}, - {0x6bea, 0x69e8, 0x6fee, 0x6dec, 0x63e2, 0x61e0, 0x67e6, 0x65e4}, - {0x7bfa, 0x79f8, 0x7ffe, 0x7dfc, 0x73f2, 0x71f0, 0x77f6, 0x75f4}, - {0x4bca, 0x49c8, 0x4fce, 0x4dcc, 0x43c2, 0x41c0, 0x47c6, 0x45c4}, - {0x5bda, 0x59d8, 0x5fde, 0x5ddc, 0x53d2, 0x51d0, 0x57d6, 0x55d4}}, - {{0x29a8'2baa, 0x2dac'2fae, 0x21a0'23a2, 0x25a4'27a6}, - {0x39b8'3bba, 0x3dbc'3fbe, 0x31b0'33b2, 0x35b4'37b6}, - {0x0988'0b8a, 0x0d8c'0f8e, 0x0180'0382, 0x0584'0786}, - {0x1998'1b9a, 0x1d9c'1f9e, 0x1190'1392, 0x1594'1796}, - {0x69e8'6bea, 0x6dec'6fee, 0x61e0'63e2, 0x65e4'67e6}, - {0x79f8'7bfa, 0x7dfc'7ffe, 0x71f0'73f2, 0x75f4'77f6}, - {0x49c8'4bca, 0x4dcc'4fce, 0x41c0'43c2, 0x45c4'47c6}, - {0x59d8'5bda, 0x5ddc'5fde, 0x51d0'53d2, 0x55d4'57d6}}, - {{0x2dac'2fae'29a8'2baa, 0x25a4'27a6'21a0'23a2}, - {0x3dbc'3fbe'39b8'3bba, 0x35b4'37b6'31b0'33b2}, - {0x0d8c'0f8e'0988'0b8a, 0x0584'0786'0180'0382}, - {0x1d9c'1f9e'1998'1b9a, 0x1594'1796'1190'1392}, - {0x6dec'6fee'69e8'6bea, 0x65e4'67e6'61e0'63e2}, - {0x7dfc'7ffe'79f8'7bfa, 0x75f4'77f6'71f0'73f2}, - {0x4dcc'4fce'49c8'4bca, 0x45c4'47c6'41c0'43c2}, - {0x5ddc'5fde'59d8'5bda, 0x55d4'57d6'51d0'53d2}}, - 
kVectorCalculationsSourceLegacy); - TestVectorInstruction( - 0x2d0ab457, // Vxor.vi v8, v16, -0xb, v0.t - {{245, 116, 247, 118, 241, 112, 243, 114, 253, 124, 255, 126, 249, 120, 251, 122}, - {229, 100, 231, 102, 225, 96, 227, 98, 237, 108, 239, 110, 233, 104, 235, 106}, - {213, 84, 215, 86, 209, 80, 211, 82, 221, 92, 223, 94, 217, 88, 219, 90}, - {197, 68, 199, 70, 193, 64, 195, 66, 205, 76, 207, 78, 201, 72, 203, 74}, - {181, 52, 183, 54, 177, 48, 179, 50, 189, 60, 191, 62, 185, 56, 187, 58}, - {165, 36, 167, 38, 161, 32, 163, 34, 173, 44, 175, 46, 169, 40, 171, 42}, - {149, 20, 151, 22, 145, 16, 147, 18, 157, 28, 159, 30, 153, 24, 155, 26}, - {133, 4, 135, 6, 129, 0, 131, 2, 141, 12, 143, 14, 137, 8, 139, 10}}, - {{0x7ef5, 0x7cf7, 0x7af1, 0x78f3, 0x76fd, 0x74ff, 0x72f9, 0x70fb}, - {0x6ee5, 0x6ce7, 0x6ae1, 0x68e3, 0x66ed, 0x64ef, 0x62e9, 0x60eb}, - {0x5ed5, 0x5cd7, 0x5ad1, 0x58d3, 0x56dd, 0x54df, 0x52d9, 0x50db}, - {0x4ec5, 0x4cc7, 0x4ac1, 0x48c3, 0x46cd, 0x44cf, 0x42c9, 0x40cb}, - {0x3eb5, 0x3cb7, 0x3ab1, 0x38b3, 0x36bd, 0x34bf, 0x32b9, 0x30bb}, - {0x2ea5, 0x2ca7, 0x2aa1, 0x28a3, 0x26ad, 0x24af, 0x22a9, 0x20ab}, - {0x1e95, 0x1c97, 0x1a91, 0x1893, 0x169d, 0x149f, 0x1299, 0x109b}, - {0x0e85, 0x0c87, 0x0a81, 0x0883, 0x068d, 0x048f, 0x0289, 0x008b}}, - {{0x7cfd'7ef5, 0x78f9'7af1, 0x74f5'76fd, 0x70f1'72f9}, - {0x6ced'6ee5, 0x68e9'6ae1, 0x64e5'66ed, 0x60e1'62e9}, - {0x5cdd'5ed5, 0x58d9'5ad1, 0x54d5'56dd, 0x50d1'52d9}, - {0x4ccd'4ec5, 0x48c9'4ac1, 0x44c5'46cd, 0x40c1'42c9}, - {0x3cbd'3eb5, 0x38b9'3ab1, 0x34b5'36bd, 0x30b1'32b9}, - {0x2cad'2ea5, 0x28a9'2aa1, 0x24a5'26ad, 0x20a1'22a9}, - {0x1c9d'1e95, 0x1899'1a91, 0x1495'169d, 0x1091'1299}, - {0x0c8d'0e85, 0x0889'0a81, 0x0485'068d, 0x0081'0289}}, - {{0x78f9'7afb'7cfd'7ef5, 0x70f1'72f3'74f5'76fd}, - {0x68e9'6aeb'6ced'6ee5, 0x60e1'62e3'64e5'66ed}, - {0x58d9'5adb'5cdd'5ed5, 0x50d1'52d3'54d5'56dd}, - {0x48c9'4acb'4ccd'4ec5, 0x40c1'42c3'44c5'46cd}, - {0x38b9'3abb'3cbd'3eb5, 0x30b1'32b3'34b5'36bd}, - {0x28a9'2aab'2cad'2ea5, 
0x20a1'22a3'24a5'26ad}, - {0x1899'1a9b'1c9d'1e95, 0x1091'1293'1495'169d}, - {0x0889'0a8b'0c8d'0e85, 0x0081'0283'0485'068d}}, - kVectorCalculationsSourceLegacy); -} - TEST_F(Riscv64InterpreterTest, TestVmfeq) { TestVectorMaskTargetInstruction(0x610c1457, // Vmfeq.vv v8, v16, v24, v0.t 0x0000'0007, @@ -9680,6 +8097,524 @@ TEST_F(Riscv64InterpreterTest, TestVmacc) { kVectorCalculationsSource); } +TEST_F(Riscv64InterpreterTest, TestVfmacc) { + TestVectorFloatInstruction(0xb1881457, // vfmacc.vv v8, v16, v24, v0.t + {{0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x6a1f'cefd, 0x7629'21c4, 0x6232'9db4, 0x6e3c'70f9}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5551, 0xd66b'bbc8, 0x5555'5555, 0x5555'5037}, + {0xfaad'fde4, 0xff80'0000, 0xf2c2'c69a, 0xfecd'99e3}, + {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}}, + {{0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x75b4'9040'f9f1'ea75, 0x6dcb'c6d1'12f0'a99b}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0xd614'2330'4af7'4c90, 0x5555'5555'5555'5555}, + {0xfff0'0000'0000'0000, 0xfe5b'5815'60f1'ac51}, + {0xfff0'0000'0000'0000, 0xfff0'0000'0000'0000}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(0xb100d457, // vfmacc.vf v8, f1, v16, v0.t + {{0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'550e, 0x5555'0ca1}, + {0x550b'37bf, 0xd895'6354, 0xdc99'df27, 0xe09c'b3a3}, + {0xe49f'8677, 0xe8a2'594a, 0xeca5'2c1d, 0xf0a7'fef0}, + {0xf4aa'd1c3, 0xf8ad'a496, 0xfcb0'7768, 0xff80'0000}}, + {{0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 
0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0xd780'0dff'a493'9082, 0xdf85'b3a5'4a3b'e0d2}, + {0xe790'194a'efe1'8677, 0xef95'bef0'9587'2c1d}, + {0xf7a0'2496'3b2c'd1c3, 0xffa5'ca3b'e0d2'7768}}, + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfnmacc) { + TestVectorFloatInstruction(0xb5881457, // vfnmacc.vv v8, v16, v24, v0.t + {{0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xea1f'cefd, 0xf629'21c4, 0xe232'9db4, 0xee3c'70f9}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5551, 0x566b'bbc8, 0xd555'5555, 0xd555'5037}, + {0x7aad'fde4, 0x7f80'0000, 0x72c2'c69a, 0x7ecd'99e3}, + {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, + {{0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xf5b4'9040'f9f1'ea75, 0xedcb'c6d1'12f0'a99b}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0x5614'2330'4af7'4c90, 0xd555'5555'5555'5555}, + {0x7ff0'0000'0000'0000, 0x7e5b'5815'60f1'ac51}, + {0x7ff0'0000'0000'0000, 0x7ff0'0000'0000'0000}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(0xb500d457, // vfnmacc.vf v8, f1, v16, v0.t + {{0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'550e, 0xd555'0ca1}, + {0xd50b'37bf, 0x5895'6354, 0x5c99'df27, 0x609c'b3a3}, + {0x649f'8677, 0x68a2'594a, 0x6ca5'2c1d, 0x70a7'fef0}, + {0x74aa'd1c3, 0x78ad'a496, 0x7cb0'7768, 0x7f80'0000}}, + {{0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + 
{0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0x5780'0dff'a493'9082, 0x5f85'b3a5'4a3b'e0d2}, + {0x6790'194a'efe1'8677, 0x6f95'bef0'9587'2c1d}, + {0x77a0'2496'3b2c'd1c3, 0x7fa5'ca3b'e0d2'7768}}, + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfwmacc) { + __m128i dst_result = {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}; + TestWideningVectorFloatInstruction(0xf1881457, // vfwmacc.vv v8, v16, v24, v0.t + {{0x3330'e53c'6480'0000, 0x34b2'786b'bbc5'4900}, + {0x3234'1766'da4a'6200, 0x33b5'cab6'2d6c'4800}, + {0x3937'92ba'5bd0'8000, 0x3ab9'666a'779a'0d00}, + {0x383b'4565'd61f'6600, 0x39bd'3935'e5bd'8800}, + {0x3f3f'423b'5522'0000, 0x40c0'ab36'1ab7'e880}, + {0x3e41'bab3'e9fa'b500, 0x3fc2'd4dc'5007'e400}, + {0x4543'f9df'a83a'4000, 0x46c5'2438'7aa3'4a80}, + {0x4446'53b6'69e6'3700, 0x45c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xf100d457, // vfwmacc.vf v8, f1, v16, v0.t + {{0xb886'f0ad'0000'0000, 0xb907'a561'b400'0000}, + {0xb988'5a16'6800'0000, 0xba09'0ecb'1c00'0000}, + {0xba89'c37f'd000'0000, 0xbb0a'7834'8400'0000}, + {0xbb8b'2ce9'3800'0000, 0xbc0b'e19d'ec00'0000}, + {0xbc8c'9652'a000'0000, 0xbd0d'4b07'5400'0000}, + {0xbd8d'ffbc'0800'0000, 0xbe0e'b470'bc00'0000}, + {0xbe8f'6925'7000'0000, 0xbf10'0eed'1200'0000}, + {0xbf90'6947'6c00'0000, 0xc010'c3a1'c600'0000}}, + kVectorCalculationsSource, + dst_result); + + dst_result = {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}; + TestWideningVectorFloatInstruction(0xf1881457, // vfwmacc.vv v8, v16, v24, v0.t + {{0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'66e3'6f53'baee, 0x40c0'aec2'e784'b54d}, + {0x401c'6666'66f4'3c05, 0x401c'fd0d'48e6'a586}, + {0x4543'f9df'a83a'4000, 
0x46c5'2438'7aa3'4a80}, + {0x4446'53b6'69e6'3700, 0x45c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xf100d457, // vfwmacc.vf v8, f1, v16, v0.t + {{0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6657}, + {0x401c'6666'6666'5766, 0x401c'6666'6657'0c2e}, + {0x401c'6666'56b1'd3ae, 0x401c'6656'5779'5466}, + {0x401c'55fd'1efa'6666, 0x4007'4589'40cc'cccc}}, + kVectorCalculationsSource, + dst_result); +} + +TEST_F(Riscv64InterpreterTest, TestVfwnmacc) { + __m128i dst_result = {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}; + TestWideningVectorFloatInstruction(0xf5881457, // vfwnmacc.vv v8, v16, v24, v0.t + {{0xb330'e53c'6480'0000, 0xb4b2'786b'bbc5'4900}, + {0xb234'1766'da4a'6200, 0xb3b5'cab6'2d6c'4800}, + {0xb937'92ba'5bd0'8000, 0xbab9'666a'779a'0d00}, + {0xb83b'4565'd61f'6600, 0xb9bd'3935'e5bd'8800}, + {0xbf3f'423b'5522'0000, 0xc0c0'ab36'1ab7'e880}, + {0xbe41'bab3'e9fa'b500, 0xbfc2'd4dc'5007'e400}, + {0xc543'f9df'a83a'4000, 0xc6c5'2438'7aa3'4a80}, + {0xc446'53b6'69e6'3700, 0xc5c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xf500d457, // vfwnmacc.vf v8, f1, v16, v0.t + {{0x3886'f0ad'0000'0000, 0x3907'a561'b400'0000}, + {0x3988'5a16'6800'0000, 0x3a09'0ecb'1c00'0000}, + {0x3a89'c37f'd000'0000, 0x3b0a'7834'8400'0000}, + {0x3b8b'2ce9'3800'0000, 0x3c0b'e19d'ec00'0000}, + {0x3c8c'9652'a000'0000, 0x3d0d'4b07'5400'0000}, + {0x3d8d'ffbc'0800'0000, 0x3e0e'b470'bc00'0000}, + {0x3e8f'6925'7000'0000, 0x3f10'0eed'1200'0000}, + {0x3f90'6947'6c00'0000, 0x4010'c3a1'c600'0000}}, + kVectorCalculationsSource, + dst_result); + + dst_result = {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}; + TestWideningVectorFloatInstruction(0xf5881457, // vfwnmacc.vv v8, v16, v24, v0.t + 
{{0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'66e3'6f53'baee, 0xc0c0'aec2'e784'b54d}, + {0xc01c'6666'66f4'3c05, 0xc01c'fd0d'48e6'a586}, + {0xc543'f9df'a83a'4000, 0xc6c5'2438'7aa3'4a80}, + {0xc446'53b6'69e6'3700, 0xc5c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xf500d457, // vfwnmacc.vf v8, f1, v16, v0.t + {{0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6657}, + {0xc01c'6666'6666'5766, 0xc01c'6666'6657'0c2e}, + {0xc01c'6666'56b1'd3ae, 0xc01c'6656'5779'5466}, + {0xc01c'55fd'1efa'6666, 0xc007'4589'40cc'cccc}}, + kVectorCalculationsSource, + dst_result); +} + +TEST_F(Riscv64InterpreterTest, TestVfwmsac) { + __m128i dst_result = {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}; + TestWideningVectorFloatInstruction(0xf9881457, // vfwmsac.vv v8, v16, v24, v0.t + {{0x3330'e53c'6480'0000, 0x34b2'786b'bbc5'4900}, + {0x3234'1766'da4a'6200, 0x33b5'cab6'2d6c'4800}, + {0x3937'92ba'5bd0'8000, 0x3ab9'666a'779a'0d00}, + {0x383b'4565'd61f'6600, 0x39bd'3935'e5bd'8800}, + {0x3f3f'423b'5522'0000, 0x40c0'ab36'1ab7'e880}, + {0x3e41'bab3'e9fa'b500, 0x3fc2'd4dc'5007'e400}, + {0x4543'f9df'a83a'4000, 0x46c5'2438'7aa3'4a80}, + {0x4446'53b6'69e6'3700, 0x45c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xf900d457, // vfwmsac.vf v8, f1, v16, v0.t + {{0xb886'f0ad'0000'0000, 0xb907'a561'b400'0000}, + {0xb988'5a16'6800'0000, 0xba09'0ecb'1c00'0000}, + {0xba89'c37f'd000'0000, 0xbb0a'7834'8400'0000}, + {0xbb8b'2ce9'3800'0000, 0xbc0b'e19d'ec00'0000}, + {0xbc8c'9652'a000'0000, 0xbd0d'4b07'5400'0000}, + {0xbd8d'ffbc'0800'0000, 
0xbe0e'b470'bc00'0000}, + {0xbe8f'6925'7000'0000, 0xbf10'0eed'1200'0000}, + {0xbf90'6947'6c00'0000, 0xc010'c3a1'c600'0000}}, + kVectorCalculationsSource, + dst_result); + + dst_result = {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}; + TestWideningVectorFloatInstruction(0xf9881457, // vfwmsac.vv v8, v16, v24, v0.t + {{0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'65e9'5d79'11de, 0x40c0'a7a9'4deb'1bb3}, + {0xc01c'6666'65d8'90c7, 0xc01b'cfbf'83e6'2746}, + {0x4543'f9df'a83a'4000, 0x46c5'2438'7aa3'4a80}, + {0x4446'53b6'69e6'3700, 0x45c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xf900d457, // vfwmsac.vf v8, f1, v16, v0.t + {{0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6666}, + {0xc01c'6666'6666'6666, 0xc01c'6666'6666'6675}, + {0xc01c'6666'6666'7566, 0xc01c'6666'6675'c09e}, + {0xc01c'6666'761a'f91e, 0xc01c'6676'7553'7866}, + {0xc01c'76cf'add2'6666, 0xc026'9504'1633'3333}}, + kVectorCalculationsSource, + dst_result); +} + +TEST_F(Riscv64InterpreterTest, TestVfwnmsac) { + __m128i dst_result = {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}; + TestWideningVectorFloatInstruction(0xfd881457, // vfwnmsac.vv v8, v16, v24, v0.t + {{0xb330'e53c'6480'0000, 0xb4b2'786b'bbc5'4900}, + {0xb234'1766'da4a'6200, 0xb3b5'cab6'2d6c'4800}, + {0xb937'92ba'5bd0'8000, 0xbab9'666a'779a'0d00}, + {0xb83b'4565'd61f'6600, 0xb9bd'3935'e5bd'8800}, + {0xbf3f'423b'5522'0000, 0xc0c0'ab36'1ab7'e880}, + {0xbe41'bab3'e9fa'b500, 0xbfc2'd4dc'5007'e400}, + {0xc543'f9df'a83a'4000, 0xc6c5'2438'7aa3'4a80}, + {0xc446'53b6'69e6'3700, 0xc5c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xfd00d457, // 
vfwnmsac.vf v8, f1, v16, v0.t + {{0x3886'f0ad'0000'0000, 0x3907'a561'b400'0000}, + {0x3988'5a16'6800'0000, 0x3a09'0ecb'1c00'0000}, + {0x3a89'c37f'd000'0000, 0x3b0a'7834'8400'0000}, + {0x3b8b'2ce9'3800'0000, 0x3c0b'e19d'ec00'0000}, + {0x3c8c'9652'a000'0000, 0x3d0d'4b07'5400'0000}, + {0x3d8d'ffbc'0800'0000, 0x3e0e'b470'bc00'0000}, + {0x3e8f'6925'7000'0000, 0x3f10'0eed'1200'0000}, + {0x3f90'6947'6c00'0000, 0x4010'c3a1'c600'0000}}, + kVectorCalculationsSource, + dst_result); + + dst_result = {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}; + TestWideningVectorFloatInstruction(0xfd881457, // vfwnmsac.vv v8, v16, v24, v0.t + {{0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'65e9'5d79'11de, 0xc0c0'a7a9'4deb'1bb3}, + {0x401c'6666'65d8'90c7, 0x401b'cfbf'83e6'2746}, + {0xc543'f9df'a83a'4000, 0xc6c5'2438'7aa3'4a80}, + {0xc446'53b6'69e6'3700, 0xc5c7'8e1f'2e31'8400}}, + kVectorCalculationsSource, + dst_result); + TestWideningVectorFloatInstruction(0xfd00d457, // vfwnmsac.vf v8, f1, v16, v0.t + {{0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6666}, + {0x401c'6666'6666'6666, 0x401c'6666'6666'6675}, + {0x401c'6666'6666'7566, 0x401c'6666'6675'c09e}, + {0x401c'6666'761a'f91e, 0x401c'6676'7553'7866}, + {0x401c'76cf'add2'6666, 0x4026'9504'1633'3333}}, + kVectorCalculationsSource, + dst_result); +} + +TEST_F(Riscv64InterpreterTest, TestVfmsac) { + TestVectorFloatInstruction(0xb9881457, // vfmsac.vv v8, v16, v24, v0.t + {{0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0x6a1f'cefd, 0x7629'21c4, 0x6232'9db3, 0x6e3c'70f9}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 
0xd555'5555}, + {0xd555'5559, 0xd6ab'3339, 0xd555'5555, 0xd555'5a73}, + {0xfaad'fde4, 0xff80'0000, 0xf2c2'c69a, 0xfecd'99e3}, + {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}}, + {{0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0x75b4'9040'f9f1'ea75, 0x6dcb'c6d1'12f0'a99b}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd614'25da'f5a1'f73b, 0xd555'5555'5555'5555}, + {0xfff0'0000'0000'0000, 0xfe5b'5815'60f1'ac51}, + {0xfff0'0000'0000'0000, 0xfff0'0000'0000'0000}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(0xb900d457, // vfmsac.vf v8, f1, v16, v0.t + {{0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'5555, 0xd555'5555}, + {0xd555'5555, 0xd555'5555, 0xd555'559c, 0xd555'9e09}, + {0xd58f'b976, 0xd898'b8aa, 0xdc99'e27d, 0xe09c'b3a6}, + {0xe49f'8678, 0xe8a2'594a, 0xeca5'2c1d, 0xf0a7'fef0}, + {0xf4aa'd1c3, 0xf8ad'a496, 0xfcb0'7768, 0xff80'0000}}, + {{0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd555'5555'5555'5555, 0xd555'5555'5555'5555}, + {0xd780'0dff'a498'e5d7, 0xdf85'b3a5'4a3b'e0d2}, + {0xe790'194a'efe1'8678, 0xef95'bef0'9587'2c1d}, + {0xf7a0'2496'3b2c'd1c3, 0xffa5'ca3b'e0d2'7768}}, + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfnmsac) { + TestVectorFloatInstruction(0xbd881457, // vfnmsac.vv v8, v16, v24, v0.t + {{0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0xea1f'cefd, 0xf629'21c4, 0xe232'9db3, 0xee3c'70f9}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5559, 
0x56ab'3339, 0x5555'5555, 0x5555'5a73}, + {0x7aad'fde4, 0x7f80'0000, 0x72c2'c69a, 0x7ecd'99e3}, + {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, + {{0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0xf5b4'9040'f9f1'ea75, 0xedcb'c6d1'12f0'a99b}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5614'25da'f5a1'f73b, 0x5555'5555'5555'5555}, + {0x7ff0'0000'0000'0000, 0x7e5b'5815'60f1'ac51}, + {0x7ff0'0000'0000'0000, 0x7ff0'0000'0000'0000}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(0xbd00d457, // vfnmsac.vf v8, f1, v16, v0.t + {{0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'5555, 0x5555'5555}, + {0x5555'5555, 0x5555'5555, 0x5555'559c, 0x5555'9e09}, + {0x558f'b976, 0x5898'b8aa, 0x5c99'e27d, 0x609c'b3a6}, + {0x649f'8678, 0x68a2'594a, 0x6ca5'2c1d, 0x70a7'fef0}, + {0x74aa'd1c3, 0x78ad'a496, 0x7cb0'7768, 0x7f80'0000}}, + {{0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}, + {0x5780'0dff'a498'e5d7, 0x5f85'b3a5'4a3b'e0d2}, + {0x6790'194a'efe1'8678, 0x6f95'bef0'9587'2c1d}, + {0x77a0'2496'3b2c'd1c3, 0x7fa5'ca3b'e0d2'7768}}, + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfmadd) { + TestVectorFloatInstruction(0xa1881457, // vfmadd.vv v8, v16, v24, v0.t + {{0x98dd'a63a, 0x9e28'a06a, 0xa0e6'e462, 0xa4ed'95be}, + {0xb624'b220, 0xbe2c'ba29, 0xb100'd4ec, 0xb504'308a}, + {0xd644'd240, 0xde4c'da49, 0xc654'e5df, 0xce5c'ca7c}, + {0xf664'f260, 0xfe6c'fa69, 0xe674'e271, 0xee7c'ea78}, + {0xd922'4bb5, 0xdd25'a463, 0xe128'fd11, 0xe52c'55bf}, + {0xe92f'ae6d, 0xed33'071b, 0xf136'5fc9, 
0xf539'b877}, + {0xf93d'1125, 0xfd40'69d3, 0xff80'0000, 0xff80'0000}, + {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}}, + {{0x9e0c'9a09'9d86'3e2c, 0xa474'5e08'5cb1'b0b0}, + {0xbe2c'ba29'b624'b220, 0xb484'68bd'bcbc'6610}, + {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251}, + {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271}, + {0xdcae'5c5b'af03'ac55, 0xe4b4'88dd'dcdc'8630}, + {0xecbe'71c6'6f19'1715, 0xf4c4'9393'3ce7'3b90}, + {0xfcce'8731'2f2e'81d5, 0xfff0'0000'0000'0000}, + {0xfff0'0000'0000'0000, 0xfff0'0000'0000'0000}}, + + kVectorCalculationsSource); + + TestVectorFloatInstruction(0xa100d457, // vfmadd.vf v8, f1, v16, v0.t + {{0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5695'fffe, 0x5695'fe62}, + {0x5694'5a5d, 0xd70b'd554, 0xdb5a'8e58, 0xdf5e'dd11}, + {0xe362'e160, 0xe766'e564, 0xeb6a'e968, 0xef6e'ed6c}, + {0xf372'f170, 0xf776'f574, 0xfb7a'f978, 0xff7e'fd7c}}, + {{0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0xd756'd554'd2da'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfnmadd) { + TestVectorFloatInstruction(0xa5881457, // vfnmadd.vv v8, v16, v24, v0.t + {{0x18dd'a63a, 0x1e28'a06a, 0x20e6'e462, 0x24ed'95be}, + {0x3624'b220, 0x3e2c'ba29, 0x3100'd4ec, 0x3504'308a}, + {0x5644'd240, 0x5e4c'da49, 0x4654'e5df, 0x4e5c'ca7c}, + {0x7664'f260, 0x7e6c'fa69, 0x6674'e271, 0x6e7c'ea78}, + {0x5922'4bb5, 0x5d25'a463, 0x6128'fd11, 0x652c'55bf}, + {0x692f'ae6d, 0x6d33'071b, 0x7136'5fc9, 0x7539'b877}, + {0x793d'1125, 
0x7d40'69d3, 0x7f80'0000, 0x7f80'0000}, + {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, + {{0x1e0c'9a09'9d86'3e2c, 0x2474'5e08'5cb1'b0b0}, + {0x3e2c'ba29'b624'b220, 0x3484'68bd'bcbc'6610}, + {0x5e4c'da49'd644'd240, 0x4e5c'ca58'c654'c251}, + {0x7e6c'fa69'f664'f260, 0x6e7c'ea78'e674'e271}, + {0x5cae'5c5b'af03'ac55, 0x64b4'88dd'dcdc'8630}, + {0x6cbe'71c6'6f19'1715, 0x74c4'9393'3ce7'3b90}, + {0x7cce'8731'2f2e'81d5, 0x7ff0'0000'0000'0000}, + {0x7ff0'0000'0000'0000, 0x7ff0'0000'0000'0000}}, + kVectorCalculationsSource); + + TestVectorFloatInstruction(0xa500d457, // vfnmadd.vf v8, f1, v16, v0.t + {{0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd695'fffe, 0xd695'fe62}, + {0xd694'5a5d, 0x570b'd554, 0x5b5a'8e58, 0x5f5e'dd11}, + {0x6362'e160, 0x6766'e564, 0x6b6a'e968, 0x6f6e'ed6c}, + {0x7372'f170, 0x7776'f574, 0x7b7a'f978, 0x7f7e'fd7c}}, + {{0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0x5756'd554'd2da'd150, 0x5f5e'dd5c'db5a'd958}, + {0x6766'e564'e362'e160, 0x6f6e'ed6c'eb6a'e968}, + {0x7776'f574'f372'f170, 0x7f7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfmsub) { + TestVectorFloatInstruction(0xa9881457, // vfmsub.vv v8, v16, v24, v0.t + {{0x98d5'5d1a, 0x1de1'2750, 0xa0e6'e462, 0xa4ed'95be}, + {0x3624'b220, 0x3e2c'ba29, 0xb100'd4e6, 0xb504'2aa4}, + {0x5644'd240, 0x5e4c'da49, 0x4654'9ec3, 0x4e5c'ca34}, + {0x7664'f260, 0x7e6c'fa69, 0x6674'e271, 0x6e7c'ea78}, + {0xd922'4bb5, 0xdd25'a463, 0xe128'fd11, 0xe52c'55bf}, + {0xe92f'ae6d, 0xed33'071b, 0xf136'5fc9, 0xf539'b877}, + {0xf93d'1125, 0xfd40'69d3, 0xff80'0000, 
0xff80'0000}, + {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}}, + {{0x1e0c'9a09'8e82'e5d4, 0xa474'5e08'5cb1'b0b0}, + {0x3e2c'ba29'b624'b220, 0xb484'68bd'bcbc'6610}, + {0x5e4c'da49'd644'd240, 0x4e5c'ca58'c654'c251}, + {0x7e6c'fa69'f664'f260, 0x6e7c'ea78'e674'e271}, + {0xdcae'5c5b'af03'ac55, 0xe4b4'88dd'dcdc'8630}, + {0xecbe'71c6'6f19'1715, 0xf4c4'9393'3ce7'3b90}, + {0xfcce'8731'2f2e'81d5, 0xfff0'0000'0000'0000}, + {0xfff0'0000'0000'0000, 0xfff0'0000'0000'0000}}, + kVectorCalculationsSource); + + TestVectorFloatInstruction(0xa900d457, // vfmsub.vf v8, f1, v16, v0.t + {{0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0000, 0x5696'0000}, + {0x5696'0000, 0x5696'0000, 0x5696'0001, 0x5696'019d}, + {0x5697'a5a2, 0x5790'eaaa, 0x5b5b'2458, 0x5f5e'dda7}, + {0x6362'e160, 0x6766'e564, 0x6b6a'e968, 0x6f6e'ed6c}, + {0x7372'f170, 0x7776'f574, 0x7b7a'f978, 0x7f7e'fd7c}}, + {{0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x557e'0000'0000'0000, 0x557e'0000'0000'0000}, + {0x5756'd554'd3ca'd150, 0x5f5e'dd5c'db5a'd958}, + {0x6766'e564'e362'e160, 0x6f6e'ed6c'eb6a'e968}, + {0x7776'f574'f372'f170, 0x7f7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); +} + +TEST_F(Riscv64InterpreterTest, TestVfnmsub) { + TestVectorFloatInstruction(0xad881457, // vfnmsub.vv v8, v16, v24, v0.t + {{0x18d5'5d1a, 0x9de1'2750, 0x20e6'e462, 0x24ed'95be}, + {0xb624'b220, 0xbe2c'ba29, 0x3100'd4e6, 0x3504'2aa4}, + {0xd644'd240, 0xde4c'da49, 0xc654'9ec3, 0xce5c'ca34}, + {0xf664'f260, 0xfe6c'fa69, 0xe674'e271, 0xee7c'ea78}, + {0x5922'4bb5, 0x5d25'a463, 0x6128'fd11, 0x652c'55bf}, + {0x692f'ae6d, 0x6d33'071b, 0x7136'5fc9, 0x7539'b877}, + {0x793d'1125, 0x7d40'69d3, 0x7f80'0000, 0x7f80'0000}, + {0x7f80'0000, 
0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, + {{0x9e0c'9a09'8e82'e5d4, 0x2474'5e08'5cb1'b0b0}, + {0xbe2c'ba29'b624'b220, 0x3484'68bd'bcbc'6610}, + {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251}, + {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271}, + {0x5cae'5c5b'af03'ac55, 0x64b4'88dd'dcdc'8630}, + {0x6cbe'71c6'6f19'1715, 0x74c4'9393'3ce7'3b90}, + {0x7cce'8731'2f2e'81d5, 0x7ff0'0000'0000'0000}, + {0x7ff0'0000'0000'0000, 0x7ff0'0000'0000'0000}}, + kVectorCalculationsSource); + + TestVectorFloatInstruction(0xad00d457, // vfnmsub.vf v8, f1, v16, v0.t + {{0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0000, 0xd696'0000}, + {0xd696'0000, 0xd696'0000, 0xd696'0001, 0xd696'019d}, + {0xd697'a5a2, 0xd790'eaaa, 0xdb5b'2458, 0xdf5e'dda7}, + {0xe362'e160, 0xe766'e564, 0xeb6a'e968, 0xef6e'ed6c}, + {0xf372'f170, 0xf776'f574, 0xfb7a'f978, 0xff7e'fd7c}}, + {{0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd57e'0000'0000'0000, 0xd57e'0000'0000'0000}, + {0xd756'd554'd3ca'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); +} + TEST_F(Riscv64InterpreterTest, TestVnmsac) { TestVectorInstruction(0xbd882457, // vnmsac.vv v8, v16, v24, v0.t {{85, 195, 77, 147, 49, 83, 13, 3, 205, 195, 141, 147, 53, 83, 205, 3}, @@ -10042,39 +8977,42 @@ TEST_F(Riscv64InterpreterTest, TestVmin) { {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}, {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}}, kVectorCalculationsSourceLegacy); +} + +TEST_F(Riscv64InterpreterTest, TestVfmin) { TestVectorFloatInstruction(0x1100d457, // vfmin.vf v8, v16, f1, v0.t - {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x7fc0'0000}, - 
{0x40b4'0000, 0x7fc0'0000, 0x40b4'0000, 0x7fc0'0000}, + {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb}, {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}}, - {{0xf005'f005'f005'f005, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x40164'016'4016'4016, 0x4016'8000'0000'0000}, + {{0xf005'f005'f005'f005, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb}, {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}}, kVectorComparisonSource); TestVectorFloatInstruction(0x110c1457, // vfmin.vv v8,v16,v24,v0.t {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x7fc0'0000}, - {0x1111'1111, 0x7fc0'0000, 0x1111'1111, 0x7fc0'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x1111'1111, 0x1111'1111, 0x1111'1111, 0x1111'1111}, + {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x8684'8280, 0x8e8c'8a89, 0x9694'9291, 0x9e9c'9a98}, + {0xa6a4'a2a0, 0xaeac'aaa9, 0xb6b4'b2b1, 0xbebc'bab8}, {0xc6c4'c2c0, 0xcecc'cac9, 0xd6d4'd2d1, 0xdedc'dad8}, {0xe6e4'e2e0, 0xeeec'eae9, 0xf6f4'f2f1, 
0xfefc'faf8}}, {{0xf005'f005'f005'f005, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x1111'1111'1111'1111, 0x1111'1111'1111'1111}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x8e8c'8a89'8684'8280, 0x9e9c'9a98'9694'9291}, + {0xaeac'aaa9'a6a4'a2a0, 0xbebc'bab8'b6b4'b2b1}, {0xcecc'cac9'c6c4'c2c0, 0xdedc'dad8'd6d4'd2d1}, {0xeeec'eae9'e6e4'e2e0, 0xfefc'faf8'f6f4'f2f1}}, kVectorComparisonSource); @@ -10224,39 +9162,42 @@ TEST_F(Riscv64InterpreterTest, TestVmax) { {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, kVectorCalculationsSourceLegacy); +} + +TEST_F(Riscv64InterpreterTest, TestVfmax) { TestVectorFloatInstruction(0x1900d457, // vfmax.vf v8, v16, f1, v0.t - {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x7fc0'0000}, - {0x40b4'40b4, 0x7fc0'0000, 0x40b4'0000, 0x7fc0'0000}, + {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'40b4, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}}, - {{0x4016'8000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x4016'8000'0000'0000, 
0x4016'8000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}}, kVectorComparisonSource); TestVectorFloatInstruction(0x190c1457, // vfmax.vv v8,v16,v24,v0.t {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x7fc0'0000}, - {0x40b4'40b4, 0x7fc0'0000, 0x40b4'0000, 0x7fc0'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x40b4'40b4, 0x1111'1111, 0x40b4'0000, 0x1111'1111}, + {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, {0x6664'6260, 0x6e6c'6a69, 0x7674'7271, 0x7e7c'7a78}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x8684'8280, 0x8e8c'8a89, 0x9694'9291, 0x9e9c'9a98}, + {0xa6a4'a2a0, 0xaeac'aaa9, 0xb6b4'b2b1, 0xbebc'bab8}, {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb}, {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}}, {{0xf005'f005'f005'f005, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x1111'1111'1111'1111, 0x1111'1111'1111'1111}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, {0x6e6c'6a69'6664'6260, 0x7e7c'7a78'7674'7271}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x8e8c'8a89'8684'8280, 0x9e9c'9a98'9694'9291}, + {0xaeac'aaa9'a6a4'a2a0, 0xbebc'bab8'b6b4'b2b1}, {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb}, {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}}, kVectorComparisonSource); diff --git a/intrinsics/include/berberis/intrinsics/common/intrinsics_float.h b/intrinsics/include/berberis/intrinsics/common/intrinsics_float.h index c7e27b20..a227b6b4 100644 --- a/intrinsics/include/berberis/intrinsics/common/intrinsics_float.h +++ b/intrinsics/include/berberis/intrinsics/common/intrinsics_float.h @@ -110,34 +110,8 @@ class WrappedFloatType { friend inline WrappedFloatType 
MulAdd(const WrappedFloatType& v1, const WrappedFloatType& v2, const WrappedFloatType& v3); - - friend inline WrappedFloatType Max(WrappedFloatType op1, WrappedFloatType op2) { - if (FPClassify(op1) == FPInfo::kZero && FPClassify(op2) == FPInfo::kZero && - SignBit(op1) != SignBit(op2)) { - return WrappedFloatType(BaseType(+0.f)); - } - // If either argument is NaN - return default NaN (fmax() may return other). - if (IsNan(op1) || IsNan(op2)) { - return std::numeric_limits<WrappedFloatType>::quiet_NaN(); - } - // Note: fmax is not needed here - it differs from std::max based on operator< only if NANs are - // involved - and does wrong thing there. We have no NANs at this point thus could use std::max - return std::max(op1, op2); - } - - friend inline WrappedFloatType Min(WrappedFloatType op1, WrappedFloatType op2) { - if (FPClassify(op1) == FPInfo::kZero && FPClassify(op2) == FPInfo::kZero && - SignBit(op1) != SignBit(op2)) { - return WrappedFloatType(BaseType(-0.f)); - } - // If either argument is NaN - return default NaN (fmin() may return other). - if (IsNan(op1) || IsNan(op2)) { - return std::numeric_limits<WrappedFloatType>::quiet_NaN(); - } - // Note: fmin is not needed here - it differs from std::min based on operator< only if NANs are - // involved - and does wrong thing there. 
We have no NANs at this point thus could use std::min - return std::min(op1, op2); - } + friend inline WrappedFloatType Max(WrappedFloatType op1, WrappedFloatType op2); + friend inline WrappedFloatType Min(WrappedFloatType op1, WrappedFloatType op2); private: static_assert(!std::numeric_limits<BaseType>::is_exact, diff --git a/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h b/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h index 67cd4e2b..63336150 100644 --- a/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h +++ b/intrinsics/include/berberis/intrinsics/intrinsics_floating_point_impl.h @@ -275,6 +275,11 @@ std::tuple<FloatType> FNMSub(int8_t rm, } template <typename FloatType> +FloatType CanonicalizeNanTuple(std::tuple<FloatType> arg) { + return std::get<0>(CanonicalizeNan<FloatType>(std::get<0>(arg))); +} + +template <typename FloatType> FloatType RSqrtEstimate(FloatType op) { if (SignBit(op)) { // If argument is negative - return default NaN. diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index 2019aa6b..02c8da5d 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -763,6 +763,14 @@ std::tuple<ElementType> WideMultiplySignedUnsigned(ElementType arg1, ElementType (), \ (src1, src2, src3)) +#define DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(name, ...) \ + DEFINE_ARITHMETIC_INTRINSIC( \ + V##name##vv, return ({ __VA_ARGS__; }); \ + , \ + (int8_t csr, SIMD128Register src1, SIMD128Register src2, SIMD128Register src3), \ + (csr), \ + (src1, src2, src3)) + #define DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(name, ...) 
\ DEFINE_ARITHMETIC_INTRINSIC(V##name##vx, return ({ __VA_ARGS__; }); \ , (SIMD128Register src1, ElementType src2), (), (src1, src2)) @@ -772,9 +780,21 @@ std::tuple<ElementType> WideMultiplySignedUnsigned(ElementType arg1, ElementType V##name##vx, return ({ __VA_ARGS__; }); \ , (SIMD128Register src1, ElementType src2, SIMD128Register src3), (), (src1, src2, src3)) +#define DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(name, ...) \ + DEFINE_ARITHMETIC_INTRINSIC( \ + V##name##vf, return ({ __VA_ARGS__; }); \ + , \ + (int8_t csr, SIMD128Register src1, ElementType src2, SIMD128Register src3), \ + (csr), \ + (src1, src2, src3)) + #define DEFINE_1OP_ARITHMETIC_INTRINSIC_X(name, ...) \ DEFINE_ARITHMETIC_INTRINSIC(V##name##x, return ({ __VA_ARGS__; });, (ElementType src), (), (src)) +#define DEFINE_1OP_1CSR_ARITHMETIC_INTRINSIC_V(name, ...) \ + DEFINE_ARITHMETIC_INTRINSIC(V##name##v, return ({ __VA_ARGS__; }); \ + , (int8_t csr, SIMD128Register src), (csr), (src)) + #define DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(name, ...) \ DEFINE_ARITHMETIC_INTRINSIC( \ V##name##vf, return ({ __VA_ARGS__; }); \ @@ -865,6 +885,22 @@ std::tuple<ElementType> WideMultiplySignedUnsigned(ElementType arg1, ElementType Vw##name##vx, Widenvvw, return ({ __VA_ARGS__; }); \ , (SIMD128Register src1, ElementType src2, SIMD128Register src3), (), (src1, src2, src3)) +#define DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VVW(name, ...) \ + DEFINE_W_ARITHMETIC_INTRINSIC( \ + Vfw##name##vv, Widenvvw, return ({ __VA_ARGS__; }); \ + , \ + (int8_t csr, SIMD128Register src1, SIMD128Register src2, SIMD128Register src3), \ + (csr), \ + (src1, src2, src3)) + +#define DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VXW(name, ...) \ + DEFINE_W_ARITHMETIC_INTRINSIC( \ + Vfw##name##vf, Widenvvw, return ({ __VA_ARGS__; }); \ + , \ + (int8_t csr, SIMD128Register src1, ElementType src2, SIMD128Register src3), \ + (csr), \ + (src1, src2, src3)) + #define DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WV(name, ...) 
\ DEFINE_W_ARITHMETIC_INTRINSIC(Vn##name##wv, Narrowwv, return ({ __VA_ARGS__; }); \ , (SIMD128Register src1, SIMD128Register src2), (), (src1, src2)) @@ -903,6 +939,9 @@ DEFINE_1OP_ARITHMETIC_INTRINSIC_V(copy, auto [arg] = std::tuple{args...}; arg) DEFINE_1OP_ARITHMETIC_INTRINSIC_X(copy, auto [arg] = std::tuple{args...}; arg) DEFINE_1OP_ARITHMETIC_INTRINSIC_V(frsqrt7, RSqrtEstimate(args...)) +DEFINE_1OP_1CSR_ARITHMETIC_INTRINSIC_V(fsqrt, + CanonicalizeNanTuple(FSqrt(FPFlags::DYN, csr, args...))) + DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(add, (args + ...)) DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(add, (args + ...)) DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redsum, (args + ...)) @@ -924,37 +963,67 @@ DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV( DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VX( aadd, ElementType{std::get<0>(Aadd(csr, static_cast<typename ElementType::BaseType>(args)...))}) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fadd, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fadd, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(mul, std::get<0>(FMul(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(mul, std::get<0>(FMul(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) 
-DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) - -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fsub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fsub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fadd, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fadd, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV( + add, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF( + add, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV( + sub, + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF( + sub, + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV( + mul, + CanonicalizeNanTuple(FMul(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF( + mul, + CanonicalizeNanTuple(FMul(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV( + add, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF( + add, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV( + sub, + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF( + sub, + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, args...))) + +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fsub, + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fsub, + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(frsub, auto [arg1, arg2] = std::tuple{args...}; - std::get<0>(FSub(FPFlags::DYN, csr, arg2, arg1))) 
-DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VS(osum, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VS(usum, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) + CanonicalizeNanTuple(FSub(FPFlags::DYN, csr, arg2, arg1))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VS(osum, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VS(usum, + CanonicalizeNanTuple(FAdd(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV( asub, ElementType{std::get<0>(Asub(csr, static_cast<typename ElementType::BaseType>(args)...))}) DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VX( asub, ElementType{std::get<0>(Asub(csr, static_cast<typename ElementType::BaseType>(args)...))}) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fmul, std::get<0>(FMul(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fmul, std::get<0>(FMul(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fdiv, std::get<0>(FDiv(FPFlags::DYN, csr, args...))) -DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fdiv, std::get<0>(FDiv(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fmul, + CanonicalizeNanTuple(FMul(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fmul, + CanonicalizeNanTuple(FMul(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fdiv, + CanonicalizeNanTuple(FDiv(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fdiv, + CanonicalizeNanTuple(FDiv(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(frdiv, auto [arg1, arg2] = std::tuple{args...}; - std::get<0>(FDiv(FPFlags::DYN, csr, arg2, arg1))) + CanonicalizeNanTuple(FDiv(FPFlags::DYN, csr, arg2, arg1))) // SIMD mask either includes results with all bits set to 0 or all bits set to 1. // This way it may be used with VAnd and VAndN operations to perform masking. 
// Such comparison is effectively one instruction of x86-64 (via SSE or AVX) but @@ -1034,6 +1103,39 @@ DEFINE_3OP_ARITHMETIC_INTRINSIC_VV(nmsub, auto [arg1, arg2, arg3] = std::tuple{a (-(arg2 * arg3) + arg1)) DEFINE_3OP_ARITHMETIC_INTRINSIC_VX(nmsub, auto [arg1, arg2, arg3] = std::tuple{args...}; (-(arg2 * arg3) + arg1)) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fmacc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fmacc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fmsac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fmsac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fmadd, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMAdd(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fmadd, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMAdd(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fmsub, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMSub(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fmsub, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMSub(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fnmacc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fnmacc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fnmsac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMAdd(FPFlags::DYN, 
csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fnmsac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fnmadd, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMSub(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fnmadd, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMSub(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV(fnmsub, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMAdd(FPFlags::DYN, csr, arg3, arg2, arg1))) +DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF(fnmsub, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMAdd(FPFlags::DYN, csr, arg3, arg2, arg1))) + DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(fmin, std::get<0>(FMin(args...))) DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fmin, std::get<0>(FMin(args...))) DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(fredmin, std::get<0>(FMin(args...))) @@ -1103,7 +1205,30 @@ DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VXW(maccsu, auto [arg1, arg2, arg3] = std: DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VXW(maccus, auto [arg1, arg2, arg3] = std::tuple{args...}; (std::get<0>(WideMultiplySignedUnsigned(arg1, arg2))) + arg3) - +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VVW( + macc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VXW( + macc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VVW( + nmacc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VXW( + nmacc, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VVW( + msac, auto 
[arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VXW( + msac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FMSub(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VVW( + nmsac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) +DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VXW( + nmsac, auto [arg1, arg2, arg3] = std::tuple{args...}; + std::get<0>(FNMAdd(FPFlags::DYN, csr, arg2, arg1, arg3))) DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WV(sr, auto [arg1, arg2] = std::tuple{args...}; (arg1 >> arg2)) DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WX(sr, auto [arg1, arg2] = std::tuple{args...}; @@ -1135,6 +1260,8 @@ DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WX( #undef DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WV #undef DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WX #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV +#undef DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VF +#undef DEFINE_3OP_1CSR_ARITHMETIC_INTRINSIC_VV #undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV #undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF #undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV @@ -1144,6 +1271,8 @@ DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WX( #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WX #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VX #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VXW +#undef DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VVW +#undef DEFINE_3OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VXW } // namespace berberis::intrinsics diff --git a/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h index 98e3313d..05152748 100644 --- a/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h +++ b/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h @@ -17,6 +17,7 @@ 
#ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ #define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_ +#include <cmath> #include <limits> #include "berberis/base/bit_util.h" @@ -96,6 +97,35 @@ inline FloatType ExecuteFloatOperation(uint8_t requested_rm, return operation(args...); } +// From RISC-V ISA manual: Single-Precision Floating-Point Computational Instructions. +// Covers behavior for both single and double precision floating point comparisons. +#define DEFINE_FLOAT_COMPARE_FUNC(FuncName, FloatType, ZeroVal, Intrinsic) \ + inline FloatType FuncName(FloatType op1, FloatType op2) { \ + FPInfo op1_class = FPClassify(op1); \ + FPInfo op2_class = FPClassify(op2); \ + if (op1_class == FPInfo::kZero && op2_class == FPInfo::kZero && \ + SignBit(op1) != SignBit(op2)) { \ + return FloatType(ZeroVal); \ + } \ + /* If both inputs are NaNs, the result is the canonical NaN. */ \ + if (op1_class == FPInfo::kNaN && op2_class == FPInfo::kNaN) { \ + return std::numeric_limits<FloatType>::quiet_NaN(); \ + } \ + /* If only one operand is a NaN, the result is the non-NaN operand. */ \ + if (op1_class == FPInfo::kNaN) { \ + return op2; \ + } \ + if (op2_class == FPInfo::kNaN) { \ + return op1; \ + } \ + return FloatType(Intrinsic(op1.value_, op2.value_)); \ + } +DEFINE_FLOAT_COMPARE_FUNC(Max, Float32, +0.f, std::fmax); +DEFINE_FLOAT_COMPARE_FUNC(Max, Float64, +0.f, std::fmax); +DEFINE_FLOAT_COMPARE_FUNC(Min, Float32, -0.f, std::fmin); +DEFINE_FLOAT_COMPARE_FUNC(Min, Float64, -0.f, std::fmin); +#undef DEFINE_FLOAT_COMPARE_FUNC + // We only need Negative(long double) for FMA, b/120563432 doesn't affect this function. 
inline long double Negative(const long double& v) { return -v; diff --git a/kernel_api/Android.bp b/kernel_api/Android.bp index b8df391b..b1acc01e 100644 --- a/kernel_api/Android.bp +++ b/kernel_api/Android.bp @@ -64,7 +64,6 @@ cc_library_static { "riscv64/open_emulation.cc", "riscv64/syscall_emulation_arch.cc", "riscv64/syscall_emulation.cc", - "riscv64/syscall_numbers.cc", "riscv64/tracing.cc", ], local_include_dirs: ["riscv64"], diff --git a/kernel_api/include/berberis/kernel_api/open_emulation.h b/kernel_api/include/berberis/kernel_api/open_emulation.h index bffaecfc..aece2146 100644 --- a/kernel_api/include/berberis/kernel_api/open_emulation.h +++ b/kernel_api/include/berberis/kernel_api/open_emulation.h @@ -33,6 +33,9 @@ int ToGuestOpenFlags(int host_flags); int OpenatForGuest(int dirfd, const char* pathname, int flags, mode_t mode); int OpenForGuest(const char* pathname, int flags, mode_t mode); +bool IsFileDescriptorEmulatedProcSelfMaps(int fd); +void CloseEmulatedProcSelfMapsFileDescriptor(int fd); + } // namespace berberis #endif // BERBERIS_KERNEL_API_OPEN_EMULATION_H_ diff --git a/kernel_api/include/berberis/kernel_api/syscall_emulation_common.h b/kernel_api/include/berberis/kernel_api/syscall_emulation_common.h index d7a3f82b..ba2a8ca4 100644 --- a/kernel_api/include/berberis/kernel_api/syscall_emulation_common.h +++ b/kernel_api/include/berberis/kernel_api/syscall_emulation_common.h @@ -17,6 +17,7 @@ #ifndef BERBERIS_KERNEL_API_SYSCALL_EMULATION_COMMON_H_ #define BERBERIS_KERNEL_API_SYSCALL_EMULATION_COMMON_H_ +#include <sys/stat.h> #include <sys/syscall.h> #include <sys/types.h> #include <unistd.h> @@ -25,6 +26,7 @@ #include "berberis/base/bit_util.h" #include "berberis/base/macros.h" +#include "berberis/guest_state/guest_addr.h" #include "berberis/kernel_api/exec_emulation.h" #include "berberis/kernel_api/fcntl_emulation.h" #include "berberis/kernel_api/open_emulation.h" @@ -35,6 +37,8 @@ namespace berberis { +void ConvertHostStatToGuestArch(const 
struct stat& host_stat, GuestAddr guest_stat); + inline long RunGuestSyscall___NR_clone3(long arg_1, long arg_2) { UNUSED(arg_1, arg_2); KAPI_TRACE("unimplemented syscall __NR_clone3"); @@ -42,6 +46,11 @@ inline long RunGuestSyscall___NR_clone3(long arg_1, long arg_2) { return -1; } +inline long RunGuestSyscall___NR_close(long arg_1) { + CloseEmulatedProcSelfMapsFileDescriptor(arg_1); + return syscall(__NR_close, arg_1); +} + inline long RunGuestSyscall___NR_execve(long arg_1, long arg_2, long arg_3) { return static_cast<long>(ExecveForGuest(bit_cast<const char*>(arg_1), // filename bit_cast<char* const*>(arg_2), // argv @@ -54,6 +63,37 @@ inline long RunGuestSyscall___NR_faccessat(long arg_1, long arg_2, long arg_3) { return syscall(__NR_faccessat, arg_1, arg_2, arg_3); } +inline long RunGuestSyscall___NR_fstat(long arg_1, long arg_2) { + // We are including this structure from library headers (sys/stat.h) and assume + // that it matches kernel's layout. + // TODO(b/232598137): Add a check for this. It seems like this is an issue for 32-bit + // guest syscall, since compiled with bionic this declares `struct stat64` while + // the syscall will expect `struct stat` + struct stat host_stat; + long result; + if (IsFileDescriptorEmulatedProcSelfMaps(arg_1)) { + KAPI_TRACE("Emulating fstat for /proc/self/maps"); + result = syscall(__NR_stat, "/proc/self/maps", &host_stat); + } else { + result = syscall(__NR_fstat, arg_1, &host_stat); + } + if (result != -1) { + ConvertHostStatToGuestArch(host_stat, bit_cast<GuestAddr>(arg_2)); + } + return result; +} + +inline long RunGuestSyscall___NR_fstatfs(long arg_1, long arg_2) { + if (IsFileDescriptorEmulatedProcSelfMaps(arg_1)) { + KAPI_TRACE("Emulating fstatfs for /proc/self/maps"); + // arg_2 (struct statfs*) has kernel expected layout, which is different from + // what libc may expect. E.g. this happens for 32-bit bionic where the library call + // expects struct statfs64. Thus ensure we invoke syscall, not library call. 
+ return syscall(__NR_statfs, "/proc/self/maps", arg_2); + } + return syscall(__NR_fstatfs, arg_1, arg_2); +} + inline long RunGuestSyscall___NR_fcntl(long arg_1, long arg_2, long arg_3) { return GuestFcntl(arg_1, arg_2, arg_3); } diff --git a/kernel_api/open_emulation.cc b/kernel_api/open_emulation.cc index 483a47b3..0728ef14 100644 --- a/kernel_api/open_emulation.cc +++ b/kernel_api/open_emulation.cc @@ -17,15 +17,20 @@ #include "berberis/kernel_api/open_emulation.h" #include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> #include <cstdio> #include <cstring> -#include <string> -#include <vector> - +#include <mutex> +#include <utility> + +#include "berberis/base/arena_alloc.h" +#include "berberis/base/arena_map.h" +#include "berberis/base/arena_string.h" +#include "berberis/base/arena_vector.h" +#include "berberis/base/checks.h" #include "berberis/base/fd.h" -#include "berberis/base/file.h" -#include "berberis/base/strings.h" #include "berberis/base/tracing.h" #include "berberis/guest_os_primitives/guest_map_shadow.h" #include "berberis/guest_state/guest_addr.h" @@ -35,16 +40,91 @@ namespace berberis { namespace { +class EmulatedFileDescriptors { + public: + explicit EmulatedFileDescriptors() : fds_(&arena_) {} + + static EmulatedFileDescriptors* GetInstance() { + static EmulatedFileDescriptors g_emulated_proc_self_maps_fds; + return &g_emulated_proc_self_maps_fds; + } + + // Not copyable or movable. + EmulatedFileDescriptors(const EmulatedFileDescriptors&) = delete; + EmulatedFileDescriptors& operator=(const EmulatedFileDescriptors&) = delete; + + void Add(int fd) { + std::lock_guard lock(mutex_); + auto [unused_it, inserted] = fds_.insert(std::make_pair(fd, 0)); + if (!inserted) { + // We expect every fd to be added at most once. But if it breaks let's consider it non-fatal. 
+ TRACE("Detected duplicated fd in EmulatedFileDescriptors"); + } + } + + bool Contains(int fd) { + std::lock_guard lock(mutex_); + return fds_.find(fd) != fds_.end(); + } + + void Remove(int fd) { + std::lock_guard lock(mutex_); + auto it = fds_.find(fd); + if (it != fds_.end()) { + fds_.erase(it); + } + } + + private: + std::mutex mutex_; + Arena arena_; + // We use it as a set because we don't have ArenaSet, so client data isn't really used. + ArenaMap<int, int> fds_; +}; + // It's macro since we use it as string literal below. #define PROC_SELF_MAPS "/proc/self/maps" +// Reader that works with custom allocator strings. Based on android::base::ReadFileToString. +template <typename String> +bool ReadProcSelfMapsToString(String& content) { + int fd = open(PROC_SELF_MAPS, O_RDONLY); + if (fd == -1) { + return false; + } + char buf[4096] __attribute__((__uninitialized__)); + ssize_t n; + while ((n = read(fd, &buf[0], sizeof(buf))) > 0) { + content.append(buf, n); + } + close(fd); + return n == 0; +} + +// String split that works with custom allocator strings. Based on android::base::Split. +template <typename String> +ArenaVector<String> SplitLines(Arena* arena, const String& content) { + ArenaVector<String> lines(arena); + size_t base = 0; + size_t found; + while (true) { + found = content.find_first_of('\n', base); + lines.emplace_back(content, base, found - base, content.get_allocator()); + if (found == content.npos) break; + base = found + 1; + } + return lines; +} + // Note that dirfd, flags and mode are only used to fallback to // host's openat in case of failure. +// Avoid mallocs since bionic tests use it under malloc_disable (b/338211718). 
int OpenatProcSelfMapsForGuest(int dirfd, int flags, mode_t mode) { TRACE("Openat for " PROC_SELF_MAPS); - std::string file_data; - bool success = ReadFileToString(PROC_SELF_MAPS, &file_data); + Arena arena; + ArenaString file_data(&arena); + bool success = ReadProcSelfMapsToString(file_data); if (!success) { TRACE("Cannot read " PROC_SELF_MAPS ", falling back to host's openat"); return openat(dirfd, PROC_SELF_MAPS, flags, mode); @@ -54,8 +134,8 @@ int OpenatProcSelfMapsForGuest(int dirfd, int flags, mode_t mode) { auto* maps_shadow = GuestMapShadow::GetInstance(); - std::vector<std::string> lines = Split(file_data, "\n"); - std::string guest_maps; + auto lines = SplitLines(&arena, file_data); + ArenaString guest_maps(&arena); for (size_t i = 0; i < lines.size(); i++) { uintptr_t start; uintptr_t end; @@ -83,21 +163,46 @@ int OpenatProcSelfMapsForGuest(int dirfd, int flags, mode_t mode) { guest_maps.append(lines.at(i) + "\n"); } + // Normally /proc/self/maps doesn't have newline at the end. + // It's simpler to remove it than to not add it in the loop. + CHECK_EQ(guest_maps.back(), '\n'); + guest_maps.pop_back(); + TRACE("--------\n%s\n--------", guest_maps.c_str()); WriteFullyOrDie(mem_fd, guest_maps.c_str(), guest_maps.size()); lseek(mem_fd, 0, 0); + EmulatedFileDescriptors::GetInstance()->Add(mem_fd); + return mem_fd; } +bool IsProcSelfMaps(const char* path, int flags) { + struct stat cur_stat; + struct stat proc_stat; + // This check works for /proc/self/maps itself as well as symlinks (unless AT_SYMLINK_NOFOLLOW is + // requested). As an added benefit it gracefully handles invalid pointers in path. 
+ return stat(path, &cur_stat) == 0 && stat(PROC_SELF_MAPS, &proc_stat) == 0 && + !(S_ISLNK(cur_stat.st_mode) && (flags & AT_SYMLINK_NOFOLLOW)) && + cur_stat.st_ino == proc_stat.st_ino && cur_stat.st_dev == proc_stat.st_dev; +} + } // namespace +bool IsFileDescriptorEmulatedProcSelfMaps(int fd) { + return EmulatedFileDescriptors::GetInstance()->Contains(fd); +} + +void CloseEmulatedProcSelfMapsFileDescriptor(int fd) { + EmulatedFileDescriptors::GetInstance()->Remove(fd); +} + int OpenatForGuest(int dirfd, const char* path, int guest_flags, mode_t mode) { int host_flags = ToHostOpenFlags(guest_flags); - if (strcmp(path, PROC_SELF_MAPS) == 0) { + if (IsProcSelfMaps(path, host_flags)) { return OpenatProcSelfMapsForGuest(dirfd, host_flags, mode); } diff --git a/kernel_api/riscv64/gen_syscall_emulation_riscv64_to_x86_64-inl.h b/kernel_api/riscv64/gen_syscall_emulation_riscv64_to_x86_64-inl.h index 4e241a15..779284c8 100644 --- a/kernel_api/riscv64/gen_syscall_emulation_riscv64_to_x86_64-inl.h +++ b/kernel_api/riscv64/gen_syscall_emulation_riscv64_to_x86_64-inl.h @@ -50,7 +50,8 @@ long RunGuestSyscallImpl(long guest_nr, // custom syscall return RunGuestSyscall___NR_clone3(arg_1, arg_2); case 57: // __NR_close - return syscall(3, arg_1); + // /proc/self/maps emulation + return RunGuestSyscall___NR_close(arg_1); case 436: // __NR_close_range return syscall(436, arg_1, arg_2, arg_3); case 203: // __NR_connect @@ -143,10 +144,11 @@ long RunGuestSyscallImpl(long guest_nr, case 433: // __NR_fspick return syscall(433, arg_1, arg_2, arg_3); case 80: // __NR_fstat - // incompatible prototype + // incompatible prototype and /proc/self/maps emulation return RunGuestSyscall___NR_fstat(arg_1, arg_2); case 44: // __NR_fstatfs - return syscall(138, arg_1, arg_2); + // /proc/self/maps emulation + return RunGuestSyscall___NR_fstatfs(arg_1, arg_2); case 82: // __NR_fsync return syscall(74, arg_1); case 46: // __NR_ftruncate diff --git a/kernel_api/riscv64/syscall_emulation.cc 
b/kernel_api/riscv64/syscall_emulation.cc index f9ad1388..62c4e328 100644 --- a/kernel_api/riscv64/syscall_emulation.cc +++ b/kernel_api/riscv64/syscall_emulation.cc @@ -27,6 +27,7 @@ #include "berberis/base/scoped_errno.h" #include "berberis/base/tracing.h" #include "berberis/guest_os_primitives/scoped_pending_signals.h" +#include "berberis/guest_state/guest_addr.h" #include "berberis/guest_state/guest_state.h" #include "berberis/instrument/syscall.h" #include "berberis/kernel_api/main_executable_real_path_emulation.h" @@ -42,23 +43,6 @@ namespace berberis { namespace { -void ConvertHostStatToGuest(const struct stat& host_stat, Guest_stat* guest_stat) { - guest_stat->st_dev = host_stat.st_dev; - guest_stat->st_ino = host_stat.st_ino; - guest_stat->st_mode = host_stat.st_mode; - guest_stat->st_nlink = host_stat.st_nlink; - guest_stat->st_uid = host_stat.st_uid; - guest_stat->st_gid = host_stat.st_gid; - guest_stat->st_rdev = host_stat.st_rdev; - guest_stat->st_size = host_stat.st_size; - guest_stat->st_blksize = host_stat.st_blksize; - guest_stat->st_blocks = host_stat.st_blocks; - guest_stat->st_blocks = host_stat.st_blocks; - guest_stat->st_atim = host_stat.st_atim; - guest_stat->st_mtim = host_stat.st_mtim; - guest_stat->st_ctim = host_stat.st_ctim; -} - int FstatatForGuest(int dirfd, const char* path, struct stat* buf, int flags) { const char* real_path = nullptr; if ((flags & AT_SYMLINK_NOFOLLOW) == 0) { @@ -108,15 +92,6 @@ long RunGuestSyscall___NR_fadvise64(long arg_1, long arg_2, long arg_3, long arg return syscall(__NR_fadvise64, arg_1, arg_2, arg_3, arg_4); } -long RunGuestSyscall___NR_fstat(long arg_1, long arg_2) { - struct stat host_stat; - long result = syscall(__NR_fstat, arg_1, &host_stat); - if (result != -1) { - ConvertHostStatToGuest(host_stat, bit_cast<Guest_stat*>(arg_2)); - } - return result; -} - long RunGuestSyscall___NR_ioctl(long arg_1, long arg_2, long arg_3) { // TODO(b/128614662): translate! 
KAPI_TRACE("unimplemented ioctl 0x%lx, running host syscall as is", arg_2); @@ -130,7 +105,7 @@ long RunGuestSyscall___NR_newfstatat(long arg_1, long arg_2, long arg_3, long ar &host_stat, static_cast<int>(arg_4)); // flags if (result != -1) { - ConvertHostStatToGuest(host_stat, bit_cast<Guest_stat*>(arg_3)); + ConvertHostStatToGuestArch(host_stat, bit_cast<GuestAddr>(arg_3)); } return result; } diff --git a/kernel_api/riscv64/syscall_emulation_arch.cc b/kernel_api/riscv64/syscall_emulation_arch.cc index af0c7dd2..df2b565b 100644 --- a/kernel_api/riscv64/syscall_emulation_arch.cc +++ b/kernel_api/riscv64/syscall_emulation_arch.cc @@ -14,14 +14,19 @@ * limitations under the License. */ +#include <sys/stat.h> + #include <cstddef> #include <tuple> #include <utility> +#include "berberis/guest_state/guest_addr.h" #include "berberis/kernel_api/exec_emulation.h" #include "berberis/kernel_api/fcntl_emulation.h" #include "berberis/kernel_api/sys_ptrace_emulation.h" +#include "riscv64/guest_types.h" + namespace berberis { std::pair<const char*, size_t> GetGuestPlatformVarPrefixWithSize() { @@ -37,4 +42,21 @@ std::tuple<bool, int> PtraceForGuestArch(int, pid_t, void*, void*) { return {false, -1}; } +void ConvertHostStatToGuestArch(const struct stat& host_stat, GuestAddr guest_addr) { + auto* guest_stat = ToHostAddr<Guest_stat>(guest_addr); + guest_stat->st_dev = host_stat.st_dev; + guest_stat->st_ino = host_stat.st_ino; + guest_stat->st_mode = host_stat.st_mode; + guest_stat->st_nlink = host_stat.st_nlink; + guest_stat->st_uid = host_stat.st_uid; + guest_stat->st_gid = host_stat.st_gid; + guest_stat->st_rdev = host_stat.st_rdev; + guest_stat->st_size = host_stat.st_size; + guest_stat->st_blksize = host_stat.st_blksize; + guest_stat->st_blocks = host_stat.st_blocks; + guest_stat->st_atim = host_stat.st_atim; + guest_stat->st_mtim = host_stat.st_mtim; + guest_stat->st_ctim = host_stat.st_ctim; +} + } // namespace berberis diff --git a/kernel_api/sys_prctl_emulation.cc 
b/kernel_api/sys_prctl_emulation.cc index 9721323a..db31888c 100644 --- a/kernel_api/sys_prctl_emulation.cc +++ b/kernel_api/sys_prctl_emulation.cc @@ -24,7 +24,7 @@ #include "berberis/base/bit_util.h" #include "berberis/base/checks.h" -#include "berberis/kernel_api/syscall_numbers.h" +#include "berberis/guest_os_primitives/syscall_numbers.h" namespace berberis { diff --git a/kernel_api/tools/README.md b/kernel_api/tools/README.md index f2023654..59154dd5 100644 --- a/kernel_api/tools/README.md +++ b/kernel_api/tools/README.md @@ -69,3 +69,22 @@ Run the script to generate code for translation function: gen_kernel_syscalls_translation.py riscv64 x86_64 \ > kernel_api/gen_syscall_emulation_riscv64_to_x86_64-inl.h ``` + +### Generate system call numbers + +Each guest architecture must provide two files for system call numbers: +1. `gen_syscall_numbers_arch.h`, which contains constants for that guest architecture's syscall + numbers. This file is generated by `gen_kernel_syscalls_numbers.py`. +2. `gen_syscall_numbers.cc`, which contains function definitions for mapping between host and guest + syscall numbers. This file is generated by `gen_kernel_syscalls_mapping.py`. + +Both of these files are generated from `kernel_syscalls.json` and should be regenerated whenever +that file changes. 
The command-line arguments for both scripts are `<src-arch> <dst-arch> +<path-to-kernel_syscalls.json>`: + +``` +./gen_kernel_syscalls_numbers.py riscv64 x86_64 ./kernel_syscalls.json \ +> ../../guest_os_primitives/riscv64/include/berberis/guest_os_primitives/gen_syscall_numbers_arch.h +./gen_kernel_syscalls_mapping.py riscv64 x86_64 ./kernel_syscalls.json \ +> ../../guest_os_primitives/riscv64/gen_syscall_numbers.cc +``` diff --git a/kernel_api/tools/custom_syscalls.json b/kernel_api/tools/custom_syscalls.json index b91ecce0..f2956c7e 100644 --- a/kernel_api/tools/custom_syscalls.json +++ b/kernel_api/tools/custom_syscalls.json @@ -52,6 +52,9 @@ "__NR_clone3": { "custom_reason": "custom syscall" }, + "__NR_close": { + "custom_reason": "/proc/self/maps emulation" + }, "__NR_connect": { "x86": { "custom_reason": "socketcall", @@ -79,6 +82,17 @@ "__NR_fork": { "custom_reason": "custom syscall" }, + "__NR_fstat": { + "x86": { + "custom_reason": "/proc/self/maps emulation" + }, + "x86_64": { + "custom_reason": "incompatible prototype and /proc/self/maps emulation" + } + }, + "__NR_fstatfs": { + "custom_reason": "/proc/self/maps emulation" + }, "__NR_fstatfs64": { "arm": { "custom_reason": "statfs64 size mismatch" diff --git a/kernel_api/tools/gen_kernel_syscalls_mapping.py b/kernel_api/tools/gen_kernel_syscalls_mapping.py new file mode 100755 index 00000000..73ff614b --- /dev/null +++ b/kernel_api/tools/gen_kernel_syscalls_mapping.py @@ -0,0 +1,93 @@ +#!/usr/bin/python +# +# Copyright (C) 2024 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import json +import sys + + +def _print_header(arch): + print("""\ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "berberis/guest_os_primitives/gen_syscall_numbers.h" + +namespace berberis {""") + + +def _print_footer(arch): + print("""\ + +} // namespace berberis""") + + +def _print_mapping(name, src_arch, dst_arch, kernel_syscalls): + print("""\ + +int %s(int nr) { + switch (nr) {""" % (name)) + + for nr, syscall in sorted(kernel_syscalls.items()): + if src_arch in syscall: + if dst_arch in syscall: + print(' case %s: // %s' % (syscall[src_arch]['id'], nr)) + print(' return %s;' % (syscall[dst_arch]['id'])) + else: + print(' case %s: // %s - missing on %s' % (syscall[src_arch]['id'], nr, dst_arch)) + print(' return -1;') + + print("""\ + default: + return -1; + } +}""") + + +def main(argv): + src_arch = argv[1] + dst_arch = argv[2] + + with open(argv[3]) as json_file: + kernel_syscalls = json.load(json_file) + + # TODO(b/232598137): merge custom syscalls? 
+ + display_src_arch = src_arch.upper() + + _print_header(display_src_arch) + _print_mapping('ToHostSyscallNumber', src_arch, dst_arch, kernel_syscalls) + _print_mapping('ToGuestSyscallNumber', dst_arch, src_arch, kernel_syscalls) + _print_footer(display_src_arch) + + return 0 + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/kernel_api/tools/gen_kernel_syscalls_numbers.py b/kernel_api/tools/gen_kernel_syscalls_numbers.py index da3f127d..38faccfb 100755 --- a/kernel_api/tools/gen_kernel_syscalls_numbers.py +++ b/kernel_api/tools/gen_kernel_syscalls_numbers.py @@ -37,10 +37,10 @@ def _print_header(arch): * limitations under the License. */ -#ifndef BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_%s_H_ -#define BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_%s_H_ +#ifndef BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_ARCH_H_ +#define BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_ARCH_H_ -namespace berberis {""" % (arch, arch)) +namespace berberis {""") def _print_footer(arch): @@ -48,7 +48,7 @@ def _print_footer(arch): } // namespace berberis -#endif // BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_%s_H_""" % (arch)) +#endif // BERBERIS_GUEST_OS_PRIMITIVES_GEN_SYSCALL_NUMBERS_ARCH_H_""") def _print_enum(arch, kernel_syscalls): @@ -64,28 +64,6 @@ enum {""") print('};') -def _print_mapping(name, src_arch, dst_arch, kernel_syscalls): - print("""\ - -inline int %s(int nr) { - switch (nr) {""" % (name)) - - for nr, syscall in sorted(kernel_syscalls.items()): - if src_arch in syscall: - if dst_arch in syscall: - print(' case %s: // %s' % (syscall[src_arch]['id'], nr)) - print(' return %s;' % (syscall[dst_arch]['id'])) - else: - print(' case %s: // %s - missing on %s' % (syscall[src_arch]['id'], nr, dst_arch)) - print(' return -1;') - - print("""\ - default: - return -1; - } -}""") - - def main(argv): src_arch = argv[1] dst_arch = argv[2] @@ -99,8 +77,6 @@ def main(argv): _print_header(display_src_arch) _print_enum(src_arch, 
kernel_syscalls) - _print_mapping('ToHostSyscallNumber', src_arch, dst_arch, kernel_syscalls) - _print_mapping('ToGuestSyscallNumber', dst_arch, src_arch, kernel_syscalls) _print_footer(display_src_arch) return 0 diff --git a/lite_translator/riscv64_to_x86_64/lite_translator.cc b/lite_translator/riscv64_to_x86_64/lite_translator.cc index 91091b65..707bb30a 100644 --- a/lite_translator/riscv64_to_x86_64/lite_translator.cc +++ b/lite_translator/riscv64_to_x86_64/lite_translator.cc @@ -465,7 +465,7 @@ Register LiteTranslator::UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Regi Undefined(); return {}; } - return arg; + return res; } Register LiteTranslator::UpdateCsr(Decoder::CsrImmOpcode opcode, uint8_t imm, Register csr) { diff --git a/test_utils/Android.bp b/test_utils/Android.bp index 96120c0b..64f189a2 100644 --- a/test_utils/Android.bp +++ b/test_utils/Android.bp @@ -33,3 +33,14 @@ cc_library_headers { "libberberis_runtime_primitives_headers", ], } + +cc_test_library { + name: "libberberis_tests_main", + defaults: ["berberis_test_library_defaults"], + host_supported: true, + header_libs: [ + "berberis_test_utils_headers", + "libberberis_runtime_headers", + ], + srcs: ["tests_main.cc"], +} diff --git a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h index da2a50f9..d865ccfd 100644 --- a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h +++ b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h @@ -45,6 +45,8 @@ #error "One of TESTING_INTERPRETER, TESTING_LITE_TRANSLATOR, TESTING_HEAVY_OPTIMIZER must be defined #endif +namespace { + // TODO(b/276787675): remove these files from interpreter when they are no longer needed there. // Maybe extract FPvalueToFPReg and TupleMap to a separate header? 
inline constexpr class FPValueToFPReg { @@ -83,62 +85,85 @@ decltype(auto) TupleMap(const ContainerType& container, const Transformer& trans return result; } +void RaiseFeExceptForGuestFlags(uint8_t riscv_fflags) { + EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0); + if (riscv_fflags & FPFlags::NX) { + EXPECT_EQ(feraiseexcept(FE_INEXACT), 0); + } + if (riscv_fflags & FPFlags::UF) { + EXPECT_EQ(feraiseexcept(FE_UNDERFLOW), 0); + } + if (riscv_fflags & FPFlags::OF) { + EXPECT_EQ(feraiseexcept(FE_OVERFLOW), 0); + } + if (riscv_fflags & FPFlags::DZ) { + EXPECT_EQ(feraiseexcept(FE_DIVBYZERO), 0); + } + if (riscv_fflags & FPFlags::NV) { + EXPECT_EQ(feraiseexcept(FE_INVALID), 0); + } +} + +void TestFeExceptForGuestFlags(uint8_t riscv_fflags) { + EXPECT_EQ(bool(riscv_fflags & FPFlags::NX), bool(fetestexcept(FE_INEXACT))); + EXPECT_EQ(bool(riscv_fflags & FPFlags::UF), bool(fetestexcept(FE_UNDERFLOW))); + EXPECT_EQ(bool(riscv_fflags & FPFlags::OF), bool(fetestexcept(FE_OVERFLOW))); + EXPECT_EQ(bool(riscv_fflags & FPFlags::DZ), bool(fetestexcept(FE_DIVBYZERO))); + EXPECT_EQ(bool(riscv_fflags & FPFlags::NV), bool(fetestexcept(FE_INVALID))); +} + +} // namespace + class TESTSUITE : public ::testing::Test { public: TESTSUITE() : state_{ .cpu = {.vtype = uint64_t{1} << 63, .frm = intrinsics::GuestModeFromHostRounding()}} {} + template <uint8_t kInsnSize = 4> + void RunInstruction(const uint32_t& insn_bytes) { + state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); + EXPECT_TRUE(RunOneInstruction<kInsnSize>(&state_, state_.cpu.insn_addr + kInsnSize)); + } + // Compressed Instructions. 
template <RegisterType register_type, uint64_t expected_result, uint8_t kTargetReg> void TestCompressedStore(uint16_t insn_bytes, uint64_t offset) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; store_area_ = 0; SetXReg<kTargetReg>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset)); SetReg<register_type, 9>(state_.cpu, kDataToLoad); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(store_area_, expected_result); } template <RegisterType register_type, uint64_t expected_result, uint8_t kSourceReg> void TestCompressedLoad(uint16_t insn_bytes, uint64_t offset) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; SetXReg<kSourceReg>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset)); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result); } void TestCAddi(uint16_t insn_bytes, uint64_t expected_increment) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; SetXReg<2>(state_.cpu, 1); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<2>(state_.cpu), 1 + expected_increment); } void TestCAddi16sp(uint16_t insn_bytes, uint64_t expected_offset) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; SetXReg<2>(state_.cpu, 1); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<2>(state_.cpu), 1 + expected_offset); } void TestLi(uint32_t insn_bytes, uint64_t expected_result) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), 
expected_result); } void TestCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; SetXReg<2>(state_.cpu, 1); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<9>(state_.cpu), 1 + expected_offset); } @@ -159,19 +184,16 @@ class TESTSUITE : public ::testing::Test { void TestCMiscAlu(uint16_t insn_bytes, std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { for (auto [arg1, arg2, expected_result] : args) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetXReg<8>(state_.cpu, arg1); SetXReg<9>(state_.cpu, arg2); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<8>(state_.cpu), expected_result); } } void TestCMiscAluImm(uint16_t insn_bytes, uint64_t value, uint64_t expected_result) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; SetXReg<9>(state_.cpu, value); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<9>(state_.cpu), expected_result); } @@ -191,44 +213,40 @@ class TESTSUITE : public ::testing::Test { void TestCOp(uint32_t insn_bytes, std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { for (auto [arg1, arg2, expected_result] : args) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetXReg<1>(state_.cpu, arg1); SetXReg<2>(state_.cpu, arg2); - EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + 2)); + RunInstruction<2>(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } } // Non-Compressed Instructions. + void TestFFlagsOnGuestAndHost(uint8_t expected_guest_fflags) { + // Read fflags register. + RunInstruction(0x00102173); // frflags x2 + EXPECT_EQ(GetXReg<2>(state_.cpu), expected_guest_fflags); + + // Check corresponding fenv exception flags on host. 
+ TestFeExceptForGuestFlags(expected_guest_fflags); + } + void TestFCsr(uint32_t insn_bytes, uint8_t fcsr_to_set, uint8_t expected_fcsr, uint8_t expected_cpustate_frm) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; state_.cpu.frm = 0b100u; // Pass non-zero frm to ensure that we don't accidentally rely on it being zero. SetXReg<3>(state_.cpu, fcsr_to_set); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<2>(state_.cpu), 0b1000'0000ULL | expected_fcsr); EXPECT_EQ(state_.cpu.frm, expected_cpustate_frm); } - void TestFFlags(uint32_t insn_bytes, uint8_t fflags_to_set, uint8_t expected_fflags) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; - SetXReg<3>(state_.cpu, fflags_to_set); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); - EXPECT_EQ(GetXReg<2>(state_.cpu), expected_fflags); - } - void TestFrm(uint32_t insn_bytes, uint8_t frm_to_set, uint8_t expected_rm) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; state_.cpu.frm = 0b001u; SetXReg<3>(state_.cpu, frm_to_set); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<2>(state_.cpu), 0b001u); EXPECT_EQ(state_.cpu.frm, expected_rm); } @@ -236,10 +254,9 @@ class TESTSUITE : public ::testing::Test { void TestOp(uint32_t insn_bytes, std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) { for (auto [arg1, arg2, expected_result] : args) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetXReg<2>(state_.cpu, arg1); SetXReg<3>(state_.cpu, arg2); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } } @@ -247,10 +264,9 @@ class TESTSUITE : public ::testing::Test { template <typename... 
Types> void TestOpFp(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg1, arg2, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetFReg<2>(state_.cpu, arg1); SetFReg<3>(state_.cpu, arg2); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); } } @@ -260,24 +276,19 @@ class TESTSUITE : public ::testing::Test { for (auto [arg1, imm, expected_result] : args) { CHECK_LE(imm, 63); uint32_t insn_bytes_with_immediate = insn_bytes | imm << 20; - state_.cpu.insn_addr = bit_cast<GuestAddr>(&insn_bytes_with_immediate); SetXReg<2>(state_.cpu, arg1); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes_with_immediate); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } } void TestAuipc(uint32_t insn_bytes, uint64_t expected_offset) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); - EXPECT_EQ(GetXReg<1>(state_.cpu), expected_offset + code_start); + RunInstruction(insn_bytes); + EXPECT_EQ(GetXReg<1>(state_.cpu), expected_offset + ToGuestAddr(&insn_bytes)); } void TestLui(uint32_t insn_bytes, uint64_t expected_result) { - auto code_start = ToGuestAddr(&insn_bytes); - state_.cpu.insn_addr = code_start; - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } @@ -302,10 +313,9 @@ class TESTSUITE : public ::testing::Test { } void TestLoad(uint32_t insn_bytes, uint64_t expected_result) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); // Offset is always 8. 
SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - 8)); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } @@ -326,23 +336,21 @@ class TESTSUITE : public ::testing::Test { } void TestStore(uint32_t insn_bytes, uint64_t expected_result) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); // Offset is always 8. SetXReg<1>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - 8)); SetXReg<2>(state_.cpu, kDataToStore); store_area_ = 0; - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(store_area_, expected_result); } template <typename... Types> void TestFma(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg1, arg2, arg3, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetFReg<2>(state_.cpu, arg1); SetFReg<3>(state_.cpu, arg2); SetFReg<4>(state_.cpu, arg3); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); } } @@ -408,12 +416,11 @@ class TESTSUITE : public ::testing::Test { uint64_t arg2, uint64_t expected_result, uint64_t expected_memory) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); // Copy arg1 into store_area_ store_area_ = arg1; SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_))); SetXReg<3>(state_.cpu, arg2); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); EXPECT_EQ(store_area_, expected_memory); } @@ -435,9 +442,8 @@ class TESTSUITE : public ::testing::Test { void TestFmvFloatToInteger(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = 
ToGuestAddr(&insn_bytes); SetFReg<1>(state_.cpu, arg); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } } @@ -446,9 +452,8 @@ class TESTSUITE : public ::testing::Test { void TestFmvIntegerToFloat(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg, expected_result] : args) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetXReg<1>(state_.cpu, arg); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetFReg<1>(state_.cpu), kFPValueToFPReg(expected_result)); } } @@ -457,10 +462,9 @@ class TESTSUITE : public ::testing::Test { void TestOpFpGpRegisterTarget(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg1, arg2, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetFReg<2>(state_.cpu, arg1); SetFReg<3>(state_.cpu, arg2); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } } @@ -469,9 +473,8 @@ class TESTSUITE : public ::testing::Test { void TestOpFpGpRegisterTargetSingleInput(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetFReg<2>(state_.cpu, arg); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result); } } @@ -480,9 +483,8 @@ class TESTSUITE : public ::testing::Test { void TestOpFpGpRegisterSourceSingleInput(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetXReg<2>(state_.cpu, arg); - 
EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); } } @@ -490,28 +492,25 @@ class TESTSUITE : public ::testing::Test { template <typename... Types> void TestOpFpSingleInput(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) { for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); SetFReg<2>(state_.cpu, arg); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); } } void TestLoadFp(uint32_t insn_bytes, uint64_t expected_result) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); // Offset is always 8. SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - 8)); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result); } void TestStoreFp(uint32_t insn_bytes, uint64_t expected_result) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); // Offset is always 8. 
SetXReg<1>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - 8)); SetFReg<2>(state_.cpu, kDataToStore); store_area_ = 0; - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); EXPECT_EQ(store_area_, expected_result); } @@ -520,13 +519,12 @@ class TESTSUITE : public ::testing::Test { std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>> args) { for (auto [vl_orig, vtype_orig, avl, vtype_new, vl_expected, vtype_expected] : args) { - state_.cpu.insn_addr = ToGuestAddr(&insn_bytes); state_.cpu.vl = vl_orig; state_.cpu.vtype = vtype_orig; SetXReg<1>(state_.cpu, ~0ULL); SetXReg<2>(state_.cpu, avl); SetXReg<3>(state_.cpu, vtype_new); - EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4)); + RunInstruction(insn_bytes); if (insn_bytes & 0b11111'0000000) { EXPECT_EQ(GetXReg<1>(state_.cpu), vl_expected); } else { @@ -1182,107 +1180,128 @@ TEST_F(TESTSUITE, CsrInstructions) { TestFrm(0x0020f173, 0, 0); } -TEST_F(TESTSUITE, FCsrRegister) { - fenv_t saved_environment; - EXPECT_EQ(fegetenv(&saved_environment), 0); +constexpr uint8_t kFPFlagsAll = FPFlags::NX | FPFlags::UF | FPFlags::OF | FPFlags::DZ | FPFlags::NV; +// Ensure all trailing bits are set in kFPFlagsAll so that all combinations are possible. 
+static_assert(__builtin_ctz(~kFPFlagsAll) == 5); - for (uint8_t riscv_fflags = 0; riscv_fflags < 32; riscv_fflags += 1) { - EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0); - if (riscv_fflags & FPFlags::NX) { - EXPECT_EQ(feraiseexcept(FE_INEXACT), 0); - } - if (riscv_fflags & FPFlags::UF) { - EXPECT_EQ(feraiseexcept(FE_UNDERFLOW), 0); - } - if (riscv_fflags & FPFlags::OF) { - EXPECT_EQ(feraiseexcept(FE_OVERFLOW), 0); - } - if (riscv_fflags & FPFlags::DZ) { - EXPECT_EQ(feraiseexcept(FE_DIVBYZERO), 0); - } - if (riscv_fflags & FPFlags::NV) { - EXPECT_EQ(feraiseexcept(FE_INVALID), 0); - } - TestFCsr(0x00319173, 0, riscv_fflags, 0); +// Automatically saves and restores fenv throughout the lifetime of a parent scope. +class ScopedFenv { + public: + ScopedFenv() { EXPECT_EQ(fegetenv(&env_), 0); } + ~ScopedFenv() { EXPECT_EQ(fesetenv(&env_), 0); } + + private: + fenv_t env_; +}; + +TEST_F(TESTSUITE, FFlagsRead) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + RaiseFeExceptForGuestFlags(fflags); + RunInstruction(0x00102173); // frflags x2 + EXPECT_EQ(GetXReg<2>(state_.cpu), fflags); } +} - for (bool immediate_source : {true, false}) { - for (uint8_t riscv_fflags = 0; riscv_fflags < 32; ++riscv_fflags) { - EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0); - if (immediate_source) { - TestFCsr(0x00305173 | (riscv_fflags << 15), 0, 0, 0); - } else { - TestFCsr(0x00319173, 0b100'0000 | riscv_fflags, 0, 2); - } - EXPECT_EQ(bool(riscv_fflags & FPFlags::NX), bool(fetestexcept(FE_INEXACT))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::UF), bool(fetestexcept(FE_UNDERFLOW))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::OF), bool(fetestexcept(FE_OVERFLOW))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::DZ), bool(fetestexcept(FE_DIVBYZERO))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::NV), bool(fetestexcept(FE_INVALID))); - } +TEST_F(TESTSUITE, FFlagsSwap) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + 
RaiseFeExceptForGuestFlags(fflags); + // After swapping in 0 for flags, read fflags to verify. + SetXReg<3>(state_.cpu, 0); + RunInstruction(0x00119173); // fsflags x2, x3 + EXPECT_EQ(GetXReg<2>(state_.cpu), fflags); + TestFFlagsOnGuestAndHost(0u); } +} - EXPECT_EQ(fesetenv(&saved_environment), 0); +TEST_F(TESTSUITE, FFlagsSwapImmediate) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + RaiseFeExceptForGuestFlags(fflags); + // After swapping in 0 for flags, read fflags to verify. + RunInstruction(0x00105173); // fsflags x2, 0 + EXPECT_EQ(GetXReg<2>(state_.cpu), fflags); + TestFFlagsOnGuestAndHost(0u); + } } -TEST_F(TESTSUITE, FFlagsRegister) { - fenv_t saved_environment; - EXPECT_EQ(fegetenv(&saved_environment), 0); +TEST_F(TESTSUITE, FFlagsWrite) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + SetXReg<3>(state_.cpu, fflags); + RunInstruction(0x00119073); // fsflags x3 + TestFFlagsOnGuestAndHost(fflags); + } +} - for (uint8_t riscv_fflags = 0; riscv_fflags < 32; riscv_fflags += 1) { - EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0); - if (riscv_fflags & FPFlags::NX) { - EXPECT_EQ(feraiseexcept(FE_INEXACT), 0); - } - if (riscv_fflags & FPFlags::UF) { - EXPECT_EQ(feraiseexcept(FE_UNDERFLOW), 0); - } - if (riscv_fflags & FPFlags::OF) { - EXPECT_EQ(feraiseexcept(FE_OVERFLOW), 0); - } - if (riscv_fflags & FPFlags::DZ) { - EXPECT_EQ(feraiseexcept(FE_DIVBYZERO), 0); - } - if (riscv_fflags & FPFlags::NV) { - EXPECT_EQ(feraiseexcept(FE_INVALID), 0); - } - TestFFlags(0x00105173, 0, riscv_fflags); +TEST_F(TESTSUITE, FFlagsWriteImmediate) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + RunInstruction(0x00105073 | fflags << 15); // fsflagsi 0 (+ fflags) + TestFFlagsOnGuestAndHost(fflags); + } +} + +TEST_F(TESTSUITE, FFlagsClearBits) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + RaiseFeExceptForGuestFlags(kFPFlagsAll); + 
SetXReg<3>(state_.cpu, fflags); + RunInstruction(0x0011b073); // csrc fflags, x3 + // Read fflags to verify previous bitwise clear operation. + TestFFlagsOnGuestAndHost(static_cast<uint8_t>(~fflags & kFPFlagsAll)); + } +} + +TEST_F(TESTSUITE, FFlagsClearBitsImmediate) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + RaiseFeExceptForGuestFlags(kFPFlagsAll); + RunInstruction(0x00107073 | fflags << 15); // csrci fflags, 0 (+ fflags) + // Read fflags to verify previous bitwise clear operation. + TestFFlagsOnGuestAndHost(static_cast<uint8_t>(~fflags & kFPFlagsAll)); + } +} + +TEST_F(TESTSUITE, FCsrRegister) { + ScopedFenv fenv; + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { + RaiseFeExceptForGuestFlags(fflags); + + // Read and verify fflags, then replace with all flags. + TestFCsr(0x00319173 /* fscsr x2,x3 */, fflags, fflags, 0); + + // Only read fcsr and verify fflags. + TestFCsr(0x00302173 /* frcsr x2 */, /* ignored */ 0, fflags, /* expected_frm= */ 0b100u); } for (bool immediate_source : {true, false}) { - for (uint8_t riscv_fflags = 0; riscv_fflags < 32; ++riscv_fflags) { + for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) { EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0); if (immediate_source) { - TestFFlags(0x00105173 | (riscv_fflags << 15), 0, 0); + TestFCsr(0x00305173 /* csrrwi x2,fcsr,0 */ | (fflags << 15), 0, 0, 0); } else { - TestFFlags(0x00119173, riscv_fflags, 0); + TestFCsr(0x00319173 /* fscsr x2,x3 */, 0b100'0000 | fflags, 0, /* expected_frm= */ 0b010u); } - EXPECT_EQ(bool(riscv_fflags & FPFlags::NX), bool(fetestexcept(FE_INEXACT))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::UF), bool(fetestexcept(FE_UNDERFLOW))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::OF), bool(fetestexcept(FE_OVERFLOW))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::DZ), bool(fetestexcept(FE_DIVBYZERO))); - EXPECT_EQ(bool(riscv_fflags & FPFlags::NV), bool(fetestexcept(FE_INVALID))); + TestFFlagsOnGuestAndHost(fflags); } } - - 
EXPECT_EQ(fesetenv(&saved_environment), 0); } TEST_F(TESTSUITE, FsrRegister) { ScopedRoundingMode scoped_rounding_mode; - int rounding[][2] = { - {0, FE_TONEAREST}, - {1, FE_TOWARDZERO}, - {2, FE_DOWNWARD}, - {3, FE_UPWARD}, - {4, FE_TOWARDZERO}, - // Only low three bits must be affecting output (for forward compatibility). - {8, FE_TONEAREST}, - {9, FE_TOWARDZERO}, - {10, FE_DOWNWARD}, - {11, FE_UPWARD}, - {12, FE_TOWARDZERO} - }; + int rounding[][2] = {{0, FE_TONEAREST}, + {1, FE_TOWARDZERO}, + {2, FE_DOWNWARD}, + {3, FE_UPWARD}, + {4, FE_TOWARDZERO}, + // Only low three bits must be affecting output (for forward compatibility). + {8, FE_TONEAREST}, + {9, FE_TOWARDZERO}, + {10, FE_DOWNWARD}, + {11, FE_UPWARD}, + {12, FE_TOWARDZERO}}; for (bool immediate_source : {true, false}) { for (auto [guest_rounding, host_rounding] : rounding) { if (immediate_source) { @@ -1897,6 +1916,60 @@ TEST_F(TESTSUITE, Fmv) { {std::tuple{bit_cast<uint64_t>(1.0), 1.0}, {bit_cast<uint64_t>(-1.0), -1.0}}); } +const uint32_t kPosNanFloat = kFPValueToFPReg(std::numeric_limits<float>::quiet_NaN()); +const uint32_t kNegNanFloat = kFPValueToFPReg(-std::numeric_limits<float>::quiet_NaN()); +const uint64_t kPosNanDouble = kFPValueToFPReg(std::numeric_limits<double>::quiet_NaN()); +const uint64_t kNegNanDouble = kFPValueToFPReg(-std::numeric_limits<double>::quiet_NaN()); +constexpr uint64_t kMaskFloatBits = (uint64_t{1} << 32) - 1; + +TEST_F(TESTSUITE, FabsSinglePrecisionNanPosToPos) { + SetFReg<2>(state_.cpu, kPosNanFloat); + RunInstruction(0x202120d3); // fabs.s f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat); +} + +TEST_F(TESTSUITE, FabsSinglePrecisionNanNegToPos) { + SetFReg<2>(state_.cpu, kNegNanFloat); + RunInstruction(0x202120d3); // fabs.s f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat); +} + +TEST_F(TESTSUITE, FabsDoublePrecisionNanPosToPos) { + SetFReg<2>(state_.cpu, kPosNanDouble); + RunInstruction(0x222120d3); // fabs.d f1, f2 + 
EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble); +} + +TEST_F(TESTSUITE, FabsDoublePrecisionNanNegToPos) { + SetFReg<2>(state_.cpu, kNegNanDouble); + RunInstruction(0x222120d3); // fabs.d f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble); +} + +TEST_F(TESTSUITE, FnegSinglePrecisionNanPosToNeg) { + SetFReg<2>(state_.cpu, kPosNanFloat); + RunInstruction(0x202110d3); // fneg.s f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kNegNanFloat); +} + +TEST_F(TESTSUITE, FnegSinglePrecisionNanNegToPos) { + SetFReg<2>(state_.cpu, kNegNanFloat); + RunInstruction(0x202110d3); // fneg.s f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat); +} + +TEST_F(TESTSUITE, FnegDoublePrecisionNanPosToNeg) { + SetFReg<2>(state_.cpu, kPosNanDouble); + RunInstruction(0x222110d3); // fneg.d f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu), kNegNanDouble); +} + +TEST_F(TESTSUITE, FnegDoublePrecisionNanNegToPos) { + SetFReg<2>(state_.cpu, kNegNanDouble); + RunInstruction(0x222110d3); // fneg.d f1, f2 + EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble); +} + TEST_F(TESTSUITE, OpFpFcvt) { // Fcvt.S.D TestOpFpSingleInput(0x401170d3, {std::tuple{1.0, 1.0f}}); diff --git a/kernel_api/riscv64/syscall_numbers.cc b/test_utils/tests_main.cc index 6d14a158..f8a2e42f 100644 --- a/kernel_api/riscv64/syscall_numbers.cc +++ b/test_utils/tests_main.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 The Android Open Source Project + * Copyright (C) 2024 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +14,12 @@ * limitations under the License.
*/ -#include "berberis/base/checks.h" +#include "gtest/gtest.h" -namespace berberis { +#include "berberis/runtime/berberis.h" -int ToHostSyscallNumber(int) { - FATAL("Not implemented ToHostSyscallNumber"); +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + berberis::InitBerberis(); + return RUN_ALL_TESTS(); } - -} // namespace berberis diff --git a/tests/inline_asm_tests/main_riscv64.cc b/tests/inline_asm_tests/main_riscv64.cc index a7566f1d..740c55ec 100644 --- a/tests/inline_asm_tests/main_riscv64.cc +++ b/tests/inline_asm_tests/main_riscv64.cc @@ -17,7 +17,12 @@ #include "gtest/gtest.h" #include <cstdint> +#include <cstdlib> +#include <cstring> +#include <iomanip> +#include <iostream> #include <tuple> +#include <utility> namespace { @@ -26,42 +31,261 @@ constexpr T BitUtilLog2(T x) { return __builtin_ctz(x); } -// TODO(b/301577077): Maybe use __uint128_t instead. -// Or provide a more versatile wrapper, that one can easily init, copy and compare. -using __v2du = uint64_t[2]; - -constexpr __v2du kVectorCalculationsSource[16] = { - {0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8908}, - {0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9918}, - {0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a928}, - {0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b938}, - {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, - {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, - {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, - {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}, - - {0x9e0c'9a09'9604'9200, 0x8e1c'8a18'8614'8211}, - {0xbe2c'ba29'b624'b220, 0xae3c'aa38'a634'a231}, - {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251}, - {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271}, - {0x1e8c'1a89'1684'1280, 0x0e9c'0a98'0694'0291}, - {0x3eac'3aa9'36a4'32a0, 0x2ebc'2ab8'26b4'22b1}, - {0x5ecc'5ac9'56c4'52c0, 0x4edc'4ad8'46d4'42d1}, - {0x7eec'7ae9'76e4'72e0, 0x6efc'6af8'66f4'62f1}, +using uint8_16_t = std::tuple<uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + 
uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t, + uint8_t>; +using uint16_8_t = + std::tuple<uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t>; +using uint32_4_t = std::tuple<uint32_t, uint32_t, uint32_t, uint32_t>; +using uint64_2_t = std::tuple<uint64_t, uint64_t>; + +enum PrintModeEndianess { kLittleEndian, kBigEndian }; + +// A wrapper around __uint128 which can be constructed from a pair of uint64_t literals. +class SIMD128 { + public: + SIMD128(){}; + + constexpr SIMD128(uint8_16_t u8) : uint8_{u8} {}; + constexpr SIMD128(uint16_8_t u16) : uint16_{u16} {}; + constexpr SIMD128(uint32_4_t u32) : uint32_{u32} {}; + constexpr SIMD128(uint64_2_t u64) : uint64_{u64} {}; + constexpr SIMD128(__uint128_t u128) : u128_{u128} {}; + + [[nodiscard]] constexpr __uint128_t Get() const { return u128_; } + + constexpr SIMD128& operator=(const SIMD128& other) { + u128_ = other.u128_; + return *this; + }; + constexpr SIMD128& operator|=(const SIMD128& other) { + u128_ |= other.u128_; + return *this; + } + + constexpr bool operator==(const SIMD128& other) const { return u128_ == other.u128_; } + constexpr bool operator!=(const SIMD128& other) const { return u128_ != other.u128_; } + constexpr SIMD128 operator>>(size_t shift_amount) const { return u128_ >> shift_amount; } + constexpr SIMD128 operator<<(size_t shift_amount) const { return u128_ << shift_amount; } + constexpr SIMD128 operator&(SIMD128 other) const { return u128_ & other.u128_; } + constexpr SIMD128 operator|(SIMD128 other) const { return u128_ | other.u128_; } + constexpr SIMD128 operator^(SIMD128 other) const { return u128_ ^ other.u128_; } + constexpr SIMD128 operator~() const { return ~u128_; } + friend std::ostream& operator<<(std::ostream& os, const SIMD128& simd); + + template <size_t N> + std::ostream& Print(std::ostream& os) const { + if constexpr (kSimd128PrintMode == kBigEndian) { + os << std::uppercase << std::hex << std::setw(4) << 
std::setfill('0') << std::get<N>(uint16_); + if constexpr (N > 0) { + os << '\''; + } + } else { + os << std::uppercase << std::hex << std::setw(2) << std::setfill('0') + << static_cast<int>(std::get<N * 2>(uint8_)); + os << std::uppercase << std::hex << std::setw(2) << std::setfill('0') + << static_cast<int>(std::get<N * 2 + 1>(uint8_)); + if constexpr (N < 7) { + os << '\''; + } + } + return os; + } + + template <size_t... N> + std::ostream& PrintEach(std::ostream& os, std::index_sequence<N...>) const { + os << "0x"; + if constexpr (kSimd128PrintMode == kBigEndian) { + (Print<7 - N>(os), ...); + } else { + (Print<N>(os), ...); + } + return os; + } + + private: + union { +#ifdef __GNUC__ + [[gnu::may_alias]] uint8_16_t uint8_; + [[gnu::may_alias]] uint16_8_t uint16_; + [[gnu::may_alias]] uint32_4_t uint32_; + [[gnu::may_alias]] uint64_2_t uint64_; + [[gnu::may_alias]] __uint128_t u128_; +#endif + }; + + // Support for BIG_ENDIAN or LITTLE_ENDIAN printing of SIMD128 values. Change this value + // if you want to see failure results in LITTLE_ENDIAN. + static constexpr const PrintModeEndianess kSimd128PrintMode = kBigEndian; +}; + +// Helps produce easy to read output on failed tests. 
+std::ostream& operator<<(std::ostream& os, const SIMD128& simd) { + return simd.PrintEach(os, std::make_index_sequence<8>()); +} + +constexpr SIMD128 kVectorCalculationsSourceLegacy[16] = { + {{0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8908}}, + {{0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9918}}, + {{0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a928}}, + {{0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b938}}, + {{0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}}, + {{0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}}, + {{0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}}, + {{0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + + {{0x0e0c'0a09'0604'0200, 0x1e1c'1a18'1614'1211}}, + {{0x2e2c'2a29'2624'2220, 0x3e3c'3a38'3634'3231}}, + {{0x4e4c'4a49'4644'4240, 0x5e5c'5a58'5654'5251}}, + {{0x6e6c'6a69'6664'6260, 0x7e7c'7a78'7674'7271}}, + {{0x8e8c'8a89'8684'8280, 0x9e9c'9a98'9694'9291}}, + {{0xaeac'aaa9'a6a4'a2a0, 0xbebc'bab8'b6b4'b2b1}}, + {{0xcecc'cac9'c6c4'c2c0, 0xdedc'dad8'd6d4'd2d1}}, + {{0xeeec'eae9'e6e4'e2e0, 0xfefc'faf8'f6f4'f2f1}}, +}; + +constexpr SIMD128 kVectorCalculationsSource[16] = { + {{0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8908}}, + {{0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9918}}, + {{0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a928}}, + {{0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b938}}, + {{0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}}, + {{0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}}, + {{0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}}, + {{0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + + {{0x9e0c'9a09'9604'9200, 0x8e1c'8a18'8614'8211}}, + {{0xbe2c'ba29'b624'b220, 0xae3c'aa38'a634'a231}}, + {{0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251}}, + {{0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271}}, + {{0x1e8c'1a89'1684'1280, 0x0e9c'0a98'0694'0291}}, + {{0x3eac'3aa9'36a4'32a0, 0x2ebc'2ab8'26b4'22b1}}, + {{0x5ecc'5ac9'56c4'52c0, 0x4edc'4ad8'46d4'42d1}}, + {{0x7eec'7ae9'76e4'72e0, 0x6efc'6af8'66f4'62f1}}, }; // Easily recognizable bit pattern for target register. 
-constexpr __v2du kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}; -constexpr __v2du kAgnosticResult = {~uint64_t{0U}, ~uint64_t{0U}}; +constexpr SIMD128 kUndisturbedResult{{0x5555'5555'5555'5555, 0x5555'5555'5555'5555}}; + +SIMD128 GetAgnosticResult() { + static const bool kRvvAgnosticIsUndisturbed = getenv("RVV_AGNOSTIC_IS_UNDISTURBED") != nullptr; + if (kRvvAgnosticIsUndisturbed) { + return kUndisturbedResult; + } + return {{~uint64_t{0U}, ~uint64_t{0U}}}; +} + +const SIMD128 kAgnosticResult = GetAgnosticResult(); // Mask in form suitable for storing in v0 and use in v0.t form. -static constexpr __v2du kMask = {0xd5ad'd6b5'ad6b'b5ad, 0x6af7'57bb'deed'7bb5}; +static constexpr SIMD128 kMask{{0xd5ad'd6b5'ad6b'b5ad, 0x6af7'57bb'deed'7bb5}}; +// Mask used with vsew = 0 (8bit) elements. +static constexpr SIMD128 kMaskInt8[8] = { + {{255, 0, 255, 255, 0, 255, 0, 255, 255, 0, 255, 0, 255, 255, 0, 255}}, + {{255, 255, 0, 255, 0, 255, 255, 0, 255, 0, 255, 255, 0, 255, 0, 255}}, + {{255, 0, 255, 0, 255, 255, 0, 255, 0, 255, 255, 0, 255, 0, 255, 255}}, + {{255, 0, 255, 255, 0, 255, 0, 255, 255, 0, 255, 0, 255, 0, 255, 255}}, + {{255, 0, 255, 0, 255, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 0}}, + {{255, 0, 255, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 0, 255, 255}}, + {{255, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 0, 255, 0, 255, 0}}, + {{255, 255, 255, 0, 255, 255, 255, 255, 0, 255, 0, 255, 0, 255, 255, 0}}, +}; +// Mask used with vsew = 1 (16bit) elements. 
+static constexpr SIMD128 kMaskInt16[8] = { + {{0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff}}, + {{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff}}, + {{0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000}}, + {{0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff}}, + {{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff}}, + {{0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff}}, + {{0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff}}, + {{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff}}, +}; +// Mask used with vsew = 2 (32bit) elements. +static constexpr SIMD128 kMaskInt32[8] = { + {{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff}}, + {{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff}}, + {{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000}}, + {{0xffff'ffff, 0xffff'ffff, 0x0000'0000, 0xffff'ffff}}, + {{0xffff'ffff, 0xffff'ffff, 0x0000'0000, 0xffff'ffff}}, + {{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0x0000'0000}}, + {{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff}}, + {{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff}}, +}; +// Mask used with vsew = 3 (64bit) elements. +static constexpr SIMD128 kMaskInt64[8] = { + {{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff}}, + {{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000}}, + {{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff}}, +}; +// To verify operations without masking. 
+static constexpr SIMD128 kNoMask[8] = { + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, +}; + +// Half of sub-register lmul. +static constexpr SIMD128 kFractionMaskInt8[5] = { + {{255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Half of 1/8 reg = 1/16 + {{255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Half of 1/4 reg = 1/8 + {{255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Half of 1/2 reg = 1/4 + {{255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0}}, // Half of full reg = 1/2 + {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, // Full reg +}; + +template <typename ElementType> +auto MaskForElem() { + if constexpr (std::is_same_v<ElementType, uint8_t>) { + return kMaskInt8; + } else if constexpr (std::is_same_v<ElementType, uint16_t>) { + return kMaskInt16; + } else if constexpr (std::is_same_v<ElementType, uint32_t>) { + return kMaskInt32; + } else if constexpr (std::is_same_v<ElementType, uint64_t>) { + return kMaskInt64; + } else { + static_assert(false); + } +} + +template <bool kIsMasked, typename ElementType> +auto MaskForElemIfMasked() { + if constexpr (!kIsMasked) { + return kNoMask; + } else { + return MaskForElem<ElementType>(); + } +} using ExecInsnFunc = void (*)(); void RunTwoVectorArgsOneRes(ExecInsnFunc exec_insn, - const __v2du* src, - __v2du* res, + const SIMD128* src, + SIMD128* res, uint64_t vtype, uint64_t vlmax) { uint64_t vstart, vl; @@ -124,11 +348,300 @@ void RunTwoVectorArgsOneRes(ExecInsnFunc exec_insn, EXPECT_EQ(vl, vlmax); } +// Supports ExecInsnFuncs that fit the following [inputs...] 
-> output formats: +// vector -> vector +// vector, vector -> vector +// vector, scalar -> vector +// vector, float -> vector +// Vectors will be used in v16 first, then v24 +// scalar and float will be filled from scalar_src, and will use t0 and ft0, +// respectively. +void RunCommonVectorFunc(ExecInsnFunc exec_insn, + const SIMD128* src, + SIMD128* res, + uint64_t scalar_src, + uint64_t vstart, + uint64_t vtype, + uint64_t vlin) { + uint64_t vl = vlin; + // Mask register is, unconditionally, v0, and we need 8, 16 or 24 to handle full 8-registers + // inputs thus we use v8..v15 for destination and place sources into v16..v23 and v24..v31. + asm( // Load arguments and undisturbed result. + "vsetvli t0, zero, e64, m8, ta, ma\n\t" + "vle64.v v8, (%[res])\n\t" + "vle64.v v16, (%[src])\n\t" + "addi t0, %[src], 128\n\t" + "vle64.v v24, (t0)\n\t" + // Load mask. + "vsetvli t0, zero, e64, m1, ta, ma\n\t" + "vle64.v v0, (%[mask])\n\t" + // Execute tested instruction. + "vsetvl t0, %[vl], %[vtype]\n\t" + "csrw vstart, %[vstart]\n\t" + "mv t0, %[scalar_src]\n\t" + "fmv.d.x ft0, %[scalar_src]\n\t" + "jalr %[exec_insn]\n\t" + // Save vstart and vl just after insn execution for checks. + "csrr %[vstart], vstart\n\t" + "csrr %[vl], vl\n\t" + // Store the result. + "vsetvli t0, zero, e64, m8, ta, ma\n\t" + "vse64.v v8, (%[res])\n\t" + : [vstart] "=r"(vstart), [vl] "=r"(vl) + : [exec_insn] "r"(exec_insn), + [src] "r"(src), + [res] "r"(res), + [vtype] "r"(vtype), + "0"(vstart), + "1"(vl), + [mask] "r"(&kMask), + [scalar_src] "r"(scalar_src) + : "t0", + "ra", + "ft0", + "v0", + "v8", + "v9", + "v10", + "v11", + "v12", + "v13", + "v14", + "v15", + "v16", + "v17", + "v18", + "v19", + "v20", + "v21", + "v22", + "v23", + "v24", + "v25", + "v26", + "v27", + "v28", + "v29", + "v30", + "v31", + "memory"); + // Every vector instruction must set vstart to 0, but shouldn't touch vl. 
+ EXPECT_EQ(vstart, 0); + EXPECT_EQ(vl, vlin); +} + +enum class TestVectorInstructionKind { kInteger, kFloat }; +enum class TestVectorInstructionMode { kDefault, kWidening, kNarrowing, kVMerge }; + +template <TestVectorInstructionKind kTestVectorInstructionKind, + TestVectorInstructionMode kTestVectorInstructionMode, + typename... ExpectedResultType, + size_t... kResultsCount> +void TestVectorInstructionInternal(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const SIMD128 dst_result, + const SIMD128 (&source)[16], + const ExpectedResultType (&... expected_result)[kResultsCount]) { + auto Verify = [&source, dst_result](ExecInsnFunc exec_insn, + uint8_t vsew, + const auto& expected_result, + const auto& mask) { + uint64_t scalar_src = 0; + if constexpr (kTestVectorInstructionKind == TestVectorInstructionKind::kInteger) { + // Set t0 for vx instructions. + scalar_src = 0xaaaa'aaaa'aaaa'aaaa; + } else { + // We only support Float32/Float64 for float instructions, but there are conversion + // instructions that work with double width floats. + // These instructions never use float registers though and thus we don't need to store + // anything into ft0 register, if they are used. + // For Float32/Float64 case we load 5.625 of the appropriate type into ft0. + ASSERT_LE(vsew, 3); + if (vsew == 2) { + scalar_src = 0xffff'ffff'40b4'0000; // float 5.625 + } else if (vsew == 3) { + scalar_src = 0x4016'8000'0000'0000; // double 5.625 + } + } + for (uint8_t vlmul = 0; vlmul < 8; ++vlmul) { + if constexpr (kTestVectorInstructionMode == TestVectorInstructionMode::kNarrowing || + kTestVectorInstructionMode == TestVectorInstructionMode::kWidening) { + // Incompatible vlmul for narrowing and widening. 
+ if (vlmul == 3) { + continue; + } + } + for (uint8_t vta = 0; vta < 2; ++vta) { + for (uint8_t vma = 0; vma < 2; ++vma) { + uint64_t vtype = (vma << 7) | (vta << 6) | (vsew << 3) | vlmul; + uint64_t vlmax = 0; + asm("vsetvl %0, zero, %1" : "=r"(vlmax) : "r"(vtype)); + // Incompatible vsew and vlmax. Skip it. + if (vlmax == 0) { + continue; + } + uint8_t emul = + (vlmul + (kTestVectorInstructionMode == TestVectorInstructionMode::kWidening)) & + 0b111; + + // To make tests quick enough we don't test vstart and vl change with small register + // sets. Only with vlmul == 2 (4 registers) we set vstart and vl to skip half of first + // register, last register and half of next-to last register. + // Don't use vlmul == 3 because that one may not be supported if instruction widens the + // result. + uint64_t vstart; + uint64_t vl; + if (emul == 2) { + vstart = vlmax / 8; + vl = (vlmax * 5) / 8; + } else { + vstart = 0; + vl = vlmax; + } + + SIMD128 result[8]; + // Set expected_result vector registers into 0b01010101… pattern. + // Set undisturbed result vector registers. + std::fill_n(result, 8, dst_result); + + RunCommonVectorFunc(exec_insn, &source[0], &result[0], scalar_src, vstart, vtype, vl); + + // Values for inactive elements (i.e. corresponding mask bit is 0). + SIMD128 expected_inactive[8]; + if constexpr (kTestVectorInstructionMode == TestVectorInstructionMode::kVMerge) { + // vs2 is the start of the source vector register group. + // Note: copy_n input/output args are backwards compared to fill_n below. + std::copy_n(source, 8, expected_inactive); + } else { + // For most instructions, follow basic inactive processing rules based on vma flag. + std::fill_n(expected_inactive, 8, (vma ? 
kAgnosticResult : dst_result)); + } + + if (emul < 4) { + for (size_t index = 0; index < 1 << emul; ++index) { + if (index == 0 && emul == 2) { + EXPECT_EQ(result[index], + ((dst_result & kFractionMaskInt8[3]) | + (SIMD128{expected_result[index]} & mask[index] & ~kFractionMaskInt8[3]) | + (expected_inactive[index] & ~mask[index] & ~kFractionMaskInt8[3]))); + } else if (index == 2 && emul == 2) { + EXPECT_EQ(result[index], + ((SIMD128{expected_result[index]} & mask[index] & kFractionMaskInt8[3]) | + (expected_inactive[index] & ~mask[index] & kFractionMaskInt8[3]) | + ((vta ? kAgnosticResult : dst_result) & ~kFractionMaskInt8[3]))); + } else if (index == 3 && emul == 2 && vta) { + EXPECT_EQ(result[index], kAgnosticResult); + } else if (index == 3 && emul == 2) { + EXPECT_EQ(result[index], dst_result); + } else { + EXPECT_EQ(result[index], + ((SIMD128{expected_result[index]} & mask[index]) | + ((expected_inactive[index] & ~mask[index])))); + } + } + } else { + EXPECT_EQ(result[0], + ((SIMD128{expected_result[0]} & mask[0] & kFractionMaskInt8[emul - 4]) | + (expected_inactive[0] & ~mask[0] & kFractionMaskInt8[emul - 4]) | + ((vta ? kAgnosticResult : dst_result) & ~kFractionMaskInt8[emul - 4]))); + } + } + } + } + }; + + ((Verify(exec_insn, + BitUtilLog2(sizeof(std::tuple_element_t<0, ExpectedResultType>)) - + (kTestVectorInstructionMode == TestVectorInstructionMode::kWidening), + expected_result, + kNoMask), + Verify(exec_masked_insn, + BitUtilLog2(sizeof(std::tuple_element_t<0, ExpectedResultType>)) - + (kTestVectorInstructionMode == TestVectorInstructionMode::kWidening), + expected_result, + MaskForElem<std::tuple_element_t<0, ExpectedResultType>>())), + ...); +} + +template <TestVectorInstructionKind kTestVectorInstructionKind, + TestVectorInstructionMode kTestVectorInstructionMode, + typename... ExpectedResultType, + size_t... 
kResultsCount> +void TestVectorInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const SIMD128 (&source)[16], + const ExpectedResultType (&... expected_result)[kResultsCount]) { + TestVectorInstructionInternal<kTestVectorInstructionKind, kTestVectorInstructionMode>( + exec_insn, exec_masked_insn, kUndisturbedResult, source, expected_result...); +} + +void TestVectorInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint8_16_t (&expected_result_int8)[8], + const uint16_8_t (&expected_result_int16)[8], + const uint32_4_t (&expected_result_int32)[8], + const uint64_2_t (&expected_result_int64)[8], + const SIMD128 (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kInteger, TestVectorInstructionMode::kDefault>( + exec_insn, + exec_masked_insn, + source, + expected_result_int8, + expected_result_int16, + expected_result_int32, + expected_result_int64); +} + +void TestVectorFloatInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint32_4_t (&expected_result_int32)[8], + const uint64_2_t (&expected_result_int64)[8], + const SIMD128 (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kDefault>( + exec_insn, exec_masked_insn, source, expected_result_int32, expected_result_int64); +} + +void TestNarrowingVectorFloatInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint32_4_t (&expected_result_int32)[4], + const SIMD128 (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>( + exec_insn, exec_masked_insn, source, expected_result_int32); +} + +void TestNarrowingVectorFloatInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint16_8_t (&expected_result_int16)[4], + const uint32_4_t (&expected_result_int32)[4], + const SIMD128 (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kFloat, 
TestVectorInstructionMode::kNarrowing>( + exec_insn, exec_masked_insn, source, expected_result_int16, expected_result_int32); +} + +void TestWideningVectorFloatInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint64_2_t (&expected_result_int64)[8], + const SIMD128 (&source)[16], + SIMD128 dst_result = kUndisturbedResult) { + TestVectorInstructionInternal<TestVectorInstructionKind::kFloat, + TestVectorInstructionMode::kWidening>( + exec_insn, exec_masked_insn, dst_result, source, expected_result_int64); +} + +void TestWideningVectorFloatInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint32_4_t (&expected_result_int32)[8], + const uint64_2_t (&expected_result_int64)[8], + const SIMD128 (&source)[16]) { + TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kWidening>( + exec_insn, exec_masked_insn, source, expected_result_int32, expected_result_int64); +} + template <typename... ExpectedResultType> void TestVectorReductionInstruction( ExecInsnFunc exec_insn, ExecInsnFunc exec_masked_insn, - const __v2du (&source)[16], + const SIMD128 (&source)[16], std::tuple<const ExpectedResultType (&)[8], const ExpectedResultType (&)[8]>... expected_result) { // Each expected_result input to this function is the vd[0] value of the reduction, for each @@ -148,18 +661,18 @@ void TestVectorReductionInstruction( continue; } - __v2du result[8]; + SIMD128 result[8]; // Set undisturbed result vector registers. for (size_t index = 0; index < 8; ++index) { - memcpy(&result[index], &kUndisturbedResult, sizeof(result[index])); + result[index] = kUndisturbedResult; } // Expectations for reductions are for swapped source arguments. 
- __v2du sources[16]{}; - memcpy(&sources[0], &kVectorCalculationsSource[8], sizeof(sources[0]) * 8); - memcpy(&sources[8], &kVectorCalculationsSource[0], sizeof(sources[0]) * 8); + SIMD128 two_sources[16]{}; + memcpy(&two_sources[0], &source[8], sizeof(two_sources[0]) * 8); + memcpy(&two_sources[8], &source[0], sizeof(two_sources[0]) * 8); - RunTwoVectorArgsOneRes(exec_insn, &sources[0], &result[0], vtype, vlmax); + RunTwoVectorArgsOneRes(exec_insn, &two_sources[0], &result[0], vtype, vlmax); // Reduction instructions are unique in that they produce a scalar // output to a single vector register as opposed to a register group. @@ -174,20 +687,15 @@ void TestVectorReductionInstruction( // Verify that the destination register holds the reduction in the // first element and the tail policy applies to the remaining. - __uint128_t expected_result_register; - if (vta) { - memcpy(&expected_result_register, &kAgnosticResult, sizeof(expected_result_register)); - } else { - memcpy(&expected_result_register, &kUndisturbedResult, sizeof(expected_result_register)); - } + SIMD128 expected_result_register = vta ? kAgnosticResult : kUndisturbedResult; size_t vsew_bits = 8 << vsew; expected_result_register = (expected_result_register >> vsew_bits) << vsew_bits; expected_result_register |= expected_result; - EXPECT_TRUE(memcmp(&result[0], &expected_result_register, sizeof(result[0])) == 0); + EXPECT_EQ(result[0], expected_result_register) << " vtype=" << vtype; // Verify all non-destination registers are undisturbed. 
for (size_t index = 1; index < 8; ++index) { - EXPECT_TRUE(memcmp(&result[index], &kUndisturbedResult, sizeof(result[index])) == 0); + EXPECT_EQ(result[index], kUndisturbedResult) << " vtype=" << vtype; } } } @@ -208,6 +716,23 @@ void TestVectorReductionInstruction( void TestVectorReductionInstruction(ExecInsnFunc exec_insn, ExecInsnFunc exec_masked_insn, + const uint32_t (&expected_result_vd0_int32)[8], + const uint64_t (&expected_result_vd0_int64)[8], + const uint32_t (&expected_result_vd0_with_mask_int32)[8], + const uint64_t (&expected_result_vd0_with_mask_int64)[8], + const SIMD128 (&source)[16]) { + TestVectorReductionInstruction( + exec_insn, + exec_masked_insn, + source, + std::tuple<const uint32_t(&)[8], const uint32_t(&)[8]>{expected_result_vd0_int32, + expected_result_vd0_with_mask_int32}, + std::tuple<const uint64_t(&)[8], const uint64_t(&)[8]>{expected_result_vd0_int64, + expected_result_vd0_with_mask_int64}); +} + +void TestVectorReductionInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, const uint8_t (&expected_result_vd0_int8)[8], const uint16_t (&expected_result_vd0_int16)[8], const uint32_t (&expected_result_vd0_int32)[8], @@ -216,7 +741,7 @@ void TestVectorReductionInstruction(ExecInsnFunc exec_insn, const uint16_t (&expected_result_vd0_with_mask_int16)[8], const uint32_t (&expected_result_vd0_with_mask_int32)[8], const uint64_t (&expected_result_vd0_with_mask_int64)[8], - const __v2du (&source)[16]) { + const SIMD128 (&source)[16]) { TestVectorReductionInstruction( exec_insn, exec_masked_insn, @@ -231,16 +756,119 @@ void TestVectorReductionInstruction(ExecInsnFunc exec_insn, expected_result_vd0_with_mask_int64}); } -[[gnu::naked]] void ExecVredsum() { - asm("vredsum.vs v8,v16,v24\n\t" - "ret\n\t"); +template <bool kIsMasked, typename... ExpectedResultType, size_t... kResultsCount> +void TestVectorIota(ExecInsnFunc exec_insn, + const SIMD128 (&source)[16], + const ExpectedResultType (&... 
expected_result)[kResultsCount]) { + auto Verify = [&source](ExecInsnFunc exec_insn, + uint8_t vsew, + const auto& expected_result, + auto elem_mask) { + for (uint8_t vlmul = 0; vlmul < 8; ++vlmul) { + for (uint8_t vta = 0; vta < 2; ++vta) { + for (uint8_t vma = 0; vma < 2; ++vma) { + uint64_t vtype = (vma << 7) | (vta << 6) | (vsew << 3) | vlmul; + uint64_t vlmax = 0; + asm("vsetvl %0, zero, %1" : "=r"(vlmax) : "r"(vtype)); + if (vlmax == 0) { + continue; + } + + // Run the instruction twice: with vl == 0 and with vl == vlmax. The bound is inclusive and + // the counter is 64-bit so that the vl == vlmax pass is reached and vl += vlmax can't wrap. + for (uint64_t vl = 0; vl <= vlmax; vl += vlmax) { + // To make tests quick enough we don't test vl change with small register sets. Only + // with vlmul == 2 (4 registers) we set vl to skip last register and half of next-to + // last register. + uint64_t vlin; + if (vlmul == 2 && vl == vlmax) { + vlin = 5 * vlmax / 8; + } else { + vlin = vl; + } + + SIMD128 result[8]; + // Set expected_result vector registers into 0b01010101… pattern. + // Set undisturbed result vector registers. + std::fill_n(result, 8, kUndisturbedResult); + + RunCommonVectorFunc(exec_insn, &source[0], &result[0], 0, 0, vtype, vlin); + + SIMD128 expected_inactive[8]; + std::fill_n(expected_inactive, 8, (vma ? kAgnosticResult : kUndisturbedResult)); + + // vl of 0 should never change dst registers + if (vl == 0) { + for (size_t index = 0; index < 8; ++index) { + EXPECT_EQ(result[index], kUndisturbedResult); + } + } else if (vlmul < 4) { + // Check every register of the group exactly once. + for (size_t index = 0; index < 1 << vlmul; ++index) { + if (index == 2 && vlmul == 2) { + EXPECT_EQ( + result[index], + ((SIMD128{expected_result[index]} & elem_mask[index] & + kFractionMaskInt8[3]) | + (expected_inactive[index] & ~elem_mask[index] & kFractionMaskInt8[3]) | + ((vta ? kAgnosticResult : kUndisturbedResult) & ~kFractionMaskInt8[3]))); + } else if (index == 3 && vlmul == 2) { + EXPECT_EQ(result[index], vta ? 
kAgnosticResult : kUndisturbedResult); + } else { + EXPECT_EQ(result[index], + ((SIMD128{expected_result[index]} & elem_mask[index]) | + (expected_inactive[index] & ~elem_mask[index]))); + } + } + } else { + // vlmul >= 4 only uses 1 register + EXPECT_EQ( + result[0], + ((SIMD128{expected_result[0]} & elem_mask[0] & kFractionMaskInt8[vlmul - 4]) | + (expected_inactive[0] & ~elem_mask[0] & kFractionMaskInt8[vlmul - 4]) | + ((vta ? kAgnosticResult : kUndisturbedResult) & ~kFractionMaskInt8[vlmul - 4]))); + } + } + } + } + } + }; + + (Verify(exec_insn, + BitUtilLog2(sizeof(std::tuple_element_t<0, ExpectedResultType>)), + expected_result, + MaskForElemIfMasked<kIsMasked, std::tuple_element_t<0, ExpectedResultType>>()), + ...); } -[[gnu::naked]] void ExecMaskedVredsum() { - asm("vredsum.vs v8,v16,v24,v0.t\n\t" - "ret\n\t"); +template <bool kIsMasked> +void TestVectorIota(ExecInsnFunc exec_insn, + const uint8_16_t (&expected_result_int8)[8], + const uint16_8_t (&expected_result_int16)[8], + const uint32_4_t (&expected_result_int32)[8], + const uint64_2_t (&expected_result_int64)[8], + const SIMD128 (&source)[16]) { + TestVectorIota<kIsMasked>(exec_insn, + source, + expected_result_int8, + expected_result_int16, + expected_result_int32, + expected_result_int64); } +// clang-format off +#define DEFINE_TWO_ARG_ONE_RES_FUNCTION(Name, Asm) \ + [[gnu::naked]] void Exec##Name() { \ + asm(#Asm " v8,v16,v24\n\t" \ + "ret\n\t"); \ + } \ + [[gnu::naked]] void ExecMasked##Name() { \ + asm(#Asm " v8,v16,v24,v0.t\n\t" \ + "ret\n\t"); \ + } +// clang-format on + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredsum, vredsum.vs) + TEST(InlineAsmTestRiscv64, TestVredsum) { TestVectorReductionInstruction( ExecVredsum, @@ -292,4 +920,2685 @@ TEST(InlineAsmTestRiscv64, TestVredsum) { kVectorCalculationsSource); } +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vfredosum, vfredosum.vs) + +TEST(InlineAsmTestRiscv64, TestVfredosum) { + TestVectorReductionInstruction(ExecVfredosum, + ExecMaskedVfredosum, + // 
expected_result_vd0_int32 + {0x9e0c'9a8e, + 0xbe2c'bace, + 0xfe6c'fb4e, + 0x7e6b'fc4d, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9e0c'9a8e}, + // expected_result_vd0_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xfe6c'fa69'f664'f260, + 0x7eec'5def'0cee'0dee, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + // expected_result_vd0_with_mask_int32 + {0x9604'929d, + 0xbe2c'ba29, + 0xfe6c'fb4e, + 0x7e6b'fa84, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9604'9200}, + // expected_result_vd0_with_mask_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xee7c'ea78'e674'e271, + 0x6efc'4e0d'ee0d'ee0f, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vfredusum, vfredusum.vs) + +// Currently Vfredusum is implemented as Vfredosum (as explicitly permitted by RVV 1.0). +// If we would implement some speedups which would change results then we may need to alter tests. 
+TEST(InlineAsmTestRiscv64, TestVfredusum) { + TestVectorReductionInstruction(ExecVfredusum, + ExecMaskedVfredusum, + // expected_result_vd0_int32 + {0x9e0c'9a8e, + 0xbe2c'bace, + 0xfe6c'fb4e, + 0x7e6b'fc4d, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9e0c'9a8e}, + // expected_result_vd0_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xfe6c'fa69'f664'f260, + 0x7eec'5def'0cee'0dee, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + // expected_result_vd0_with_mask_int32 + {0x9604'929d, + 0xbe2c'ba29, + 0xfe6c'fb4e, + 0x7e6b'fa84, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9604'9200}, + // expected_result_vd0_with_mask_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xee7c'ea78'e674'e271, + 0x6efc'4e0d'ee0d'ee0f, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredand, vredand.vs) + +TEST(InlineAsmTestRiscv64, TestVredand) { + TestVectorReductionInstruction( + ExecVredand, + ExecMaskedVredand, + // expected_result_vd0_int8 + {0, 0, 0, 0, /* unused */ 0, 0, 0, 0}, + // expected_result_vd0_int16 + {0x8000, 0x8000, 0x8000, 0x0000, /* unused */ 0, 0x8000, 0x8000, 0x8000}, + // expected_result_vd0_int32 + {0x8200'8000, + 0x8200'8000, + 0x8200'8000, + 0x0200'0000, + /* unused */ 0, + /* unused */ 0, + 0x8200'8000, + 0x8200'8000}, + // expected_result_vd0_int64 + {0x8604'8000'8200'8000, + 0x8604'8000'8200'8000, + 0x8604'8000'8200'8000, + 0x0604'0000'0200'0000, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8604'8000'8200'8000}, + // expected_result_vd0_with_mask_int8 + {0, 0, 0, 0, /* unused */ 0, 0, 0, 0}, + // expected_result_vd0_with_mask_int16 + {0x8000, 0x8000, 0x8000, 0x0000, /* unused */ 0, 0x8000, 0x8000, 0x8000}, + // expected_result_vd0_with_mask_int32 + {0x8200'8000, + 0x8200'8000, + 0x8200'8000, + 0x0200'0000, + /* unused */ 0, + /* unused */ 0, + 0x8200'8000, + 0x8200'8000}, + 
// expected_result_vd0_with_mask_int64 + {0x8604'8000'8200'8000, + 0x8604'8000'8200'8000, + 0x8604'8000'8200'8000, + 0x0604'0000'0200'0000, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8604'8000'8200'8000}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredor, vredor.vs) + +TEST(InlineAsmTestRiscv64, TestVredor) { + TestVectorReductionInstruction( + ExecVredor, + ExecMaskedVredor, + // expected_result_vd0_int8 + {159, 191, 255, 255, /* unused */ 0, 146, 150, 159}, + // expected_result_vd0_int16 + {0x9f1d, 0xbf3d, 0xff7d, 0xfffd, /* unused */ 0, 0x9300, 0x9704, 0x9f0d}, + // expected_result_vd0_int32 + {0x9f1e'9b19, + 0xbf3e'bb39, + 0xff7e'fb79, + 0xfffe'fbf9, + /* unused */ 0, + /* unused */ 0, + 0x9706'9300, + 0x9f0e'9b09}, + // expected_result_vd0_int64 + {0x9f1e'9f1d'9716'9311, + 0xbf3e'bf3d'b736'b331, + 0xff7e'ff7d'f776'f371, + 0xfffe'fffd'f7f6'f3f1, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9f0e'9f0d'9706'9300}, + // expected_result_vd0_with_mask_int8 + {159, 191, 255, 255, /* unused */ 0, 0, 150, 158}, + // expected_result_vd0_with_mask_int16 + {0x9f1d, 0xbf3d, 0xff7d, 0xfffd, /* unused */ 0, 0x9300, 0x9300, 0x9f0d}, + // expected_result_vd0_with_mask_int32 + {0x9f1e'9b19, + 0xbf3e'bb39, + 0xff7e'fb79, + 0xfffe'fbf9, + /* unused */ 0, + /* unused */ 0, + 0x9706'9300, + 0x9706'9300}, + // expected_result_vd0_with_mask_int64 + {0x9f0e'9f0d'9706'9300, + 0xbf3e'bf3d'b736'b331, + 0xff7e'ff7d'f776'f371, + 0xfffe'fffd'f7f6'f3f1, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9f0e'9f0d'9706'9300}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredxor, vredxor.vs) + +TEST(InlineAsmTestRiscv64, TestVredxor) { + TestVectorReductionInstruction( + ExecVredxor, + ExecMaskedVredxor, + // expected_result_vd0_int8 + {0, 0, 0, 0, /* unused */ 0, 146, 0, 1}, + // expected_result_vd0_int16 + {0x8100, 0x8100, 0x8100, 0x8100, /* unused */ 0, 0x1300, 0x8504, 0x8101}, + // expected_result_vd0_int32 + 
{0x8302'8100, + 0x8302'8100, + 0x8302'8100, + 0x8302'8100, + /* unused */ 0, + /* unused */ 0, + 0x1506'1300, + 0x8b0a'8909}, + // expected_result_vd0_int64 + {0x9716'9515'9312'9111, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x190a'1f0d'1506'1300}, + // expected_result_vd0_with_mask_int8 + {143, 154, 150, 43, /* unused */ 0, 0, 146, 150}, + // expected_result_vd0_with_mask_int16 + {0x1f0d, 0xbd3d, 0x9514, 0x8d0d, /* unused */ 0, 0x1300, 0x1300, 0x1705}, + // expected_result_vd0_with_mask_int32 + {0x1d0e'1b09, + 0x0d1e'0b18, + 0xfb7a'f978, + 0xab2a'a929, + /* unused */ 0, + /* unused */ 0, + 0x1506'1300, + 0x1506'1300}, + // expected_result_vd0_with_mask_int64 + {0x190a'1f0d'1506'1300, + 0x091a'0f1c'0516'0311, + 0x293a'2f3c'2536'2331, + 0x77f6'75f5'73f2'71f1, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x190a'1f0d'1506'1300}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredminu, vredminu.vs) + +TEST(InlineAsmTestRiscv64, TestVredminu) { + TestVectorReductionInstruction( + ExecVredminu, + ExecMaskedVredminu, + // expected_result_vd0_int8 + {0, 0, 0, 0, /* unused */ 0, 0, 0, 0}, + // expected_result_vd0_int16 + {0x8100, 0x8100, 0x8100, 0x0291, /* unused */ 0, 0x8100, 0x8100, 0x8100}, + // expected_result_vd0_int32 + {0x83028100, + 0x83028100, + 0x83028100, + 0x06940291, + /* unused */ 0, + /* unused */ 0, + 0x83028100, + 0x83028100}, + // expected_result_vd0_int64 + {0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x0e9c'0a98'0694'0291, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8706'8504'8302'8100}, + // expected_result_vd0_with_mask_int8 + {0, 0, 0, 0, /* unused */ 0, 0, 0, 0}, + // expected_result_vd0_with_mask_int16 + {0x8100, 0x8100, 0x8100, 0x0291, /* unused */ 0, 0x8100, 0x8100, 0x8100}, + // expected_result_vd0_with_mask_int32 + {0x8302'8100, + 0x8302'8100, + 0x8302'8100, + 0x0e9c'0a98, + /* 
unused */ 0, + /* unused */ 0, + 0x8302'8100, + 0x8302'8100}, + // expected_result_vd0_with_mask_int64 + {0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x1e8c'1a89'1684'1280, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8706'8504'8302'8100}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredmin, vredmin.vs) + +TEST(InlineAsmTestRiscv64, TestVredmin) { + TestVectorReductionInstruction( + ExecVredmin, + ExecMaskedVredmin, + // expected_result_vd0_int8 + {130, 130, 130, 128, /* unused */ 0, 146, 146, 146}, + // expected_result_vd0_int16 + {0x8100, 0x8100, 0x8100, 0x8100, /* unused */ 0, 0x8100, 0x8100, 0x8100}, + // expected_result_vd0_int32 + {0x8302'8100, + 0x8302'8100, + 0x8302'8100, + 0x8302'8100, + /* unused */ 0, + /* unused */ 0, + 0x8302'8100, + 0x8302'8100}, + // expected_result_vd0_int64 + {0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8706'8504'8302'8100}, + // expected_result_vd0_with_mask_int8 + {138, 138, 138, 128, /* unused */ 0, 0, 150, 150}, + // expected_result_vd0_with_mask_int16 + {0x8100, 0x8100, 0x8100, 0x8100, /* unused */ 0, 0x8100, 0x8100, 0x8100}, + // expected_result_vd0_with_mask_int32 + {0x8302'8100, + 0x8302'8100, + 0x8302'8100, + 0x8302'8100, + /* unused */ 0, + /* unused */ 0, + 0x8302'8100, + 0x8302'8100}, + // expected_result_vd0_with_mask_int64 + {0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8706'8504'8302'8100}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vfredmin, vfredmin.vs) + +TEST(InlineAsmTestRiscv64, TestVfredmin) { + TestVectorReductionInstruction(ExecVfredmin, + ExecMaskedVfredmin, + // expected_result_vd0_int32 + {0x9e0c'9a09, + 0xbe2c'ba29, + 0xfe6c'fa69, + 0xfe6c'fa69, + /* unused */ 0, + /* unused */ 0, + 
0x9604'9200, + 0x9e0c'9a09}, + // expected_result_vd0_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xfe6c'fa69'f664'f260, + 0xfe6c'fa69'f664'f260, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + // expected_result_vd0_with_mask_int32 + {0x9604'9200, + 0xbe2c'ba29, + 0xfe6c'fa69, + 0xfe6c'fa69, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9604'9200}, + // expected_result_vd0_with_mask_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xee7c'ea78'e674'e271, + 0xee7c'ea78'e674'e271, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredmaxu, vredmaxu.vs) + +TEST(InlineAsmTestRiscv64, TestVredmaxu) { + TestVectorReductionInstruction( + ExecVredmaxu, + ExecMaskedVredmaxu, + // expected_result_vd0_int8 + {158, 190, 254, 254, /* unused */ 0, 146, 150, 158}, + // expected_result_vd0_int16 + {0x9e0c, 0xbe2c, 0xfe6c, 0xfe6c, /* unused */ 0, 0x9200, 0x9604, 0x9e0c}, + // expected_result_vd0_int32 + {0x9e0c'9a09, + 0xbe2c'ba29, + 0xfe6c'fa69, + 0xfe6c'fa69, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9e0c'9a09}, + // expected_result_vd0_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xfe6c'fa69'f664'f260, + 0xfe6c'fa69'f664'f260, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + // expected_result_vd0_with_mask_int8 + {158, 186, 254, 254, /* unused */ 0, 0, 150, 158}, + // expected_result_vd0_with_mask_int16 + {0x9e0c, 0xba29, 0xfe6c, 0xfe6c, /* unused */ 0, 0x9200, 0x9200, 0x9e0c}, + // expected_result_vd0_with_mask_int32 + {0x9604'9200, + 0xbe2c'ba29, + 0xfe6c'fa69, + 0xfe6c'fa69, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9604'9200}, + // expected_result_vd0_with_mask_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xee7c'ea78'e674'e271, + 0xee7c'ea78'e674'e271, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 
0x9e0c'9a09'9604'9200}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vredmax, vredmax.vs) + +TEST(InlineAsmTestRiscv64, TestVredmax) { + TestVectorReductionInstruction( + ExecVredmax, + ExecMaskedVredmax, + // expected_result_vd0_int8 + {28, 60, 124, 126, /* unused */ 0, 0, 4, 12}, + // expected_result_vd0_int16 + {0x9e0c, 0xbe2c, 0xfe6c, 0x7eec, /* unused */ 0, 0x9200, 0x9604, 0x9e0c}, + // expected_result_vd0_int32 + {0x9e0c'9a09, + 0xbe2c'ba29, + 0xfe6c'fa69, + 0x7eec'7ae9, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9e0c'9a09}, + // expected_result_vd0_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xfe6c'fa69'f664'f260, + 0x7eec'7ae9'76e4'72e0, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + // expected_result_vd0_with_mask_int8 + {24, 52, 124, 126, /* unused */ 0, 0, 4, 4}, + // expected_result_vd0_with_mask_int16 + {0x9e0c, 0xba29, 0xfe6c, 0x7ae9, /* unused */ 0, 0x9200, 0x9200, 0x9e0c}, + // expected_result_vd0_with_mask_int32 + {0x9604'9200, + 0xbe2c'ba29, + 0xfe6c'fa69, + 0x7eec'7ae9, + /* unused */ 0, + /* unused */ 0, + 0x9604'9200, + 0x9604'9200}, + // expected_result_vd0_with_mask_int64 + {0x9e0c'9a09'9604'9200, + 0xbe2c'ba29'b624'b220, + 0xee7c'ea78'e674'e271, + 0x6efc'6af8'66f4'62f1, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x9e0c'9a09'9604'9200}, + kVectorCalculationsSource); +} + +DEFINE_TWO_ARG_ONE_RES_FUNCTION(Vfredmax, vfredmax.vs) + +TEST(InlineAsmTestRiscv64, TestVfredmax) { + TestVectorReductionInstruction(ExecVfredmax, + ExecMaskedVfredmax, + // expected_result_vd0_int32 + {0x8302'8100, + 0x8302'8100, + 0x8302'8100, + 0x7eec'7ae9, + /* unused */ 0, + /* unused */ 0, + 0x8302'8100, + 0x8302'8100}, + // expected_result_vd0_int64 + {0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x7eec'7ae9'76e4'72e0, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8706'8504'8302'8100}, + // expected_result_vd0_with_mask_int32 + 
{0x8302'8100, + 0x8302'8100, + 0x8302'8100, + 0x7eec'7ae9, + /* unused */ 0, + /* unused */ 0, + 0x8302'8100, + 0x8302'8100}, + // expected_result_vd0_with_mask_int64 + {0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x8706'8504'8302'8100, + 0x6efc'6af8'66f4'62f1, + /* unused */ 0, + /* unused */ 0, + /* unused */ 0, + 0x8706'8504'8302'8100}, + kVectorCalculationsSource); +} + +#undef DEFINE_TWO_ARG_ONE_RES_FUNCTION + +[[gnu::naked]] void ExecVfsqrtv() { + asm("vfsqrt.v v8,v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfsqrtv() { + asm("vfsqrt.v v8,v24,v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVfsqrtv) { + TestVectorFloatInstruction(ExecVfsqrtv, + ExecMaskedVfsqrtv, + {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, + {0x2b02'052b, 0x2f05'ea47, 0x2309'a451, 0x270d'53b1}, + {0x3b10'f937, 0x3f14'7a09, 0x3317'd8b1, 0x371b'31d0}, + {0x4b1e'85c1, 0x4f21'bb83, 0x4324'd4da, 0x4727'ebbf}, + {0x5b2b'0054, 0x5f2d'fb2f, 0x5330'dd9e, 0x5733'bf97}}, + {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x2f3d'fd15'c59f'19b3, 0x2745'2e80'5593'4661}, + {0x3f4e'0e34'c013'd37a, 0x3755'3a9e'ffea'ec9f}, + {0x4f5e'1f49'ff52'69b6, 0x4765'46b6'c2dc'cddd}, + {0x5f6e'3055'93df'fb07, 0x5775'52c7'aa27'df73}}, + kVectorCalculationsSource); +} + +[[gnu::naked]] void ExecVfcvtxufv() { + asm("vfcvt.xu.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfcvtxufv() { + asm("vfcvt.xu.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfcvtxfv() { + asm("vfcvt.x.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfcvtxfv() { + asm("vfcvt.x.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] 
void ExecVfcvtfxuv() { + asm("vfcvt.f.xu.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfcvtfxuv() { + asm("vfcvt.f.xu.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfcvtfxv() { + asm("vfcvt.f.x.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfcvtfxv() { + asm("vfcvt.f.x.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfcvtrtzxuf() { + asm("vfcvt.rtz.xu.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfcvtrtzxuf() { + asm("vfcvt.rtz.xu.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfcvtrtzxf() { + asm("vfcvt.rtz.x.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfcvtrtzxf() { + asm("vfcvt.rtz.x.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwcvtxufv() { + asm("vfwcvt.xu.f.v v8, v28\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtxufv() { + asm("vfwcvt.xu.f.v v8, v28, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwcvtxfv() { + asm("vfwcvt.x.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtxfv() { + asm("vfwcvt.x.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwcvtffv() { + asm("vfwcvt.f.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtffv() { + asm("vfwcvt.f.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwcvtfxuv() { + asm("vfwcvt.f.xu.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtfxuv() { + asm("vfwcvt.f.xu.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwcvtfxv() { + asm("vfwcvt.f.x.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtfxv() { + asm("vfwcvt.f.x.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwcvtrtzxuf() { + asm("vfwcvt.rtz.xu.f.v v8, v28\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtrtzxuf() { + asm("vfwcvt.rtz.xu.f.v v8, v28, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void 
ExecVfwcvtrtzxf() { + asm("vfwcvt.rtz.x.f.v v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwcvtrtzxf() { + asm("vfwcvt.rtz.x.f.v v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtxufw() { + asm("vfncvt.xu.f.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtxufw() { + asm("vfncvt.xu.f.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtxfw() { + asm("vfncvt.x.f.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtxfw() { + asm("vfncvt.x.f.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtffw() { + asm("vfncvt.f.f.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtffw() { + asm("vfncvt.f.f.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtfxuw() { + asm("vfncvt.f.xu.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtfxuw() { + asm("vfncvt.f.xu.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtfxw() { + asm("vfncvt.f.x.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtfxw() { + asm("vfncvt.f.x.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtrtzxuf() { + asm("vfncvt.rtz.xu.f.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtrtzxuf() { + asm("vfncvt.rtz.xu.f.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfncvtrtzxfw() { + asm("vfncvt.rtz.x.f.w v8, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfncvtrtzxfw() { + asm("vfncvt.rtz.x.f.w v8, v24, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVfcvtxfv) { + TestVectorFloatInstruction(ExecVfcvtxufv, + ExecMaskedVfcvtxufv, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 
0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0xffff'ffff, 0xffff'ffff, 0x0000'6a21, 0x6e25'6c00}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfcvtxfv, + ExecMaskedVfcvtxfv, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x8000'0000, 0x8000'0000, 0xffff'cacf, 0xc8cd'6a00}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x7fff'ffff, 0x7fff'ffff, 0x0000'6a21, 0x6e25'6c00}, + {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}, + {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfcvtfxuv, + ExecMaskedVfcvtfxuv, + {{0x4f16'0492, 0x4f1e'0c9a, 0x4f06'1482, 0x4f0e'1c8a}, + {0x4f36'24b2, 0x4f3e'2cba, 0x4f26'34a2, 0x4f2e'3caa}, + {0x4f56'44d2, 0x4f5e'4cda, 0x4f46'54c2, 0x4f4e'5cca}, + {0x4f76'64f2, 0x4f7e'6cfa, 0x4f66'74e2, 0x4f6e'7cea}, + {0x4db4'2094, 0x4df4'60d4, 0x4cd2'8052, 0x4d69'c0aa}, + {0x4e5a'90ca, 0x4e7a'b0eb, 0x4e1a'd08b, 0x4e3a'f0ab}, + {0x4ead'88a6, 0x4ebd'98b6, 0x4e8d'a886, 0x4e9d'b896}, + 
{0x4eed'c8e6, 0x4efd'd8f6, 0x4ecd'e8c6, 0x4edd'f8d6}}, + {{0x43e3'c193'4132'c092, 0x43e1'c391'4310'c290}, + {0x43e7'c597'4536'c496, 0x43e5'c795'4714'c694}, + {0x43eb'c99b'493a'c89a, 0x43e9'cb99'4b18'ca98}, + {0x43ef'cd9f'4d3e'cc9e, 0x43ed'cf9d'4f1c'ce9c}, + {0x43be'8c1a'8916'8412, 0x43ad'3815'300d'2805}, + {0x43cf'561d'549b'5219, 0x43c7'5e15'5c13'5a11}, + {0x43d7'b316'b255'b115, 0x43d3'b712'b611'b511}, + {0x43df'bb1e'ba5d'b91d, 0x43db'bf1a'be19'bd19}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfcvtfxv, + ExecMaskedVfcvtfxv, + {{0xced3'f6dc, 0xcec3'e6cc, 0xcef3'd6fc, 0xcee3'c6ec}, + {0xce93'b69c, 0xce83'a68c, 0xceb3'96bc, 0xcea3'86ac}, + {0xce26'ecb7, 0xce06'cc97, 0xce66'acf7, 0xce46'8cd7}, + {0xcd19'b0da, 0xcbc9'82cc, 0xcdcc'58ec, 0xcd8c'18ac}, + {0x4db4'2094, 0x4df4'60d4, 0x4cd2'8052, 0x4d69'c0aa}, + {0x4e5a'90ca, 0x4e7a'b0eb, 0x4e1a'd08b, 0x4e3a'f0ab}, + {0x4ead'88a6, 0x4ebd'98b6, 0x4e8d'a886, 0x4e9d'b896}, + {0x4eed'c8e6, 0x4efd'd8f6, 0x4ecd'e8c6, 0x4edd'f8d6}}, + {{0xc3d8'7cd9'7d9a'7edc, 0xc3dc'78dd'79de'7adf}, + {0xc3d0'74d1'7592'76d3, 0xc3d4'70d5'71d6'72d7}, + {0xc3c0'd992'db14'dd97, 0xc3c8'd19a'd39c'd59f}, + {0xc379'3059'6099'b0da, 0xc3b1'8315'8719'8b1e}, + {0x43be'8c1a'8916'8412, 0x43ad'3815'300d'2805}, + {0x43cf'561d'549b'5219, 0x43c7'5e15'5c13'5a11}, + {0x43d7'b316'b255'b115, 0x43d3'b712'b611'b511}, + {0x43df'bb1e'ba5d'b91d, 0x43db'bf1a'be19'bd19}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfcvtrtzxuf, + ExecMaskedVfcvtrtzxuf, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0xffff'ffff, 0xffff'ffff, 0x0000'6a21, 0x6e25'6c00}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + {{0x0000'0000'0000'0000, 
0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfcvtrtzxf, + ExecMaskedVfcvtrtzxf, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x8000'0000, 0x8000'0000, 0xffff'cad0, 0xc8cd'6a00}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x7fff'ffff, 0x7fff'ffff, 0x0000'6a21, 0x6e25'6c00}, + {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}, + {0x7fff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtxufv, + ExecMaskedVfwcvtxufv, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'6229'6000'0000, 0x662d'6480'0000'0000}, + {0x0000'0000'0000'6a21, 0x0000'0000'6e25'6c00}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtxfv, + ExecMaskedVfwcvtxfv, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + 
{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0xffff'cecb'7000'0000, 0xccc9'6dc0'0000'0000}, + {0xffff'ffff'ffff'cacf, 0xffff'ffff'c8cd'6a00}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtffv, + ExecMaskedVfwcvtffv, + {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000}, + {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000}, + {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, + {0xbcc6'9446'2000'0000, 0xbdc7'9547'0000'0000}, + {0xc2c8'9a48'0000'0000, 0xc3c9'9b49'2000'0000}, + {0xc0ca'984a'2000'0000, 0xc1cb'994b'0000'0000}, + {0xc6cc'9e4c'0000'0000, 0xc7cd'9f4d'2000'0000}, + {0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtfxuv, + ExecMaskedVfwcvtfxuv, + {{0x4712'0000, 0x4716'0400, 0x471a'0900, 0x471e'0c00}, + {0x4702'1100, 0x4706'1400, 0x470a'1800, 0x470e'1c00}, + {0x4732'2000, 0x4736'2400, 0x473a'2900, 0x473e'2c00}, + {0x4722'3100, 0x4726'3400, 0x472a'3800, 0x472e'3c00}, + {0x4752'4000, 0x4756'4400, 0x475a'4900, 0x475e'4c00}, + {0x4742'5100, 0x4746'5400, 0x474a'5800, 0x474e'5c00}, + {0x4772'6000, 0x4776'6400, 0x477a'6900, 0x477e'6c00}, + {0x4762'7100, 0x4766'7400, 0x476a'7800, 0x476e'7c00}}, + {{0x41e2'c092'4000'0000, 0x41e3'c193'4120'0000}, + {0x41e0'c290'4220'0000, 0x41e1'c391'4300'0000}, + {0x41e6'c496'4400'0000, 0x41e7'c597'4520'0000}, + {0x41e4'c694'4620'0000, 0x41e5'c795'4700'0000}, + {0x41ea'c89a'4800'0000, 0x41eb'c99b'4920'0000}, + {0x41e8'ca98'4a20'0000, 0x41e9'cb99'4b00'0000}, + {0x41ee'cc9e'4c00'0000, 0x41ef'cd9f'4d20'0000}, + {0x41ec'ce9c'4e20'0000, 0x41ed'cf9d'4f00'0000}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtfxv, + ExecMaskedVfwcvtfxv, + {{0xc6dc'0000, 0xc6d3'f800, 0xc6cb'ee00, 0xc6c3'e800}, + {0xc6fb'de00, 
0xc6f3'd800, 0xc6eb'd000, 0xc6e3'c800}, + {0xc69b'c000, 0xc693'b800, 0xc68b'ae00, 0xc683'a800}, + {0xc6bb'9e00, 0xc6b3'9800, 0xc6ab'9000, 0xc6a3'8800}, + {0xc637'0000, 0xc626'f000, 0xc616'dc00, 0xc606'd000}, + {0xc676'bc00, 0xc666'b000, 0xc656'a000, 0xc646'9000}, + {0xc55a'0000, 0xc519'c000, 0xc4b2'e000, 0xc3ca'0000}, + {0xc5ec'7800, 0xc5cc'6000, 0xc5ac'4000, 0xc58c'2000}}, + {{0xc1da'7edb'8000'0000, 0xc1d8'7cd9'7dc0'0000}, + {0xc1de'7adf'7bc0'0000, 0xc1dc'78dd'7a00'0000}, + {0xc1d2'76d3'7800'0000, 0xc1d0'74d1'75c0'0000}, + {0xc1d6'72d7'73c0'0000, 0xc1d4'70d5'7200'0000}, + {0xc1c4'dd96'e000'0000, 0xc1c0'd992'db80'0000}, + {0xc1cc'd59e'd780'0000, 0xc1c8'd19a'd400'0000}, + {0xc1a3'361b'4000'0000, 0xc179'3059'7000'0000}, + {0xc1b9'8b1d'8f00'0000, 0xc1b1'8315'8800'0000}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtrtzxuf, + ExecMaskedVfwcvtrtzxuf, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'6229'6000'0000, 0x662d'6480'0000'0000}, + {0x0000'0000'0000'6a21, 0x0000'0000'6e25'6c00}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwcvtrtzxf, + ExecMaskedVfwcvtrtzxf, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0xffff'cecb'7000'0000, 0xccc9'6dc0'0000'0000}, + {0xffff'ffff'ffff'cad0, 0xffff'ffff'c8cd'6a00}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + ExecVfncvtxufw, + ExecMaskedVfncvtxufw, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + ExecVfncvtxfw, + ExecMaskedVfncvtxfw, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x8000, 0x8000, 0xcacf, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction(ExecVfncvtffw, + ExecMaskedVfncvtffw, + {{0x8000'0000, 0x8000'0000, 0xb165'd14e, 0x8000'0000}, + {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000}, + {0x0000'0000, 0x0000'0000, 0x3561'd54a, 0x0000'0000}, + {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction(ExecVfncvtfxuw, + ExecMaskedVfncvtfxuw, + {{0x5f1e'0c9a, 0x5f0e'1c8a, 0x5f3e'2cba, 0x5f2e'3caa}, + {0x5f5e'4cda, 0x5f4e'5cca, 0x5f7e'6cfa, 0x5f6e'7cea}, + {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab}, + {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction(ExecVfncvtfxw, + ExecMaskedVfncvtfxw, + {{0xdec3'e6cc, 0xdee3'c6ec, 0xde83'a68c, 0xdea3'86ac}, + {0xde06'cc97, 0xde46'8cd7, 0xdbc9'82cb, 0xdd8c'18ac}, + {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab}, + {0x5ebd'98b6, 0x5e9d'b896, 
0x5efd'd8f6, 0x5edd'f8d6}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + ExecVfncvtrtzxuf, + ExecMaskedVfncvtrtzxuf, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + kVectorCalculationsSource); + TestNarrowingVectorFloatInstruction( + ExecVfncvtrtzxfw, + ExecMaskedVfncvtrtzxfw, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x8000, 0x8000, 0xcad0, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}}, + kVectorCalculationsSource); +} + +[[gnu::naked]] void ExecVid() { + asm("vid.v v8\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVid() { + asm("vid.v v8, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVid) { + TestVectorInstruction( + ExecVid, + ExecMaskedVid, + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, + {32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}, + {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}, + {64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}, + {96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 
107, 108, 109, 110, 111}, + {112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127}}, + {{0, 1, 2, 3, 4, 5, 6, 7}, + {8, 9, 10, 11, 12, 13, 14, 15}, + {16, 17, 18, 19, 20, 21, 22, 23}, + {24, 25, 26, 27, 28, 29, 30, 31}, + {32, 33, 34, 35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44, 45, 46, 47}, + {48, 49, 50, 51, 52, 53, 54, 55}, + {56, 57, 58, 59, 60, 61, 62, 63}}, + {{0, 1, 2, 3}, + {4, 5, 6, 7}, + {8, 9, 10, 11}, + {12, 13, 14, 15}, + {16, 17, 18, 19}, + {20, 21, 22, 23}, + {24, 25, 26, 27}, + {28, 29, 30, 31}}, + {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}, {12, 13}, {14, 15}}, + kVectorCalculationsSourceLegacy); +} + +[[gnu::naked]] void ExecViotam() { + asm("viota.m v8, v16\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedViotam() { + asm("viota.m v8, v16, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestIota) { + TestVectorIota<false>(ExecViotam, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1}, + {2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5}, + {6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9}, + {10, 10, 11, 12, 12, 12, 12, 12, 12, 13, 14, 15, 15, 15, 15, 15}, + {16, 16, 16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19}, + {20, 20, 21, 21, 22, 22, 22, 22, 22, 23, 24, 24, 25, 25, 25, 25}, + {26, 26, 26, 27, 28, 28, 28, 28, 28, 29, 29, 30, 31, 31, 31, 31}, + {32, 32, 33, 34, 35, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39}}, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001}, + {0x0002, 0x0002, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003}, + {0x0003, 0x0004, 0x0005, 0x0005, 0x0005, 0x0005, 0x0005, 0x0005}, + {0x0006, 0x0006, 0x0006, 0x0007, 0x0007, 0x0007, 0x0007, 0x0007}, + {0x0007, 0x0008, 0x0008, 0x0009, 0x0009, 0x0009, 0x0009, 0x0009}, + {0x000a, 0x000a, 0x000b, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c}, + {0x000c, 0x000d, 0x000e, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 
0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0001, 0x0000'0001, 0x0000'0001}, + {0x0000'0001, 0x0000'0001, 0x0000'0001, 0x0000'0001}, + {0x0000'0002, 0x0000'0002, 0x0000'0003, 0x0000'0003}, + {0x0000'0003, 0x0000'0003, 0x0000'0003, 0x0000'0003}, + {0x0000'0003, 0x0000'0004, 0x0000'0005, 0x0000'0005}, + {0x0000'0005, 0x0000'0005, 0x0000'0005, 0x0000'0005}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0001}, + {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}, + {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}, + {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}}, + kVectorCalculationsSource); + TestVectorIota<true>(ExecMaskedViotam, + {{0, 0x55, 0, 0, 0x55, 0, 0x55, 0, 0, 0x55, 1, 0x55, 1, 1, 0x55, 1}, + {2, 2, 0x55, 3, 0x55, 3, 3, 0x55, 3, 0x55, 4, 4, 0x55, 4, 0x55, 4}, + {5, 0x55, 5, 0x55, 6, 6, 0x55, 6, 0x55, 6, 6, 0x55, 7, 0x55, 7, 7}, + {8, 0x55, 8, 9, 0x55, 9, 0x55, 9, 9, 0x55, 10, 0x55, 11, 0x55, 11, 11}, + {12, 0x55, 12, 0x55, 12, 12, 0x55, 12, 12, 13, 0x55, 13, 14, 14, 14, 0x55}, + {14, 0x55, 14, 14, 0x55, 15, 15, 15, 0x55, 15, 16, 16, 17, 0x55, 17, 17}, + {18, 18, 0x55, 18, 19, 19, 0x55, 19, 19, 20, 20, 0x55, 21, 0x55, 21, 0x55}, + {21, 21, 22, 0x55, 23, 23, 23, 23, 0x55, 23, 0x55, 24, 0x55, 25, 25, 0x55}}, + {{0x0000, 0x5555, 0x0000, 0x0000, 0x5555, 0x0000, 0x5555, 0x0000}, + {0x0000, 0x5555, 0x0001, 0x5555, 0x0001, 0x0001, 0x5555, 0x0001}, + {0x0002, 0x0002, 0x5555, 0x0003, 0x5555, 0x0003, 0x0003, 0x5555}, + {0x0003, 0x5555, 0x0004, 0x0004, 0x5555, 0x0004, 0x5555, 0x0004}, + {0x0005, 0x5555, 0x0005, 0x5555, 0x0006, 0x0006, 0x5555, 0x0006}, + {0x5555, 0x0006, 0x0006, 0x5555, 0x0007, 0x5555, 0x0007, 0x0007}, + {0x0008, 0x5555, 0x0008, 0x0009, 0x5555, 0x0009, 0x5555, 0x0009}, + {0x0009, 0x5555, 0x000a, 0x5555, 0x000b, 
0x5555, 0x000b, 0x000b}}, + {{0x0000'0000, 0x5555'5555, 0x0000'0000, 0x0000'0000}, + {0x5555'5555, 0x0000'0000, 0x5555'5555, 0x0000'0000}, + {0x0000'0000, 0x5555'5555, 0x0000'0001, 0x5555'5555}, + {0x0000'0001, 0x0000'0001, 0x5555'5555, 0x0000'0001}, + {0x0000'0002, 0x0000'0002, 0x5555'5555, 0x0000'0003}, + {0x5555'5555, 0x0000'0003, 0x0000'0003, 0x5555'5555}, + {0x0000'0003, 0x5555'5555, 0x0000'0004, 0x0000'0004}, + {0x5555'5555, 0x0000'0004, 0x5555'5555, 0x0000'0004}}, + {{0x0000'0000'0000'0000, 0x5555'5555'5555'5555}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x5555'5555'5555'5555, 0x0000'0000'0000'0000}, + {0x5555'5555'5555'5555, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x5555'5555'5555'5555}, + {0x0000'0000'0000'0001, 0x5555'5555'5555'5555}, + {0x0000'0000'0000'0001, 0x0000'0000'0000'0001}, + {0x5555'5555'5555'5555, 0x0000'0000'0000'0001}}, + kVectorCalculationsSource); +} + +[[gnu::naked]] void ExecVrsubvx() { + asm("vrsub.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVrsubvx() { + asm("vrsub.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVrsubvi() { + asm("vrsub.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVrsubvi() { + asm("vrsub.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfrsubvf() { + asm("vfrsub.vf v8, v16, ft0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfrsubvf() { + asm("vfrsub.vf v8, v16, ft0, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVrsub) { + TestVectorInstruction( + ExecVrsubvx, + ExecMaskedVrsubvx, + {{170, 41, 168, 39, 166, 37, 164, 35, 162, 33, 160, 31, 158, 29, 156, 27}, + {154, 25, 152, 23, 150, 21, 148, 19, 146, 17, 144, 15, 142, 13, 140, 11}, + {138, 9, 136, 7, 134, 5, 132, 3, 130, 1, 128, 255, 126, 253, 124, 251}, + {122, 249, 120, 247, 118, 245, 116, 243, 114, 241, 112, 239, 110, 237, 108, 235}, + {106, 233, 104, 231, 102, 229, 100, 227, 98, 225, 96, 223, 94, 221, 92, 219}, + {90, 217, 
88, 215, 86, 213, 84, 211, 82, 209, 80, 207, 78, 205, 76, 203}, + {74, 201, 72, 199, 70, 197, 68, 195, 66, 193, 64, 191, 62, 189, 60, 187}, + {58, 185, 56, 183, 54, 181, 52, 179, 50, 177, 48, 175, 46, 173, 44, 171}}, + {{0x29aa, 0x27a8, 0x25a6, 0x23a4, 0x21a2, 0x1fa0, 0x1d9e, 0x1b9c}, + {0x199a, 0x1798, 0x1596, 0x1394, 0x1192, 0x0f90, 0x0d8e, 0x0b8c}, + {0x098a, 0x0788, 0x0586, 0x0384, 0x0182, 0xff80, 0xfd7e, 0xfb7c}, + {0xf97a, 0xf778, 0xf576, 0xf374, 0xf172, 0xef70, 0xed6e, 0xeb6c}, + {0xe96a, 0xe768, 0xe566, 0xe364, 0xe162, 0xdf60, 0xdd5e, 0xdb5c}, + {0xd95a, 0xd758, 0xd556, 0xd354, 0xd152, 0xcf50, 0xcd4e, 0xcb4c}, + {0xc94a, 0xc748, 0xc546, 0xc344, 0xc142, 0xbf40, 0xbd3e, 0xbb3c}, + {0xb93a, 0xb738, 0xb536, 0xb334, 0xb132, 0xaf30, 0xad2e, 0xab2c}}, + {{0x27a8'29aa, 0x23a4'25a6, 0x1fa0'21a2, 0x1b9c'1d9e}, + {0x1798'199a, 0x1394'1596, 0x0f90'1192, 0x0b8c'0d8e}, + {0x0788'098a, 0x0384'0586, 0xff80'0182, 0xfb7b'fd7e}, + {0xf777'f97a, 0xf373'f576, 0xef6f'f172, 0xeb6b'ed6e}, + {0xe767'e96a, 0xe363'e566, 0xdf5f'e162, 0xdb5b'dd5e}, + {0xd757'd95a, 0xd353'd556, 0xcf4f'd152, 0xcb4b'cd4e}, + {0xc747'c94a, 0xc343'c546, 0xbf3f'c142, 0xbb3b'bd3e}, + {0xb737'b93a, 0xb333'b536, 0xaf2f'b132, 0xab2b'ad2e}}, + {{0x23a4'25a6'27a8'29aa, 0x1b9c'1d9e'1fa0'21a2}, + {0x1394'1596'1798'199a, 0x0b8c'0d8e'0f90'1192}, + {0x0384'0586'0788'098a, 0xfb7b'fd7d'ff80'0182}, + {0xf373'f575'f777'f97a, 0xeb6b'ed6d'ef6f'f172}, + {0xe363'e565'e767'e96a, 0xdb5b'dd5d'df5f'e162}, + {0xd353'd555'd757'd95a, 0xcb4b'cd4d'cf4f'd152}, + {0xc343'c545'c747'c94a, 0xbb3b'bd3d'bf3f'c142}, + {0xb333'b535'b737'b93a, 0xab2b'ad2d'af2f'b132}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVrsubvi, + ExecMaskedVrsubvi, + {{245, 116, 243, 114, 241, 112, 239, 110, 237, 108, 235, 106, 233, 104, 231, 102}, + {229, 100, 227, 98, 225, 96, 223, 94, 221, 92, 219, 90, 217, 88, 215, 86}, + {213, 84, 211, 82, 209, 80, 207, 78, 205, 76, 203, 74, 201, 72, 199, 70}, + {197, 68, 195, 66, 193, 64, 191, 62, 189, 60, 
187, 58, 185, 56, 183, 54}, + {181, 52, 179, 50, 177, 48, 175, 46, 173, 44, 171, 42, 169, 40, 167, 38}, + {165, 36, 163, 34, 161, 32, 159, 30, 157, 28, 155, 26, 153, 24, 151, 22}, + {149, 20, 147, 18, 145, 16, 143, 14, 141, 12, 139, 10, 137, 8, 135, 6}, + {133, 4, 131, 2, 129, 0, 127, 254, 125, 252, 123, 250, 121, 248, 119, 246}}, + {{0x7ef5, 0x7cf3, 0x7af1, 0x78ef, 0x76ed, 0x74eb, 0x72e9, 0x70e7}, + {0x6ee5, 0x6ce3, 0x6ae1, 0x68df, 0x66dd, 0x64db, 0x62d9, 0x60d7}, + {0x5ed5, 0x5cd3, 0x5ad1, 0x58cf, 0x56cd, 0x54cb, 0x52c9, 0x50c7}, + {0x4ec5, 0x4cc3, 0x4ac1, 0x48bf, 0x46bd, 0x44bb, 0x42b9, 0x40b7}, + {0x3eb5, 0x3cb3, 0x3ab1, 0x38af, 0x36ad, 0x34ab, 0x32a9, 0x30a7}, + {0x2ea5, 0x2ca3, 0x2aa1, 0x289f, 0x269d, 0x249b, 0x2299, 0x2097}, + {0x1e95, 0x1c93, 0x1a91, 0x188f, 0x168d, 0x148b, 0x1289, 0x1087}, + {0x0e85, 0x0c83, 0x0a81, 0x087f, 0x067d, 0x047b, 0x0279, 0x0077}}, + {{0x7cfd'7ef5, 0x78f9'7af1, 0x74f5'76ed, 0x70f1'72e9}, + {0x6ced'6ee5, 0x68e9'6ae1, 0x64e5'66dd, 0x60e1'62d9}, + {0x5cdd'5ed5, 0x58d9'5ad1, 0x54d5'56cd, 0x50d1'52c9}, + {0x4ccd'4ec5, 0x48c9'4ac1, 0x44c5'46bd, 0x40c1'42b9}, + {0x3cbd'3eb5, 0x38b9'3ab1, 0x34b5'36ad, 0x30b1'32a9}, + {0x2cad'2ea5, 0x28a9'2aa1, 0x24a5'269d, 0x20a1'2299}, + {0x1c9d'1e95, 0x1899'1a91, 0x1495'168d, 0x1091'1289}, + {0x0c8d'0e85, 0x0889'0a81, 0x0485'067d, 0x0081'0279}}, + {{0x78f9'7afb'7cfd'7ef5, 0x70f1'72f3'74f5'76ed}, + {0x68e9'6aeb'6ced'6ee5, 0x60e1'62e3'64e5'66dd}, + {0x58d9'5adb'5cdd'5ed5, 0x50d1'52d3'54d5'56cd}, + {0x48c9'4acb'4ccd'4ec5, 0x40c1'42c3'44c5'46bd}, + {0x38b9'3abb'3cbd'3eb5, 0x30b1'32b3'34b5'36ad}, + {0x28a9'2aab'2cad'2ea5, 0x20a1'22a3'24a5'269d}, + {0x1899'1a9b'1c9d'1e95, 0x1091'1293'1495'168d}, + {0x0889'0a8b'0c8d'0e85, 0x0081'0283'0485'067d}}, + kVectorCalculationsSourceLegacy); + + TestVectorFloatInstruction(ExecVfrsubvf, + ExecMaskedVfrsubvf, + {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 
0x40b4'0000}, + {0x40b4'0000, 0x40b4'0017, 0x40b4'1757, 0x40cb'd7a8}, + {0x4348'6140, 0x4746'cae4, 0x4b4a'c94e, 0x4f4e'cd4c}, + {0x5352'd150, 0x5756'd554, 0x5b5a'd958, 0x5f5e'dd5c}, + {0x6362'e160, 0x6766'e564, 0x6b6a'e968, 0x6f6e'ed6c}, + {0x7372'f170, 0x7776'f574, 0x7b7a'f978, 0x7f7e'fd7c}}, + {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'807a'f4f2'eceb}, + {0x4746'c544'c342'c140, 0x4f4e'cd4c'cb4a'c948}, + {0x5756'd554'd352'd150, 0x5f5e'dd5c'db5a'd958}, + {0x6766'e564'e362'e160, 0x6f6e'ed6c'eb6a'e968}, + {0x7776'f574'f372'f170, 0x7f7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); +} + +[[gnu::naked]] void ExecVaddvv() { + asm("vadd.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVaddvv() { + asm("vadd.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVaddvx() { + asm("vadd.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVaddvx() { + asm("vadd.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVaddvi() { + asm("vadd.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVaddvi() { + asm("vadd.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVsadduvv() { + asm("vsaddu.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVsadduvv() { + asm("vsaddu.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVsadduvx() { + asm("vsaddu.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVsadduvx() { + asm("vsaddu.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVsadduvi() { + asm("vsaddu.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVsadduvi() { + asm("vsaddu.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVsaddvv() { + asm("vsadd.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void 
ExecMaskedVsaddvv() { + asm("vsadd.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVsaddvx() { + asm("vsadd.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVsaddvx() { + asm("vsadd.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVsaddvi() { + asm("vsadd.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVsaddvi() { + asm("vsadd.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfaddvv() { + asm("vfadd.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfaddvv() { + asm("vfadd.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfaddvf() { + asm("vfadd.vf v8, v16, ft0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfaddvf() { + asm("vfadd.vf v8, v16, ft0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwaddvv() { + asm("vfwadd.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwaddvv() { + asm("vfwadd.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwaddwv() { + asm("vfwadd.wv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwaddwv() { + asm("vfwadd.wv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwaddwf() { + asm("vfwadd.wf v8, v16, ft0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwaddwf() { + asm("vfwadd.wf v8, v16, ft0, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVadd) { + TestVectorInstruction( + ExecVaddvv, + ExecMaskedVaddvv, + {{0, 131, 6, 137, 13, 143, 18, 149, 25, 155, 30, 161, 36, 167, 42, 173}, + {48, 179, 54, 185, 61, 191, 66, 197, 73, 203, 78, 209, 84, 215, 90, 221}, + {96, 227, 102, 233, 109, 239, 114, 245, 121, 251, 126, 1, 132, 7, 138, 13}, + {144, 19, 150, 25, 157, 31, 162, 37, 169, 43, 174, 49, 180, 55, 186, 61}, + {192, 67, 198, 73, 205, 79, 210, 85, 217, 91, 222, 97, 228, 103, 234, 109}, + {240, 115, 246, 121, 253, 127, 2, 133, 9, 139, 14, 145, 20, 151, 26, 157}, + 
{32, 163, 38, 169, 45, 175, 50, 181, 57, 187, 62, 193, 68, 199, 74, 205}, + {80, 211, 86, 217, 93, 223, 98, 229, 105, 235, 110, 241, 116, 247, 122, 253}}, + {{0x8300, 0x8906, 0x8f0d, 0x9512, 0x9b19, 0xa11e, 0xa724, 0xad2a}, + {0xb330, 0xb936, 0xbf3d, 0xc542, 0xcb49, 0xd14e, 0xd754, 0xdd5a}, + {0xe360, 0xe966, 0xef6d, 0xf572, 0xfb79, 0x017e, 0x0784, 0x0d8a}, + {0x1390, 0x1996, 0x1f9d, 0x25a2, 0x2ba9, 0x31ae, 0x37b4, 0x3dba}, + {0x43c0, 0x49c6, 0x4fcd, 0x55d2, 0x5bd9, 0x61de, 0x67e4, 0x6dea}, + {0x73f0, 0x79f6, 0x7ffd, 0x8602, 0x8c09, 0x920e, 0x9814, 0x9e1a}, + {0xa420, 0xaa26, 0xb02d, 0xb632, 0xbc39, 0xc23e, 0xc844, 0xce4a}, + {0xd450, 0xda56, 0xe05d, 0xe662, 0xec69, 0xf26e, 0xf874, 0xfe7a}}, + {{0x8906'8300, 0x9512'8f0d, 0xa11e'9b19, 0xad2a'a724}, + {0xb936'b330, 0xc542'bf3d, 0xd14e'cb49, 0xdd5a'd754}, + {0xe966'e360, 0xf572'ef6d, 0x017e'fb79, 0x0d8b'0784}, + {0x1997'1390, 0x25a3'1f9d, 0x31af'2ba9, 0x3dbb'37b4}, + {0x49c7'43c0, 0x55d3'4fcd, 0x61df'5bd9, 0x6deb'67e4}, + {0x79f7'73f0, 0x8603'7ffd, 0x920f'8c09, 0x9e1b'9814}, + {0xaa27'a420, 0xb633'b02d, 0xc23f'bc39, 0xce4b'c844}, + {0xda57'd450, 0xe663'e05d, 0xf26f'ec69, 0xfe7b'f874}}, + {{0x9512'8f0d'8906'8300, 0xad2a'a724'a11e'9b19}, + {0xc542'bf3d'b936'b330, 0xdd5a'd754'd14e'cb49}, + {0xf572'ef6d'e966'e360, 0x0d8b'0785'017e'fb79}, + {0x25a3'1f9e'1997'1390, 0x3dbb'37b5'31af'2ba9}, + {0x55d3'4fce'49c7'43c0, 0x6deb'67e5'61df'5bd9}, + {0x8603'7ffe'79f7'73f0, 0x9e1b'9815'920f'8c09}, + {0xb633'b02e'aa27'a420, 0xce4b'c845'c23f'bc39}, + {0xe663'e05e'da57'd450, 0xfe7b'f875'f26f'ec69}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVaddvx, + ExecMaskedVaddvx, + {{170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180, 53, 182, 55, 184, 57}, + {186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196, 69, 198, 71, 200, 73}, + {202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212, 85, 214, 87, 216, 89}, + {218, 91, 220, 93, 222, 95, 224, 97, 226, 99, 228, 101, 230, 103, 232, 105}, + {234, 107, 236, 109, 238, 111, 240, 113, 
242, 115, 244, 117, 246, 119, 248, 121}, + {250, 123, 252, 125, 254, 127, 0, 129, 2, 131, 4, 133, 6, 135, 8, 137}, + {10, 139, 12, 141, 14, 143, 16, 145, 18, 147, 20, 149, 22, 151, 24, 153}, + {26, 155, 28, 157, 30, 159, 32, 161, 34, 163, 36, 165, 38, 167, 40, 169}}, + {{0x2baa, 0x2dac, 0x2fae, 0x31b0, 0x33b2, 0x35b4, 0x37b6, 0x39b8}, + {0x3bba, 0x3dbc, 0x3fbe, 0x41c0, 0x43c2, 0x45c4, 0x47c6, 0x49c8}, + {0x4bca, 0x4dcc, 0x4fce, 0x51d0, 0x53d2, 0x55d4, 0x57d6, 0x59d8}, + {0x5bda, 0x5ddc, 0x5fde, 0x61e0, 0x63e2, 0x65e4, 0x67e6, 0x69e8}, + {0x6bea, 0x6dec, 0x6fee, 0x71f0, 0x73f2, 0x75f4, 0x77f6, 0x79f8}, + {0x7bfa, 0x7dfc, 0x7ffe, 0x8200, 0x8402, 0x8604, 0x8806, 0x8a08}, + {0x8c0a, 0x8e0c, 0x900e, 0x9210, 0x9412, 0x9614, 0x9816, 0x9a18}, + {0x9c1a, 0x9e1c, 0xa01e, 0xa220, 0xa422, 0xa624, 0xa826, 0xaa28}}, + {{0x2dad'2baa, 0x31b1'2fae, 0x35b5'33b2, 0x39b9'37b6}, + {0x3dbd'3bba, 0x41c1'3fbe, 0x45c5'43c2, 0x49c9'47c6}, + {0x4dcd'4bca, 0x51d1'4fce, 0x55d5'53d2, 0x59d9'57d6}, + {0x5ddd'5bda, 0x61e1'5fde, 0x65e5'63e2, 0x69e9'67e6}, + {0x6ded'6bea, 0x71f1'6fee, 0x75f5'73f2, 0x79f9'77f6}, + {0x7dfd'7bfa, 0x8201'7ffe, 0x8605'8402, 0x8a09'8806}, + {0x8e0d'8c0a, 0x9211'900e, 0x9615'9412, 0x9a19'9816}, + {0x9e1d'9c1a, 0xa221'a01e, 0xa625'a422, 0xaa29'a826}}, + {{0x31b1'2faf'2dad'2baa, 0x39b9'37b7'35b5'33b2}, + {0x41c1'3fbf'3dbd'3bba, 0x49c9'47c7'45c5'43c2}, + {0x51d1'4fcf'4dcd'4bca, 0x59d9'57d7'55d5'53d2}, + {0x61e1'5fdf'5ddd'5bda, 0x69e9'67e7'65e5'63e2}, + {0x71f1'6fef'6ded'6bea, 0x79f9'77f7'75f5'73f2}, + {0x8201'7fff'7dfd'7bfa, 0x8a09'8807'8605'8402}, + {0x9211'900f'8e0d'8c0a, 0x9a19'9817'9615'9412}, + {0xa221'a01f'9e1d'9c1a, 0xaa29'a827'a625'a422}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVaddvi, + ExecMaskedVaddvi, + {{245, 118, 247, 120, 249, 122, 251, 124, 253, 126, 255, 128, 1, 130, 3, 132}, + {5, 134, 7, 136, 9, 138, 11, 140, 13, 142, 15, 144, 17, 146, 19, 148}, + {21, 150, 23, 152, 25, 154, 27, 156, 29, 158, 31, 160, 33, 162, 35, 164}, + {37, 
166, 39, 168, 41, 170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180}, + {53, 182, 55, 184, 57, 186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196}, + {69, 198, 71, 200, 73, 202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212}, + {85, 214, 87, 216, 89, 218, 91, 220, 93, 222, 95, 224, 97, 226, 99, 228}, + {101, 230, 103, 232, 105, 234, 107, 236, 109, 238, 111, 240, 113, 242, 115, 244}}, + {{0x80f5, 0x82f7, 0x84f9, 0x86fb, 0x88fd, 0x8aff, 0x8d01, 0x8f03}, + {0x9105, 0x9307, 0x9509, 0x970b, 0x990d, 0x9b0f, 0x9d11, 0x9f13}, + {0xa115, 0xa317, 0xa519, 0xa71b, 0xa91d, 0xab1f, 0xad21, 0xaf23}, + {0xb125, 0xb327, 0xb529, 0xb72b, 0xb92d, 0xbb2f, 0xbd31, 0xbf33}, + {0xc135, 0xc337, 0xc539, 0xc73b, 0xc93d, 0xcb3f, 0xcd41, 0xcf43}, + {0xd145, 0xd347, 0xd549, 0xd74b, 0xd94d, 0xdb4f, 0xdd51, 0xdf53}, + {0xe155, 0xe357, 0xe559, 0xe75b, 0xe95d, 0xeb5f, 0xed61, 0xef63}, + {0xf165, 0xf367, 0xf569, 0xf76b, 0xf96d, 0xfb6f, 0xfd71, 0xff73}}, + {{0x8302'80f5, 0x8706'84f9, 0x8b0a'88fd, 0x8f0e'8d01}, + {0x9312'9105, 0x9716'9509, 0x9b1a'990d, 0x9f1e'9d11}, + {0xa322'a115, 0xa726'a519, 0xab2a'a91d, 0xaf2e'ad21}, + {0xb332'b125, 0xb736'b529, 0xbb3a'b92d, 0xbf3e'bd31}, + {0xc342'c135, 0xc746'c539, 0xcb4a'c93d, 0xcf4e'cd41}, + {0xd352'd145, 0xd756'd549, 0xdb5a'd94d, 0xdf5e'dd51}, + {0xe362'e155, 0xe766'e559, 0xeb6a'e95d, 0xef6e'ed61}, + {0xf372'f165, 0xf776'f569, 0xfb7a'f96d, 0xff7e'fd71}}, + {{0x8706'8504'8302'80f5, 0x8f0e'8d0c'8b0a'88fd}, + {0x9716'9514'9312'9105, 0x9f1e'9d1c'9b1a'990d}, + {0xa726'a524'a322'a115, 0xaf2e'ad2c'ab2a'a91d}, + {0xb736'b534'b332'b125, 0xbf3e'bd3c'bb3a'b92d}, + {0xc746'c544'c342'c135, 0xcf4e'cd4c'cb4a'c93d}, + {0xd756'd554'd352'd145, 0xdf5e'dd5c'db5a'd94d}, + {0xe766'e564'e362'e155, 0xef6e'ed6c'eb6a'e95d}, + {0xf776'f574'f372'f165, 0xff7e'fd7c'fb7a'f96d}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVsadduvv, + ExecMaskedVsadduvv, + {{0, 255, 6, 255, 13, 255, 18, 255, 25, 255, 30, 255, 36, 255, 42, 255}, + {48, 255, 54, 255, 61, 255, 66, 255, 73, 
255, 78, 255, 84, 255, 90, 255}, + {96, 255, 102, 255, 109, 255, 114, 255, 121, 255, 126, 255, 132, 255, 138, 255}, + {144, 255, 150, 255, 157, 255, 162, 255, 169, 255, 174, 255, 180, 255, 186, 255}, + {192, 211, 198, 217, 205, 223, 210, 229, 217, 203, 222, 209, 228, 215, 234, 221}, + {240, 255, 246, 255, 253, 255, 255, 255, 255, 251, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, + {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xd3c0, 0xd9c6, 0xdfcd, 0xe5d2, 0xcbd9, 0xd1de, 0xd7e4, 0xddea}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xfc09, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xd9c6'd3c0, 0xe5d2'dfcd, 0xd1de'cbd9, 0xddea'd7e4}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xe5d2'dfcd'd9c6'd3c0, 0xddea'd7e4'd1de'cbd9}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestVectorInstruction( + 
ExecVsadduvx, + ExecMaskedVsadduvx, + {{170, 255, 172, 255, 174, 255, 176, 255, 178, 255, 180, 255, 182, 255, 184, 255}, + {186, 255, 188, 255, 190, 255, 192, 255, 194, 255, 196, 255, 198, 255, 200, 255}, + {202, 255, 204, 255, 206, 255, 208, 255, 210, 255, 212, 255, 214, 255, 216, 255}, + {218, 255, 220, 255, 222, 255, 224, 255, 226, 255, 228, 255, 230, 255, 232, 255}, + {234, 255, 236, 255, 238, 255, 240, 255, 242, 255, 244, 255, 246, 255, 248, 255}, + {250, 255, 252, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, + {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 
0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestVectorInstruction( + ExecVsadduvi, + ExecMaskedVsadduvi, + {{245, 255, 247, 255, 249, 255, 251, 255, 253, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, + {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, + {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}, + {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}, + {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}}, + {{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + 
{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}, + {0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}}, + kVectorCalculationsSource); + TestVectorInstruction( + ExecVsaddvv, + ExecMaskedVsaddvv, + {{0, 128, 6, 128, 13, 128, 18, 128, 25, 128, 30, 128, 36, 128, 42, 128}, + {48, 128, 54, 128, 61, 128, 66, 128, 73, 128, 78, 128, 84, 128, 90, 128}, + {96, 128, 102, 128, 109, 128, 114, 133, 121, 128, 126, 128, 127, 128, 127, 128}, + {127, 163, 127, 169, 127, 175, 127, 181, 127, 155, 127, 161, 127, 167, 127, 173}, + {192, 211, 198, 217, 205, 223, 210, 229, 217, 203, 222, 209, 228, 215, 234, 221}, + {240, 3, 246, 9, 253, 15, 2, 21, 9, 251, 14, 1, 20, 7, 26, 13}, + {32, 51, 38, 57, 45, 63, 50, 69, 57, 43, 62, 49, 68, 55, 74, 61}, + {80, 99, 86, 105, 93, 111, 98, 117, 105, 91, 110, 97, 116, 103, 122, 109}}, + {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8572, 0x8000, 0x8000, 0x8000, 0x8000}, + {0xa390, 0xa996, 0xaf9d, 0xb5a2, 0x9ba9, 0xa1ae, 0xa7b4, 0xadba}, + {0xd3c0, 0xd9c6, 0xdfcd, 0xe5d2, 0xcbd9, 0xd1de, 0xd7e4, 0xddea}, + {0x03f0, 0x09f6, 0x0ffd, 0x1602, 0xfc09, 0x020e, 0x0814, 0x0e1a}, + {0x3420, 0x3a26, 0x402d, 0x4632, 0x2c39, 0x323e, 0x3844, 0x3e4a}, + {0x6450, 0x6a56, 0x705d, 0x7662, 0x5c69, 0x626e, 0x6874, 0x6e7a}}, + {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8573'7f6d, 0x8000'0000, 0x8000'0000}, + {0xa997'a390, 0xb5a3'af9d, 0xa1af'9ba9, 0xadbb'a7b4}, + {0xd9c6'd3c0, 0xe5d2'dfcd, 0xd1de'cbd9, 0xddea'd7e4}, + {0x09f7'03f0, 0x1603'0ffd, 0x020e'fc09, 0x0e1b'0814}, + {0x3a27'3420, 0x4633'402d, 0x323f'2c39, 0x3e4b'3844}, + {0x6a57'6450, 0x7663'705d, 0x626f'5c69, 0x6e7b'6874}}, 
+ {{0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8573'7f6e'7967'7360, 0x8000'0000'0000'0000}, + {0xb5a3'af9e'a997'a390, 0xadbb'a7b5'a1af'9ba9}, + {0xe5d2'dfcd'd9c6'd3c0, 0xddea'd7e4'd1de'cbd9}, + {0x1603'0ffe'09f7'03f0, 0x0e1b'0815'020e'fc09}, + {0x4633'402e'3a27'3420, 0x3e4b'3845'323f'2c39}, + {0x7663'705e'6a57'6450, 0x6e7b'6875'626f'5c69}}, + kVectorCalculationsSource); + TestVectorInstruction( + ExecVsaddvx, + ExecMaskedVsaddvx, + {{170, 128, 172, 128, 174, 128, 176, 128, 178, 128, 180, 128, 182, 128, 184, 128}, + {186, 128, 188, 128, 190, 128, 192, 128, 194, 128, 196, 128, 198, 128, 200, 128}, + {202, 128, 204, 128, 206, 128, 208, 128, 210, 128, 212, 128, 214, 128, 216, 128}, + {218, 128, 220, 128, 222, 128, 224, 128, 226, 128, 228, 128, 230, 128, 232, 128}, + {234, 128, 236, 128, 238, 128, 240, 128, 242, 128, 244, 128, 246, 128, 248, 128}, + {250, 128, 252, 128, 254, 128, 0, 129, 2, 131, 4, 133, 6, 135, 8, 137}, + {10, 139, 12, 141, 14, 143, 16, 145, 18, 147, 20, 149, 22, 151, 24, 153}, + {26, 155, 28, 157, 30, 159, 32, 161, 34, 163, 36, 165, 38, 167, 40, 169}}, + {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}, + {0x8000, 0x8000, 0x8000, 0x8200, 0x8402, 0x8604, 0x8806, 0x8a08}, + {0x8c0a, 0x8e0c, 0x900e, 0x9210, 0x9412, 0x9614, 0x9816, 0x9a18}, + {0x9c1a, 0x9e1c, 0xa01e, 0xa220, 0xa422, 0xa624, 0xa826, 0xaa28}}, + {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}, + {0x8000'0000, 0x8201'7ffe, 
0x8605'8402, 0x8a09'8806}, + {0x8e0d'8c0a, 0x9211'900e, 0x9615'9412, 0x9a19'9816}, + {0x9e1d'9c1a, 0xa221'a01e, 0xa625'a422, 0xaa29'a826}}, + {{0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8000'0000'0000'0000, 0x8000'0000'0000'0000}, + {0x8201'7fff'7dfd'7bfa, 0x8a09'8807'8605'8402}, + {0x9211'900f'8e0d'8c0a, 0x9a19'9817'9615'9412}, + {0xa221'a01f'9e1d'9c1a, 0xaa29'a827'a625'a422}}, + kVectorCalculationsSource); + TestVectorInstruction( + ExecVsaddvi, + ExecMaskedVsaddvi, + {{245, 128, 247, 128, 249, 128, 251, 128, 253, 128, 255, 128, 1, 130, 3, 132}, + {5, 134, 7, 136, 9, 138, 11, 140, 13, 142, 15, 144, 17, 146, 19, 148}, + {21, 150, 23, 152, 25, 154, 27, 156, 29, 158, 31, 160, 33, 162, 35, 164}, + {37, 166, 39, 168, 41, 170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180}, + {53, 182, 55, 184, 57, 186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196}, + {69, 198, 71, 200, 73, 202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212}, + {85, 214, 87, 216, 89, 218, 91, 220, 93, 222, 95, 224, 97, 226, 99, 228}, + {101, 230, 103, 232, 105, 234, 107, 236, 109, 238, 111, 240, 113, 242, 115, 244}}, + {{0x80f5, 0x82f7, 0x84f9, 0x86fb, 0x88fd, 0x8aff, 0x8d01, 0x8f03}, + {0x9105, 0x9307, 0x9509, 0x970b, 0x990d, 0x9b0f, 0x9d11, 0x9f13}, + {0xa115, 0xa317, 0xa519, 0xa71b, 0xa91d, 0xab1f, 0xad21, 0xaf23}, + {0xb125, 0xb327, 0xb529, 0xb72b, 0xb92d, 0xbb2f, 0xbd31, 0xbf33}, + {0xc135, 0xc337, 0xc539, 0xc73b, 0xc93d, 0xcb3f, 0xcd41, 0xcf43}, + {0xd145, 0xd347, 0xd549, 0xd74b, 0xd94d, 0xdb4f, 0xdd51, 0xdf53}, + {0xe155, 0xe357, 0xe559, 0xe75b, 0xe95d, 0xeb5f, 0xed61, 0xef63}, + {0xf165, 0xf367, 0xf569, 0xf76b, 0xf96d, 0xfb6f, 0xfd71, 0xff73}}, + {{0x8302'80f5, 0x8706'84f9, 0x8b0a'88fd, 0x8f0e'8d01}, + {0x9312'9105, 0x9716'9509, 0x9b1a'990d, 0x9f1e'9d11}, + {0xa322'a115, 0xa726'a519, 0xab2a'a91d, 0xaf2e'ad21}, + {0xb332'b125, 0xb736'b529, 
0xbb3a'b92d, 0xbf3e'bd31}, + {0xc342'c135, 0xc746'c539, 0xcb4a'c93d, 0xcf4e'cd41}, + {0xd352'd145, 0xd756'd549, 0xdb5a'd94d, 0xdf5e'dd51}, + {0xe362'e155, 0xe766'e559, 0xeb6a'e95d, 0xef6e'ed61}, + {0xf372'f165, 0xf776'f569, 0xfb7a'f96d, 0xff7e'fd71}}, + {{0x8706'8504'8302'80f5, 0x8f0e'8d0c'8b0a'88fd}, + {0x9716'9514'9312'9105, 0x9f1e'9d1c'9b1a'990d}, + {0xa726'a524'a322'a115, 0xaf2e'ad2c'ab2a'a91d}, + {0xb736'b534'b332'b125, 0xbf3e'bd3c'bb3a'b92d}, + {0xc746'c544'c342'c135, 0xcf4e'cd4c'cb4a'c93d}, + {0xd756'd554'd352'd145, 0xdf5e'dd5c'db5a'd94d}, + {0xe766'e564'e362'e155, 0xef6e'ed6c'eb6a'e95d}, + {0xf776'f574'f372'f165, 0xff7e'fd7c'fb7a'f96d}}, + kVectorCalculationsSource); + + TestVectorFloatInstruction(ExecVfaddvv, + ExecMaskedVfaddvv, + {{0x9604'9200, 0x9e0c'9a09, 0x8b0a'ae29, 0x8f35'af92}, + {0xb624'b220, 0xbe2c'ba29, 0xa634'a233, 0xae3c'aa38}, + {0xd644'd240, 0xde4c'da49, 0xc654'c251, 0xce5c'ca58}, + {0xf664'f260, 0xfe6c'fa69, 0xe674'e271, 0xee7c'ea78}, + {0xc342'c140, 0xc746'c544, 0xcb4a'c948, 0xcf4e'cd4c}, + {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, + {0xe362'e160, 0xe766'e4fe, 0xeb6a'e968, 0xef6e'ed6c}, + {0x76e2'8cfd, 0x7eec'78fb, 0xfb7a'f978, 0xff7e'fd7c}}, + {{0x9e0c'9a09'9604'9200, 0x8f0e'8d45'9f3b'9531}, + {0xbe2c'ba29'b624'b220, 0xae3c'aa38'a634'a231}, + {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251}, + {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0x7eec'7ae9'76e4'72e0, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfaddvf, + ExecMaskedVfaddvf, + {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {0x40b4'0000, 0x40b3'ffe9, 0x40b3'e8a9, 0x409c'2858}, + {0xc33d'2140, 0xc746'bfa4, 0xcb4a'c942, 0xcf4e'cd4c}, + {0xd352'd150, 
0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, + {0xe362'e160, 0xe766'e564, 0xeb6a'e968, 0xef6e'ed6c}, + {0xf372'f170, 0xf776'f574, 0xfb7a'f978, 0xff7e'fd7c}}, + {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'7f85'0b0d'1315}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(ExecVfwaddvv, + ExecMaskedVfwaddvv, + {{0xbac0'9240'0000'4140, 0xbbc1'9341'2000'0043}, + {0xb961'55c5'1088'0000, 0xb9e6'b5f2'4000'0000}, + {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, + {0xbcc6'9446'6d4c'8c00, 0xbdc7'9547'004f'4e8e}, + {0xc2c8'9a48'0000'0000, 0xc3c9'9b49'2000'0000}, + {0xc0ca'984a'2000'0000, 0xc1cb'994b'0000'0000}, + {0xc6cc'9e4c'0000'0000, 0xc7cd'9f4d'2000'0000}, + {0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}}, + kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(ExecVfwaddwv, + ExecMaskedVfwaddwv, + {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000}, + {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000}, + {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, + {0xbcc6'9446'2000'0000, 0xbf3e'bd3c'ea65'4738}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(ExecVfwaddwf, + ExecMaskedVfwaddwf, + {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {0x4016'8000'0000'0000, 0x4016'7f85'0b0d'1315}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + 
{0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); +} + +[[gnu::naked]] void ExecVsubvv() { + asm("vsub.vv v8, v16, v24\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecMaskedVsubvv() { + asm("vsub.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVsubvx() { + asm("vsub.vx v8, v16, t0\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecMaskedVsubvx() { + asm("vsub.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVssubuvv() { + asm("vssubu.vv v8, v16, v24\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecMaskedVssubuvv() { + asm("vssubu.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVssubuvx() { + asm("vssubu.vx v8, v16, t0\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecMaskedVssubuvx() { + asm("vssubu.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVssubvv() { + asm("vssub.vv v8, v16, v24\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecMaskedVssubvv() { + asm("vssub.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVssubvx() { + asm("vssub.vx v8, v16, t0\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecMaskedVssubvx() { + asm("vssub.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfsubvv() { + asm("vfsub.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfsubvv() { + asm("vfsub.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfsubvf() { + asm("vfsub.vf v8, v16, ft0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfsubvf() { + asm("vfsub.vf v8, v16, ft0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwsubvv() { + asm("vfwsub.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwsubvv() { + asm("vfwsub.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwsubvf() { + asm("vfwsub.vf v8, v16, ft0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwsubvf() { + asm("vfwsub.vf v8, v16, 
ft0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwsubwv() { + asm("vfwsub.wv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwsubwv() { + asm("vfwsub.wv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVfwsubwf() { + asm("vfwsub.wf v8, v16, ft0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVfwsubwf() { + asm("vfwsub.wf v8, v16, ft0, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVsub) { + TestVectorInstruction( + ExecVsubvv, + ExecMaskedVsubvv, + {{0, 127, 254, 125, 251, 123, 250, 121, 247, 119, 246, 117, 244, 115, 242, 113}, + {240, 111, 238, 109, 235, 107, 234, 105, 231, 103, 230, 101, 228, 99, 226, 97}, + {224, 95, 222, 93, 219, 91, 218, 89, 215, 87, 214, 85, 212, 83, 210, 81}, + {208, 79, 206, 77, 203, 75, 202, 73, 199, 71, 198, 69, 196, 67, 194, 65}, + {192, 63, 190, 61, 187, 59, 186, 57, 183, 55, 182, 53, 180, 51, 178, 49}, + {176, 47, 174, 45, 171, 43, 170, 41, 167, 39, 166, 37, 164, 35, 162, 33}, + {160, 31, 158, 29, 155, 27, 154, 25, 151, 23, 150, 21, 148, 19, 146, 17}, + {144, 15, 142, 13, 139, 11, 138, 9, 135, 7, 134, 5, 132, 3, 130, 1}}, + {{0x7f00, 0x7cfe, 0x7afb, 0x78fa, 0x76f7, 0x74f6, 0x72f4, 0x70f2}, + {0x6ef0, 0x6cee, 0x6aeb, 0x68ea, 0x66e7, 0x64e6, 0x62e4, 0x60e2}, + {0x5ee0, 0x5cde, 0x5adb, 0x58da, 0x56d7, 0x54d6, 0x52d4, 0x50d2}, + {0x4ed0, 0x4cce, 0x4acb, 0x48ca, 0x46c7, 0x44c6, 0x42c4, 0x40c2}, + {0x3ec0, 0x3cbe, 0x3abb, 0x38ba, 0x36b7, 0x34b6, 0x32b4, 0x30b2}, + {0x2eb0, 0x2cae, 0x2aab, 0x28aa, 0x26a7, 0x24a6, 0x22a4, 0x20a2}, + {0x1ea0, 0x1c9e, 0x1a9b, 0x189a, 0x1697, 0x1496, 0x1294, 0x1092}, + {0x0e90, 0x0c8e, 0x0a8b, 0x088a, 0x0687, 0x0486, 0x0284, 0x0082}}, + {{0x7cfe'7f00, 0x78fa'7afb, 0x74f6'76f7, 0x70f2'72f4}, + {0x6cee'6ef0, 0x68ea'6aeb, 0x64e6'66e7, 0x60e2'62e4}, + {0x5cde'5ee0, 0x58da'5adb, 0x54d6'56d7, 0x50d2'52d4}, + {0x4cce'4ed0, 0x48ca'4acb, 0x44c6'46c7, 0x40c2'42c4}, + {0x3cbe'3ec0, 0x38ba'3abb, 0x34b6'36b7, 0x30b2'32b4}, + {0x2cae'2eb0, 0x28aa'2aab, 
0x24a6'26a7, 0x20a2'22a4}, + {0x1c9e'1ea0, 0x189a'1a9b, 0x1496'1697, 0x1092'1294}, + {0x0c8e'0e90, 0x088a'0a8b, 0x0486'0687, 0x0082'0284}}, + {{0x78fa'7afb'7cfe'7f00, 0x70f2'72f4'74f6'76f7}, + {0x68ea'6aeb'6cee'6ef0, 0x60e2'62e4'64e6'66e7}, + {0x58da'5adb'5cde'5ee0, 0x50d2'52d4'54d6'56d7}, + {0x48ca'4acb'4cce'4ed0, 0x40c2'42c4'44c6'46c7}, + {0x38ba'3abb'3cbe'3ec0, 0x30b2'32b4'34b6'36b7}, + {0x28aa'2aab'2cae'2eb0, 0x20a2'22a4'24a6'26a7}, + {0x189a'1a9b'1c9e'1ea0, 0x1092'1294'1496'1697}, + {0x088a'0a8b'0c8e'0e90, 0x0082'0284'0486'0687}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVsubvx, + ExecMaskedVsubvx, + {{86, 215, 88, 217, 90, 219, 92, 221, 94, 223, 96, 225, 98, 227, 100, 229}, + {102, 231, 104, 233, 106, 235, 108, 237, 110, 239, 112, 241, 114, 243, 116, 245}, + {118, 247, 120, 249, 122, 251, 124, 253, 126, 255, 128, 1, 130, 3, 132, 5}, + {134, 7, 136, 9, 138, 11, 140, 13, 142, 15, 144, 17, 146, 19, 148, 21}, + {150, 23, 152, 25, 154, 27, 156, 29, 158, 31, 160, 33, 162, 35, 164, 37}, + {166, 39, 168, 41, 170, 43, 172, 45, 174, 47, 176, 49, 178, 51, 180, 53}, + {182, 55, 184, 57, 186, 59, 188, 61, 190, 63, 192, 65, 194, 67, 196, 69}, + {198, 71, 200, 73, 202, 75, 204, 77, 206, 79, 208, 81, 210, 83, 212, 85}}, + {{0xd656, 0xd858, 0xda5a, 0xdc5c, 0xde5e, 0xe060, 0xe262, 0xe464}, + {0xe666, 0xe868, 0xea6a, 0xec6c, 0xee6e, 0xf070, 0xf272, 0xf474}, + {0xf676, 0xf878, 0xfa7a, 0xfc7c, 0xfe7e, 0x0080, 0x0282, 0x0484}, + {0x0686, 0x0888, 0x0a8a, 0x0c8c, 0x0e8e, 0x1090, 0x1292, 0x1494}, + {0x1696, 0x1898, 0x1a9a, 0x1c9c, 0x1e9e, 0x20a0, 0x22a2, 0x24a4}, + {0x26a6, 0x28a8, 0x2aaa, 0x2cac, 0x2eae, 0x30b0, 0x32b2, 0x34b4}, + {0x36b6, 0x38b8, 0x3aba, 0x3cbc, 0x3ebe, 0x40c0, 0x42c2, 0x44c4}, + {0x46c6, 0x48c8, 0x4aca, 0x4ccc, 0x4ece, 0x50d0, 0x52d2, 0x54d4}}, + {{0xd857'd656, 0xdc5b'da5a, 0xe05f'de5e, 0xe463'e262}, + {0xe867'e666, 0xec6b'ea6a, 0xf06f'ee6e, 0xf473'f272}, + {0xf877'f676, 0xfc7b'fa7a, 0x007f'fe7e, 0x0484'0282}, + {0x0888'0686, 0x0c8c'0a8a, 
0x1090'0e8e, 0x1494'1292}, + {0x1898'1696, 0x1c9c'1a9a, 0x20a0'1e9e, 0x24a4'22a2}, + {0x28a8'26a6, 0x2cac'2aaa, 0x30b0'2eae, 0x34b4'32b2}, + {0x38b8'36b6, 0x3cbc'3aba, 0x40c0'3ebe, 0x44c4'42c2}, + {0x48c8'46c6, 0x4ccc'4aca, 0x50d0'4ece, 0x54d4'52d2}}, + {{0xdc5b'da59'd857'd656, 0xe463'e261'e05f'de5e}, + {0xec6b'ea69'e867'e666, 0xf473'f271'f06f'ee6e}, + {0xfc7b'fa79'f877'f676, 0x0484'0282'007f'fe7e}, + {0x0c8c'0a8a'0888'0686, 0x1494'1292'1090'0e8e}, + {0x1c9c'1a9a'1898'1696, 0x24a4'22a2'20a0'1e9e}, + {0x2cac'2aaa'28a8'26a6, 0x34b4'32b2'30b0'2eae}, + {0x3cbc'3aba'38b8'36b6, 0x44c4'42c2'40c0'3ebe}, + {0x4ccc'4aca'48c8'46c6, 0x54d4'52d2'50d0'4ece}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction(ExecVssubuvv, + ExecMaskedVssubuvv, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 5, 0, 3, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 175, 0, 173, 0, 171, 0, 169, 0, 199, 0, 197, 0, 195, 0, 193}, + {0, 159, 0, 157, 0, 155, 0, 153, 0, 183, 0, 181, 0, 179, 0, 177}, + {0, 143, 0, 141, 0, 139, 0, 137, 0, 167, 0, 165, 0, 163, 0, 161}, + {0, 127, 0, 125, 0, 123, 0, 121, 0, 151, 0, 149, 0, 147, 0, 145}}, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x06f7, 0x04f6, 0x02f4, 0x00f2}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0xaec0, 0xacbe, 0xaabb, 0xa8ba, 0xc6b7, 0xc4b6, 0xc2b4, 0xc0b2}, + {0x9eb0, 0x9cae, 0x9aab, 0x98aa, 0xb6a7, 0xb4a6, 0xb2a4, 0xb0a2}, + {0x8ea0, 0x8c9e, 0x8a9b, 0x889a, 0xa697, 0xa496, 0xa294, 0xa092}, + {0x7e90, 0x7c8e, 0x7a8b, 0x788a, 0x9687, 0x9486, 0x9284, 0x9082}}, + {{0x0000'0000, 0x0000'0000, 0x04f6'06f7, 0x00f2'02f4}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 
0x0000'0000}, + {0xacbe'aec0, 0xa8ba'aabb, 0xc4b6'c6b7, 0xc0b2'c2b4}, + {0x9cae'9eb0, 0x98aa'9aab, 0xb4a6'b6a7, 0xb0a2'b2a4}, + {0x8c9e'8ea0, 0x889a'8a9b, 0xa496'a697, 0xa092'a294}, + {0x7c8e'7e90, 0x788a'7a8b, 0x9486'9687, 0x9082'9284}}, + {{0x0000'0000'0000'0000, 0x00f2'02f4'04f6'06f7}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0xa8ba'aabb'acbe'aec0, 0xc0b2'c2b4'c4b6'c6b7}, + {0x98aa'9aab'9cae'9eb0, 0xb0a2'b2a4'b4a6'b6a7}, + {0x889a'8a9b'8c9e'8ea0, 0xa092'a294'a496'a697}, + {0x788a'7a8b'7c8e'7e90, 0x9082'9284'9486'9687}}, + kVectorCalculationsSource); + TestVectorInstruction(ExecVssubuvx, + ExecMaskedVssubuvx, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 5}, + {0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 19, 0, 21}, + {0, 23, 0, 25, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 37}, + {0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53}, + {0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69}, + {0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85}}, + {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0080, 0x0282, 0x0484}, + {0x0686, 0x0888, 0x0a8a, 0x0c8c, 0x0e8e, 0x1090, 0x1292, 0x1494}, + {0x1696, 0x1898, 0x1a9a, 0x1c9c, 0x1e9e, 0x20a0, 0x22a2, 0x24a4}, + {0x26a6, 0x28a8, 0x2aaa, 0x2cac, 0x2eae, 0x30b0, 0x32b2, 0x34b4}, + {0x36b6, 0x38b8, 0x3aba, 0x3cbc, 0x3ebe, 0x40c0, 0x42c2, 0x44c4}, + {0x46c6, 0x48c8, 0x4aca, 0x4ccc, 0x4ece, 0x50d0, 0x52d2, 0x54d4}}, + {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000}, + {0x0000'0000, 0x0000'0000, 0x007f'fe7e, 0x0484'0282}, + {0x0888'0686, 0x0c8c'0a8a, 0x1090'0e8e, 0x1494'1292}, + {0x1898'1696, 0x1c9c'1a9a, 0x20a0'1e9e, 
0x24a4'22a2}, + {0x28a8'26a6, 0x2cac'2aaa, 0x30b0'2eae, 0x34b4'32b2}, + {0x38b8'36b6, 0x3cbc'3aba, 0x40c0'3ebe, 0x44c4'42c2}, + {0x48c8'46c6, 0x4ccc'4aca, 0x50d0'4ece, 0x54d4'52d2}}, + {{0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0000'0000'0000'0000}, + {0x0000'0000'0000'0000, 0x0484'0282'007f'fe7e}, + {0x0c8c'0a8a'0888'0686, 0x1494'1292'1090'0e8e}, + {0x1c9c'1a9a'1898'1696, 0x24a4'22a2'20a0'1e9e}, + {0x2cac'2aaa'28a8'26a6, 0x34b4'32b2'30b0'2eae}, + {0x3cbc'3aba'38b8'36b6, 0x44c4'42c2'40c0'3ebe}, + {0x4ccc'4aca'48c8'46c6, 0x54d4'52d2'50d0'4ece}}, + kVectorCalculationsSource); + TestVectorInstruction( + ExecVssubvv, + ExecMaskedVssubvv, + {{0, 239, 254, 237, 251, 235, 250, 233, 247, 7, 246, 5, 244, 3, 242, 1}, + {240, 223, 238, 221, 235, 219, 234, 217, 231, 247, 230, 245, 228, 243, 226, 241}, + {224, 207, 222, 205, 219, 203, 218, 201, 215, 231, 214, 229, 212, 227, 210, 225}, + {208, 191, 206, 189, 203, 187, 202, 185, 199, 215, 198, 213, 196, 211, 194, 209}, + {127, 175, 127, 173, 127, 171, 127, 169, 127, 199, 127, 197, 127, 195, 127, 193}, + {127, 159, 127, 157, 127, 155, 127, 153, 127, 183, 127, 181, 127, 179, 127, 177}, + {127, 143, 127, 141, 127, 139, 127, 137, 127, 167, 127, 165, 127, 163, 127, 161}, + {127, 128, 127, 128, 127, 128, 127, 128, 127, 151, 127, 149, 127, 147, 127, 145}}, + {{0xef00, 0xecfe, 0xeafb, 0xe8fa, 0x06f7, 0x04f6, 0x02f4, 0x00f2}, + {0xdef0, 0xdcee, 0xdaeb, 0xd8ea, 0xf6e7, 0xf4e6, 0xf2e4, 0xf0e2}, + {0xcee0, 0xccde, 0xcadb, 0xc8da, 0xe6d7, 0xe4d6, 0xe2d4, 0xe0d2}, + {0xbed0, 0xbcce, 0xbacb, 0xb8ca, 0xd6c7, 0xd4c6, 0xd2c4, 0xd0c2}, + {0xaec0, 0xacbe, 0xaabb, 0xa8ba, 0xc6b7, 0xc4b6, 0xc2b4, 0xc0b2}, + {0x9eb0, 0x9cae, 0x9aab, 0x98aa, 0xb6a7, 0xb4a6, 0xb2a4, 0xb0a2}, + {0x8ea0, 0x8c9e, 0x8a9b, 0x889a, 0xa697, 0xa496, 0xa294, 0xa092}, + {0x8000, 0x8000, 0x8000, 0x8000, 0x9687, 0x9486, 0x9284, 0x9082}}, + {{0xecfd'ef00, 0xe8f9'eafb, 0x04f6'06f7, 0x00f2'02f4}, + {0xdced'def0, 0xd8e9'daeb, 0xf4e5'f6e7, 0xf0e1'f2e4}, 
+ {0xccdd'cee0, 0xc8d9'cadb, 0xe4d5'e6d7, 0xe0d1'e2d4}, + {0xbccd'bed0, 0xb8c9'bacb, 0xd4c5'd6c7, 0xd0c1'd2c4}, + {0xacbe'aec0, 0xa8ba'aabb, 0xc4b6'c6b7, 0xc0b2'c2b4}, + {0x9cae'9eb0, 0x98aa'9aab, 0xb4a6'b6a7, 0xb0a2'b2a4}, + {0x8c9e'8ea0, 0x889a'8a9b, 0xa496'a697, 0xa092'a294}, + {0x8000'0000, 0x8000'0000, 0x9486'9687, 0x9082'9284}}, + {{0xe8f9'eafa'ecfd'ef00, 0x00f2'02f4'04f6'06f7}, + {0xd8e9'daea'dced'def0, 0xf0e1'f2e3'f4e5'f6e7}, + {0xc8d9'cada'ccdd'cee0, 0xe0d1'e2d3'e4d5'e6d7}, + {0xb8c9'baca'bccd'bed0, 0xd0c1'd2c3'd4c5'd6c7}, + {0xa8ba'aabb'acbe'aec0, 0xc0b2'c2b4'c4b6'c6b7}, + {0x98aa'9aab'9cae'9eb0, 0xb0a2'b2a4'b4a6'b6a7}, + {0x889a'8a9b'8c9e'8ea0, 0xa092'a294'a496'a697}, + {0x8000'0000'0000'0000, 0x9082'9284'9486'9687}}, + kVectorCalculationsSource); + TestVectorInstruction( + ExecVssubvx, + ExecMaskedVssubvx, + {{86, 215, 88, 217, 90, 219, 92, 221, 94, 223, 96, 225, 98, 227, 100, 229}, + {102, 231, 104, 233, 106, 235, 108, 237, 110, 239, 112, 241, 114, 243, 116, 245}, + {118, 247, 120, 249, 122, 251, 124, 253, 126, 255, 127, 1, 127, 3, 127, 5}, + {127, 7, 127, 9, 127, 11, 127, 13, 127, 15, 127, 17, 127, 19, 127, 21}, + {127, 23, 127, 25, 127, 27, 127, 29, 127, 31, 127, 33, 127, 35, 127, 37}, + {127, 39, 127, 41, 127, 43, 127, 45, 127, 47, 127, 49, 127, 51, 127, 53}, + {127, 55, 127, 57, 127, 59, 127, 61, 127, 63, 127, 65, 127, 67, 127, 69}, + {127, 71, 127, 73, 127, 75, 127, 77, 127, 79, 127, 81, 127, 83, 127, 85}}, + {{0xd656, 0xd858, 0xda5a, 0xdc5c, 0xde5e, 0xe060, 0xe262, 0xe464}, + {0xe666, 0xe868, 0xea6a, 0xec6c, 0xee6e, 0xf070, 0xf272, 0xf474}, + {0xf676, 0xf878, 0xfa7a, 0xfc7c, 0xfe7e, 0x0080, 0x0282, 0x0484}, + {0x0686, 0x0888, 0x0a8a, 0x0c8c, 0x0e8e, 0x1090, 0x1292, 0x1494}, + {0x1696, 0x1898, 0x1a9a, 0x1c9c, 0x1e9e, 0x20a0, 0x22a2, 0x24a4}, + {0x26a6, 0x28a8, 0x2aaa, 0x2cac, 0x2eae, 0x30b0, 0x32b2, 0x34b4}, + {0x36b6, 0x38b8, 0x3aba, 0x3cbc, 0x3ebe, 0x40c0, 0x42c2, 0x44c4}, + {0x46c6, 0x48c8, 0x4aca, 0x4ccc, 0x4ece, 0x50d0, 0x52d2, 0x54d4}}, + 
{{0xd857'd656, 0xdc5b'da5a, 0xe05f'de5e, 0xe463'e262}, + {0xe867'e666, 0xec6b'ea6a, 0xf06f'ee6e, 0xf473'f272}, + {0xf877'f676, 0xfc7b'fa7a, 0x007f'fe7e, 0x0484'0282}, + {0x0888'0686, 0x0c8c'0a8a, 0x1090'0e8e, 0x1494'1292}, + {0x1898'1696, 0x1c9c'1a9a, 0x20a0'1e9e, 0x24a4'22a2}, + {0x28a8'26a6, 0x2cac'2aaa, 0x30b0'2eae, 0x34b4'32b2}, + {0x38b8'36b6, 0x3cbc'3aba, 0x40c0'3ebe, 0x44c4'42c2}, + {0x48c8'46c6, 0x4ccc'4aca, 0x50d0'4ece, 0x54d4'52d2}}, + {{0xdc5b'da59'd857'd656, 0xe463'e261'e05f'de5e}, + {0xec6b'ea69'e867'e666, 0xf473'f271'f06f'ee6e}, + {0xfc7b'fa79'f877'f676, 0x0484'0282'007f'fe7e}, + {0x0c8c'0a8a'0888'0686, 0x1494'1292'1090'0e8e}, + {0x1c9c'1a9a'1898'1696, 0x24a4'22a2'20a0'1e9e}, + {0x2cac'2aaa'28a8'26a6, 0x34b4'32b2'30b0'2eae}, + {0x3cbc'3aba'38b8'36b6, 0x44c4'42c2'40c0'3ebe}, + {0x4ccc'4aca'48c8'46c6, 0x54d4'52d2'50d0'4ece}}, + kVectorCalculationsSource); + + TestVectorFloatInstruction(ExecVfsubvv, + ExecMaskedVfsubvv, + {{0x1604'9200, 0x1e0c'9a09, 0x8b0a'63e7, 0x8ece'd50c}, + {0x3624'b220, 0x3e2c'ba29, 0x2634'a22f, 0x2e3c'aa38}, + {0x5644'd240, 0x5e4c'da49, 0x4654'c251, 0x4e5c'ca58}, + {0x7664'f260, 0x7e6c'fa69, 0x6674'e271, 0x6e7c'ea78}, + {0xc342'c140, 0xc746'c544, 0xcb4a'c948, 0xcf4e'cd4c}, + {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, + {0xe362'e160, 0xe766'e5ca, 0xeb6a'e968, 0xef6e'ed6c}, + {0xf6e6'58c3, 0xfeec'7cd7, 0xfb7a'f978, 0xff7e'fd7c}}, + {{0x1e0c'9a09'9604'9200, 0x8f0e'8cd3'76d9'7cdf}, + {0x3e2c'ba29'b624'b220, 0x2e3c'aa38'a634'a231}, + {0x5e4c'da49'd644'd240, 0x4e5c'ca58'c654'c251}, + {0x7e6c'fa69'f664'f260, 0x6e7c'ea78'e674'e271}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xfeec'7ae9'76e4'72e0, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + TestVectorFloatInstruction(ExecVfsubvf, + ExecMaskedVfsubvf, + {{0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000}, + {0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000, 
0xc0b4'0000}, + {0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000, 0xc0b4'0000}, + {0xc0b4'0000, 0xc0b4'0017, 0xc0b4'1757, 0xc0cb'd7a8}, + {0xc348'6140, 0xc746'cae4, 0xcb4a'c94e, 0xcf4e'cd4c}, + {0xd352'd150, 0xd756'd554, 0xdb5a'd958, 0xdf5e'dd5c}, + {0xe362'e160, 0xe766'e564, 0xeb6a'e968, 0xef6e'ed6c}, + {0xf372'f170, 0xf776'f574, 0xfb7a'f978, 0xff7e'fd7c}}, + {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'807a'f4f2'eceb}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(ExecVfwsubvv, + ExecMaskedVfwsubvv, + {{0x3ac0'923f'ffff'bec0, 0x3bc1'9341'1fff'ffbd}, + {0xb961'4c7c'ef78'0000, 0xb9d9'daa1'8000'0000}, + {0x3ec4'9644'0000'0000, 0x3fc5'9745'2000'0000}, + {0x3cc6'9445'd2b3'7400, 0x3dc7'9546'ffb0'b172}, + {0x42c8'9a48'0000'0000, 0x43c9'9b49'2000'0000}, + {0x40ca'984a'2000'0000, 0x41cb'994b'0000'0000}, + {0x46cc'9e4c'0000'0000, 0x47cd'9f4d'2000'0000}, + {0x44ce'9c4e'2000'0000, 0x45cf'9d4f'0000'0000}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwsubvf, + ExecMaskedVfwsubvf, + {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0003}, + {0xc016'8000'0000'02ab, 0xc016'8000'0002'bab5}, + {0xc016'8000'02ca'c4c0, 0xc016'8002'dad4'd000}, + {0xc016'82ea'e4e0'0000, 0xc019'7af4'f000'0000}}, + kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(ExecVfwsubwv, + ExecMaskedVfwsubwv, + {{0x3ac0'9240'0000'0000, 0x3bc1'9341'2000'0000}, + {0x38c2'9042'2000'0000, 0x39c3'9143'0000'0000}, + {0x3ec4'9644'0000'0000, 
0x3fc5'9745'2000'0000}, + {0x3cc6'9446'2000'0000, 0xbf3e'bd3c'8c10'2b38}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(ExecVfwsubwf, + ExecMaskedVfwsubwf, + {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'807a'f4f2'eceb}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); +} + +[[gnu::naked]] void ExecVandvv() { + asm("vand.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVandvv() { + asm("vand.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVandvx() { + asm("vand.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVandvx() { + asm("vand.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecVandvi() { + asm("vand.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVandvi() { + asm("vand.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVand) { + TestVectorInstruction( + ExecVandvv, + ExecMaskedVandvv, + {{0, 0, 0, 2, 0, 0, 4, 6, 0, 0, 0, 2, 8, 8, 12, 14}, + {0, 0, 0, 2, 0, 0, 4, 6, 16, 16, 16, 18, 24, 24, 28, 30}, + {0, 0, 0, 2, 0, 0, 4, 6, 0, 0, 0, 2, 8, 8, 12, 14}, + {32, 32, 32, 34, 32, 32, 36, 38, 48, 48, 48, 50, 56, 56, 60, 62}, + {0, 128, 0, 130, 0, 128, 4, 134, 0, 128, 0, 130, 8, 136, 12, 142}, + {0, 128, 0, 130, 0, 128, 4, 134, 16, 144, 16, 146, 24, 152, 28, 158}, + {64, 192, 64, 194, 64, 192, 68, 198, 64, 192, 64, 194, 72, 200, 76, 206}, + {96, 224, 96, 226, 96, 224, 100, 230, 112, 240, 112, 242, 120, 
248, 124, 254}}, + {{0x0000, 0x0200, 0x0000, 0x0604, 0x0000, 0x0200, 0x0808, 0x0e0c}, + {0x0000, 0x0200, 0x0000, 0x0604, 0x1010, 0x1210, 0x1818, 0x1e1c}, + {0x0000, 0x0200, 0x0000, 0x0604, 0x0000, 0x0200, 0x0808, 0x0e0c}, + {0x2020, 0x2220, 0x2020, 0x2624, 0x3030, 0x3230, 0x3838, 0x3e3c}, + {0x8000, 0x8200, 0x8000, 0x8604, 0x8000, 0x8200, 0x8808, 0x8e0c}, + {0x8000, 0x8200, 0x8000, 0x8604, 0x9010, 0x9210, 0x9818, 0x9e1c}, + {0xc040, 0xc240, 0xc040, 0xc644, 0xc040, 0xc240, 0xc848, 0xce4c}, + {0xe060, 0xe260, 0xe060, 0xe664, 0xf070, 0xf270, 0xf878, 0xfe7c}}, + {{0x0200'0000, 0x0604'0000, 0x0200'0000, 0x0e0c'0808}, + {0x0200'0000, 0x0604'0000, 0x1210'1010, 0x1e1c'1818}, + {0x0200'0000, 0x0604'0000, 0x0200'0000, 0x0e0c'0808}, + {0x2220'2020, 0x2624'2020, 0x3230'3030, 0x3e3c'3838}, + {0x8200'8000, 0x8604'8000, 0x8200'8000, 0x8e0c'8808}, + {0x8200'8000, 0x8604'8000, 0x9210'9010, 0x9e1c'9818}, + {0xc240'c040, 0xc644'c040, 0xc240'c040, 0xce4c'c848}, + {0xe260'e060, 0xe664'e060, 0xf270'f070, 0xfe7c'f878}}, + {{0x0604'0000'0200'0000, 0x0e0c'0808'0200'0000}, + {0x0604'0000'0200'0000, 0x1e1c'1818'1210'1010}, + {0x0604'0000'0200'0000, 0x0e0c'0808'0200'0000}, + {0x2624'2020'2220'2020, 0x3e3c'3838'3230'3030}, + {0x8604'8000'8200'8000, 0x8e0c'8808'8200'8000}, + {0x8604'8000'8200'8000, 0x9e1c'9818'9210'9010}, + {0xc644'c040'c240'c040, 0xce4c'c848'c240'c040}, + {0xe664'e060'e260'e060, 0xfe7c'f878'f270'f070}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction(ExecVandvx, + ExecMaskedVandvx, + {{0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, + {0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, + {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}, + {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}, + {0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, + {0, 128, 2, 130, 0, 128, 2, 130, 8, 136, 10, 138, 8, 136, 10, 138}, + {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 
40, 168, 42, 170}, + {32, 160, 34, 162, 32, 160, 34, 162, 40, 168, 42, 170, 40, 168, 42, 170}}, + {{0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, + {0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, + {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}, + {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}, + {0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, + {0x8000, 0x8202, 0x8000, 0x8202, 0x8808, 0x8a0a, 0x8808, 0x8a0a}, + {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}, + {0xa020, 0xa222, 0xa020, 0xa222, 0xa828, 0xaa2a, 0xa828, 0xaa2a}}, + {{0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, + {0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, + {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}, + {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}, + {0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, + {0x8202'8000, 0x8202'8000, 0x8a0a'8808, 0x8a0a'8808}, + {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}, + {0xa222'a020, 0xa222'a020, 0xaa2a'a828, 0xaa2a'a828}}, + {{0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, + {0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, + {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}, + {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}, + {0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, + {0x8202'8000'8202'8000, 0x8a0a'8808'8a0a'8808}, + {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}, + {0xa222'a020'a222'a020, 0xaa2a'a828'aa2a'a828}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVandvi, + ExecMaskedVandvi, + {{0, 129, 0, 129, 4, 133, 4, 133, 0, 129, 0, 129, 4, 133, 4, 133}, + {16, 145, 16, 145, 20, 149, 20, 149, 16, 145, 16, 145, 20, 149, 20, 149}, + {32, 161, 32, 161, 36, 165, 36, 165, 32, 161, 32, 161, 36, 165, 36, 165}, + {48, 177, 48, 177, 52, 181, 52, 181, 48, 177, 48, 177, 52, 181, 52, 181}, + {64, 193, 64, 193, 68, 197, 68, 197, 64, 193, 64, 193, 68, 197, 68, 197}, + {80, 209, 80, 209, 84, 213, 84, 
213, 80, 209, 80, 209, 84, 213, 84, 213}, + {96, 225, 96, 225, 100, 229, 100, 229, 96, 225, 96, 225, 100, 229, 100, 229}, + {112, 241, 112, 241, 116, 245, 116, 245, 112, 241, 112, 241, 116, 245, 116, 245}}, + {{0x8100, 0x8300, 0x8504, 0x8704, 0x8900, 0x8b00, 0x8d04, 0x8f04}, + {0x9110, 0x9310, 0x9514, 0x9714, 0x9910, 0x9b10, 0x9d14, 0x9f14}, + {0xa120, 0xa320, 0xa524, 0xa724, 0xa920, 0xab20, 0xad24, 0xaf24}, + {0xb130, 0xb330, 0xb534, 0xb734, 0xb930, 0xbb30, 0xbd34, 0xbf34}, + {0xc140, 0xc340, 0xc544, 0xc744, 0xc940, 0xcb40, 0xcd44, 0xcf44}, + {0xd150, 0xd350, 0xd554, 0xd754, 0xd950, 0xdb50, 0xdd54, 0xdf54}, + {0xe160, 0xe360, 0xe564, 0xe764, 0xe960, 0xeb60, 0xed64, 0xef64}, + {0xf170, 0xf370, 0xf574, 0xf774, 0xf970, 0xfb70, 0xfd74, 0xff74}}, + {{0x8302'8100, 0x8706'8504, 0x8b0a'8900, 0x8f0e'8d04}, + {0x9312'9110, 0x9716'9514, 0x9b1a'9910, 0x9f1e'9d14}, + {0xa322'a120, 0xa726'a524, 0xab2a'a920, 0xaf2e'ad24}, + {0xb332'b130, 0xb736'b534, 0xbb3a'b930, 0xbf3e'bd34}, + {0xc342'c140, 0xc746'c544, 0xcb4a'c940, 0xcf4e'cd44}, + {0xd352'd150, 0xd756'd554, 0xdb5a'd950, 0xdf5e'dd54}, + {0xe362'e160, 0xe766'e564, 0xeb6a'e960, 0xef6e'ed64}, + {0xf372'f170, 0xf776'f574, 0xfb7a'f970, 0xff7e'fd74}}, + {{0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8900}, + {0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9910}, + {0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a920}, + {0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b930}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c940}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd950}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e960}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f970}}, + kVectorCalculationsSourceLegacy); +} + +[[gnu::naked]] void ExecVorvv() { + asm("vor.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVorvv() { + asm("vor.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVorvx() { + asm("vor.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVorvx() { + asm("vor.vx v8, v16, t0, v0.t\n\t" + 
"ret\n\t"); +} +[[gnu::naked]] void ExecVorvi() { + asm("vor.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVorvi() { + asm("vor.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVor) { + TestVectorInstruction( + ExecVorvv, + ExecMaskedVorvv, + {{0, 131, 6, 135, 13, 143, 14, 143, 25, 155, 30, 159, 28, 159, 30, 159}, + {48, 179, 54, 183, 61, 191, 62, 191, 57, 187, 62, 191, 60, 191, 62, 191}, + {96, 227, 102, 231, 109, 239, 110, 239, 121, 251, 126, 255, 124, 255, 126, 255}, + {112, 243, 118, 247, 125, 255, 126, 255, 121, 251, 126, 255, 124, 255, 126, 255}, + {192, 195, 198, 199, 205, 207, 206, 207, 217, 219, 222, 223, 220, 223, 222, 223}, + {240, 243, 246, 247, 253, 255, 254, 255, 249, 251, 254, 255, 252, 255, 254, 255}, + {224, 227, 230, 231, 237, 239, 238, 239, 249, 251, 254, 255, 252, 255, 254, 255}, + {240, 243, 246, 247, 253, 255, 254, 255, 249, 251, 254, 255, 252, 255, 254, 255}}, + {{0x8300, 0x8706, 0x8f0d, 0x8f0e, 0x9b19, 0x9f1e, 0x9f1c, 0x9f1e}, + {0xb330, 0xb736, 0xbf3d, 0xbf3e, 0xbb39, 0xbf3e, 0xbf3c, 0xbf3e}, + {0xe360, 0xe766, 0xef6d, 0xef6e, 0xfb79, 0xff7e, 0xff7c, 0xff7e}, + {0xf370, 0xf776, 0xff7d, 0xff7e, 0xfb79, 0xff7e, 0xff7c, 0xff7e}, + {0xc3c0, 0xc7c6, 0xcfcd, 0xcfce, 0xdbd9, 0xdfde, 0xdfdc, 0xdfde}, + {0xf3f0, 0xf7f6, 0xfffd, 0xfffe, 0xfbf9, 0xfffe, 0xfffc, 0xfffe}, + {0xe3e0, 0xe7e6, 0xefed, 0xefee, 0xfbf9, 0xfffe, 0xfffc, 0xfffe}, + {0xf3f0, 0xf7f6, 0xfffd, 0xfffe, 0xfbf9, 0xfffe, 0xfffc, 0xfffe}}, + {{0x8706'8300, 0x8f0e'8f0d, 0x9f1e'9b19, 0x9f1e'9f1c}, + {0xb736'b330, 0xbf3e'bf3d, 0xbf3e'bb39, 0xbf3e'bf3c}, + {0xe766'e360, 0xef6e'ef6d, 0xff7e'fb79, 0xff7e'ff7c}, + {0xf776'f370, 0xff7e'ff7d, 0xff7e'fb79, 0xff7e'ff7c}, + {0xc7c6'c3c0, 0xcfce'cfcd, 0xdfde'dbd9, 0xdfde'dfdc}, + {0xf7f6'f3f0, 0xfffe'fffd, 0xfffe'fbf9, 0xfffe'fffc}, + {0xe7e6'e3e0, 0xefee'efed, 0xfffe'fbf9, 0xfffe'fffc}, + {0xf7f6'f3f0, 0xfffe'fffd, 0xfffe'fbf9, 0xfffe'fffc}}, + {{0x8f0e'8f0d'8706'8300, 0x9f1e'9f1c'9f1e'9b19}, 
+ {0xbf3e'bf3d'b736'b330, 0xbf3e'bf3c'bf3e'bb39}, + {0xef6e'ef6d'e766'e360, 0xff7e'ff7c'ff7e'fb79}, + {0xff7e'ff7d'f776'f370, 0xff7e'ff7c'ff7e'fb79}, + {0xcfce'cfcd'c7c6'c3c0, 0xdfde'dfdc'dfde'dbd9}, + {0xfffe'fffd'f7f6'f3f0, 0xfffe'fffc'fffe'fbf9}, + {0xefee'efed'e7e6'e3e0, 0xfffe'fffc'fffe'fbf9}, + {0xfffe'fffd'f7f6'f3f0, 0xfffe'fffc'fffe'fbf9}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVorvx, + ExecMaskedVorvx, + {{170, 171, 170, 171, 174, 175, 174, 175, 170, 171, 170, 171, 174, 175, 174, 175}, + {186, 187, 186, 187, 190, 191, 190, 191, 186, 187, 186, 187, 190, 191, 190, 191}, + {170, 171, 170, 171, 174, 175, 174, 175, 170, 171, 170, 171, 174, 175, 174, 175}, + {186, 187, 186, 187, 190, 191, 190, 191, 186, 187, 186, 187, 190, 191, 190, 191}, + {234, 235, 234, 235, 238, 239, 238, 239, 234, 235, 234, 235, 238, 239, 238, 239}, + {250, 251, 250, 251, 254, 255, 254, 255, 250, 251, 250, 251, 254, 255, 254, 255}, + {234, 235, 234, 235, 238, 239, 238, 239, 234, 235, 234, 235, 238, 239, 238, 239}, + {250, 251, 250, 251, 254, 255, 254, 255, 250, 251, 250, 251, 254, 255, 254, 255}}, + {{0xabaa, 0xabaa, 0xafae, 0xafae, 0xabaa, 0xabaa, 0xafae, 0xafae}, + {0xbbba, 0xbbba, 0xbfbe, 0xbfbe, 0xbbba, 0xbbba, 0xbfbe, 0xbfbe}, + {0xabaa, 0xabaa, 0xafae, 0xafae, 0xabaa, 0xabaa, 0xafae, 0xafae}, + {0xbbba, 0xbbba, 0xbfbe, 0xbfbe, 0xbbba, 0xbbba, 0xbfbe, 0xbfbe}, + {0xebea, 0xebea, 0xefee, 0xefee, 0xebea, 0xebea, 0xefee, 0xefee}, + {0xfbfa, 0xfbfa, 0xfffe, 0xfffe, 0xfbfa, 0xfbfa, 0xfffe, 0xfffe}, + {0xebea, 0xebea, 0xefee, 0xefee, 0xebea, 0xebea, 0xefee, 0xefee}, + {0xfbfa, 0xfbfa, 0xfffe, 0xfffe, 0xfbfa, 0xfbfa, 0xfffe, 0xfffe}}, + {{0xabaa'abaa, 0xafae'afae, 0xabaa'abaa, 0xafae'afae}, + {0xbbba'bbba, 0xbfbe'bfbe, 0xbbba'bbba, 0xbfbe'bfbe}, + {0xabaa'abaa, 0xafae'afae, 0xabaa'abaa, 0xafae'afae}, + {0xbbba'bbba, 0xbfbe'bfbe, 0xbbba'bbba, 0xbfbe'bfbe}, + {0xebea'ebea, 0xefee'efee, 0xebea'ebea, 0xefee'efee}, + {0xfbfa'fbfa, 0xfffe'fffe, 0xfbfa'fbfa, 
0xfffe'fffe}, + {0xebea'ebea, 0xefee'efee, 0xebea'ebea, 0xefee'efee}, + {0xfbfa'fbfa, 0xfffe'fffe, 0xfbfa'fbfa, 0xfffe'fffe}}, + {{0xafae'afae'abaa'abaa, 0xafae'afae'abaa'abaa}, + {0xbfbe'bfbe'bbba'bbba, 0xbfbe'bfbe'bbba'bbba}, + {0xafae'afae'abaa'abaa, 0xafae'afae'abaa'abaa}, + {0xbfbe'bfbe'bbba'bbba, 0xbfbe'bfbe'bbba'bbba}, + {0xefee'efee'ebea'ebea, 0xefee'efee'ebea'ebea}, + {0xfffe'fffe'fbfa'fbfa, 0xfffe'fffe'fbfa'fbfa}, + {0xefee'efee'ebea'ebea, 0xefee'efee'ebea'ebea}, + {0xfffe'fffe'fbfa'fbfa, 0xfffe'fffe'fbfa'fbfa}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVorvi, + ExecMaskedVorvi, + {{245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}, + {245, 245, 247, 247, 245, 245, 247, 247, 253, 253, 255, 255, 253, 253, 255, 255}}, + {{0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}, + {0xfff5, 0xfff7, 0xfff5, 0xfff7, 0xfffd, 0xffff, 0xfffd, 0xffff}}, + {{0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 
0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}, + {0xffff'fff5, 0xffff'fff5, 0xffff'fffd, 0xffff'fffd}}, + {{0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}, + {0xffff'ffff'ffff'fff5, 0xffff'ffff'ffff'fffd}}, + kVectorCalculationsSourceLegacy); +} + +[[gnu::naked]] void ExecVxorvv() { + asm("vxor.vv v8, v16, v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVxorvv() { + asm("vxor.vv v8, v16, v24, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVxorvx() { + asm("vxor.vx v8, v16, t0\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVxorvx() { + asm("vxor.vx v8, v16, t0, v0.t\n\t" + "ret\n\t"); +} +[[gnu::naked]] void ExecVxorvi() { + asm("vxor.vi v8, v16, -0xb\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVxorvi() { + asm("vxor.vi v8, v16, -0xb, v0.t\n\t" + "ret\n\t"); +} + +TEST(InlineAsmTestRiscv64, TestVxor) { + TestVectorInstruction( + ExecVxorvv, + ExecMaskedVxorvv, + {{0, 131, 6, 133, 13, 143, 10, 137, 25, 155, 30, 157, 20, 151, 18, 145}, + {48, 179, 54, 181, 61, 191, 58, 185, 41, 171, 46, 173, 36, 167, 34, 161}, + {96, 227, 102, 229, 109, 239, 106, 233, 121, 251, 126, 253, 116, 247, 114, 241}, + {80, 211, 86, 213, 93, 223, 90, 217, 73, 203, 78, 205, 68, 199, 66, 193}, + {192, 67, 198, 69, 205, 79, 202, 73, 217, 91, 222, 93, 212, 87, 210, 81}, + {240, 115, 246, 117, 253, 127, 250, 121, 233, 107, 238, 109, 228, 103, 226, 97}, + {160, 35, 166, 37, 173, 47, 170, 41, 185, 59, 190, 61, 180, 55, 178, 49}, + {144, 19, 150, 21, 157, 31, 154, 25, 137, 11, 142, 13, 
132, 7, 130, 1}}, + {{0x8300, 0x8506, 0x8f0d, 0x890a, 0x9b19, 0x9d1e, 0x9714, 0x9112}, + {0xb330, 0xb536, 0xbf3d, 0xb93a, 0xab29, 0xad2e, 0xa724, 0xa122}, + {0xe360, 0xe566, 0xef6d, 0xe96a, 0xfb79, 0xfd7e, 0xf774, 0xf172}, + {0xd350, 0xd556, 0xdf5d, 0xd95a, 0xcb49, 0xcd4e, 0xc744, 0xc142}, + {0x43c0, 0x45c6, 0x4fcd, 0x49ca, 0x5bd9, 0x5dde, 0x57d4, 0x51d2}, + {0x73f0, 0x75f6, 0x7ffd, 0x79fa, 0x6be9, 0x6dee, 0x67e4, 0x61e2}, + {0x23a0, 0x25a6, 0x2fad, 0x29aa, 0x3bb9, 0x3dbe, 0x37b4, 0x31b2}, + {0x1390, 0x1596, 0x1f9d, 0x199a, 0x0b89, 0x0d8e, 0x0784, 0x0182}}, + {{0x8506'8300, 0x890a'8f0d, 0x9d1e'9b19, 0x9112'9714}, + {0xb536'b330, 0xb93a'bf3d, 0xad2e'ab29, 0xa122'a724}, + {0xe566'e360, 0xe96a'ef6d, 0xfd7e'fb79, 0xf172'f774}, + {0xd556'd350, 0xd95a'df5d, 0xcd4e'cb49, 0xc142'c744}, + {0x45c6'43c0, 0x49ca'4fcd, 0x5dde'5bd9, 0x51d2'57d4}, + {0x75f6'73f0, 0x79fa'7ffd, 0x6dee'6be9, 0x61e2'67e4}, + {0x25a6'23a0, 0x29aa'2fad, 0x3dbe'3bb9, 0x31b2'37b4}, + {0x1596'1390, 0x199a'1f9d, 0x0d8e'0b89, 0x0182'0784}}, + {{0x890a'8f0d'8506'8300, 0x9112'9714'9d1e'9b19}, + {0xb93a'bf3d'b536'b330, 0xa122'a724'ad2e'ab29}, + {0xe96a'ef6d'e566'e360, 0xf172'f774'fd7e'fb79}, + {0xd95a'df5d'd556'd350, 0xc142'c744'cd4e'cb49}, + {0x49ca'4fcd'45c6'43c0, 0x51d2'57d4'5dde'5bd9}, + {0x79fa'7ffd'75f6'73f0, 0x61e2'67e4'6dee'6be9}, + {0x29aa'2fad'25a6'23a0, 0x31b2'37b4'3dbe'3bb9}, + {0x199a'1f9d'1596'1390, 0x0182'0784'0d8e'0b89}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVxorvx, + ExecMaskedVxorvx, + {{170, 43, 168, 41, 174, 47, 172, 45, 162, 35, 160, 33, 166, 39, 164, 37}, + {186, 59, 184, 57, 190, 63, 188, 61, 178, 51, 176, 49, 182, 55, 180, 53}, + {138, 11, 136, 9, 142, 15, 140, 13, 130, 3, 128, 1, 134, 7, 132, 5}, + {154, 27, 152, 25, 158, 31, 156, 29, 146, 19, 144, 17, 150, 23, 148, 21}, + {234, 107, 232, 105, 238, 111, 236, 109, 226, 99, 224, 97, 230, 103, 228, 101}, + {250, 123, 248, 121, 254, 127, 252, 125, 242, 115, 240, 113, 246, 119, 244, 117}, + {202, 75, 200, 73, 
206, 79, 204, 77, 194, 67, 192, 65, 198, 71, 196, 69}, + {218, 91, 216, 89, 222, 95, 220, 93, 210, 83, 208, 81, 214, 87, 212, 85}}, + {{0x2baa, 0x29a8, 0x2fae, 0x2dac, 0x23a2, 0x21a0, 0x27a6, 0x25a4}, + {0x3bba, 0x39b8, 0x3fbe, 0x3dbc, 0x33b2, 0x31b0, 0x37b6, 0x35b4}, + {0x0b8a, 0x0988, 0x0f8e, 0x0d8c, 0x0382, 0x0180, 0x0786, 0x0584}, + {0x1b9a, 0x1998, 0x1f9e, 0x1d9c, 0x1392, 0x1190, 0x1796, 0x1594}, + {0x6bea, 0x69e8, 0x6fee, 0x6dec, 0x63e2, 0x61e0, 0x67e6, 0x65e4}, + {0x7bfa, 0x79f8, 0x7ffe, 0x7dfc, 0x73f2, 0x71f0, 0x77f6, 0x75f4}, + {0x4bca, 0x49c8, 0x4fce, 0x4dcc, 0x43c2, 0x41c0, 0x47c6, 0x45c4}, + {0x5bda, 0x59d8, 0x5fde, 0x5ddc, 0x53d2, 0x51d0, 0x57d6, 0x55d4}}, + {{0x29a8'2baa, 0x2dac'2fae, 0x21a0'23a2, 0x25a4'27a6}, + {0x39b8'3bba, 0x3dbc'3fbe, 0x31b0'33b2, 0x35b4'37b6}, + {0x0988'0b8a, 0x0d8c'0f8e, 0x0180'0382, 0x0584'0786}, + {0x1998'1b9a, 0x1d9c'1f9e, 0x1190'1392, 0x1594'1796}, + {0x69e8'6bea, 0x6dec'6fee, 0x61e0'63e2, 0x65e4'67e6}, + {0x79f8'7bfa, 0x7dfc'7ffe, 0x71f0'73f2, 0x75f4'77f6}, + {0x49c8'4bca, 0x4dcc'4fce, 0x41c0'43c2, 0x45c4'47c6}, + {0x59d8'5bda, 0x5ddc'5fde, 0x51d0'53d2, 0x55d4'57d6}}, + {{0x2dac'2fae'29a8'2baa, 0x25a4'27a6'21a0'23a2}, + {0x3dbc'3fbe'39b8'3bba, 0x35b4'37b6'31b0'33b2}, + {0x0d8c'0f8e'0988'0b8a, 0x0584'0786'0180'0382}, + {0x1d9c'1f9e'1998'1b9a, 0x1594'1796'1190'1392}, + {0x6dec'6fee'69e8'6bea, 0x65e4'67e6'61e0'63e2}, + {0x7dfc'7ffe'79f8'7bfa, 0x75f4'77f6'71f0'73f2}, + {0x4dcc'4fce'49c8'4bca, 0x45c4'47c6'41c0'43c2}, + {0x5ddc'5fde'59d8'5bda, 0x55d4'57d6'51d0'53d2}}, + kVectorCalculationsSourceLegacy); + TestVectorInstruction( + ExecVxorvi, + ExecMaskedVxorvi, + {{245, 116, 247, 118, 241, 112, 243, 114, 253, 124, 255, 126, 249, 120, 251, 122}, + {229, 100, 231, 102, 225, 96, 227, 98, 237, 108, 239, 110, 233, 104, 235, 106}, + {213, 84, 215, 86, 209, 80, 211, 82, 221, 92, 223, 94, 217, 88, 219, 90}, + {197, 68, 199, 70, 193, 64, 195, 66, 205, 76, 207, 78, 201, 72, 203, 74}, + {181, 52, 183, 54, 177, 48, 179, 50, 189, 60, 191, 
62, 185, 56, 187, 58}, + {165, 36, 167, 38, 161, 32, 163, 34, 173, 44, 175, 46, 169, 40, 171, 42}, + {149, 20, 151, 22, 145, 16, 147, 18, 157, 28, 159, 30, 153, 24, 155, 26}, + {133, 4, 135, 6, 129, 0, 131, 2, 141, 12, 143, 14, 137, 8, 139, 10}}, + {{0x7ef5, 0x7cf7, 0x7af1, 0x78f3, 0x76fd, 0x74ff, 0x72f9, 0x70fb}, + {0x6ee5, 0x6ce7, 0x6ae1, 0x68e3, 0x66ed, 0x64ef, 0x62e9, 0x60eb}, + {0x5ed5, 0x5cd7, 0x5ad1, 0x58d3, 0x56dd, 0x54df, 0x52d9, 0x50db}, + {0x4ec5, 0x4cc7, 0x4ac1, 0x48c3, 0x46cd, 0x44cf, 0x42c9, 0x40cb}, + {0x3eb5, 0x3cb7, 0x3ab1, 0x38b3, 0x36bd, 0x34bf, 0x32b9, 0x30bb}, + {0x2ea5, 0x2ca7, 0x2aa1, 0x28a3, 0x26ad, 0x24af, 0x22a9, 0x20ab}, + {0x1e95, 0x1c97, 0x1a91, 0x1893, 0x169d, 0x149f, 0x1299, 0x109b}, + {0x0e85, 0x0c87, 0x0a81, 0x0883, 0x068d, 0x048f, 0x0289, 0x008b}}, + {{0x7cfd'7ef5, 0x78f9'7af1, 0x74f5'76fd, 0x70f1'72f9}, + {0x6ced'6ee5, 0x68e9'6ae1, 0x64e5'66ed, 0x60e1'62e9}, + {0x5cdd'5ed5, 0x58d9'5ad1, 0x54d5'56dd, 0x50d1'52d9}, + {0x4ccd'4ec5, 0x48c9'4ac1, 0x44c5'46cd, 0x40c1'42c9}, + {0x3cbd'3eb5, 0x38b9'3ab1, 0x34b5'36bd, 0x30b1'32b9}, + {0x2cad'2ea5, 0x28a9'2aa1, 0x24a5'26ad, 0x20a1'22a9}, + {0x1c9d'1e95, 0x1899'1a91, 0x1495'169d, 0x1091'1299}, + {0x0c8d'0e85, 0x0889'0a81, 0x0485'068d, 0x0081'0289}}, + {{0x78f9'7afb'7cfd'7ef5, 0x70f1'72f3'74f5'76fd}, + {0x68e9'6aeb'6ced'6ee5, 0x60e1'62e3'64e5'66ed}, + {0x58d9'5adb'5cdd'5ed5, 0x50d1'52d3'54d5'56dd}, + {0x48c9'4acb'4ccd'4ec5, 0x40c1'42c3'44c5'46cd}, + {0x38b9'3abb'3cbd'3eb5, 0x30b1'32b3'34b5'36bd}, + {0x28a9'2aab'2cad'2ea5, 0x20a1'22a3'24a5'26ad}, + {0x1899'1a9b'1c9d'1e95, 0x1091'1293'1495'169d}, + {0x0889'0a8b'0c8d'0e85, 0x0081'0283'0485'068d}}, + kVectorCalculationsSourceLegacy); +} + } // namespace diff --git a/tests/run_host_tests.mk b/tests/run_host_tests.mk index ab6bca76..ad726743 100644 --- a/tests/run_host_tests.mk +++ b/tests/run_host_tests.mk @@ -137,6 +137,28 @@ $(eval $(call add_test,berberis_ndk_program_tests_two_gear,\ 
$(TARGET_OUT_TESTCASES)/berberis_ndk_program_tests_static.native_bridge/x86_64/berberis_ndk_program_tests_static,\ BERBERIS_MODE=two-gear)) +# inline_asm_tests_riscv64 + +$(eval $(call add_test,inline_asm_tests_riscv64_interpret_only,\ + run_test_x86_64_riscv64,\ + $(TARGET_OUT_TESTCASES)/inline_asm_tests_riscv64.native_bridge/x86_64/inline_asm_tests_riscv64,\ + BERBERIS_MODE=interpret-only)) + +$(eval $(call add_test,inline_asm_tests_riscv64_lite_translate_or_interpret,\ + run_test_x86_64_riscv64,\ + $(TARGET_OUT_TESTCASES)/inline_asm_tests_riscv64.native_bridge/x86_64/inline_asm_tests_riscv64,\ + BERBERIS_MODE=lite-translate-or-interpret)) + +$(eval $(call add_test,inline_asm_tests_riscv64_heavy_optimize_or_interpret,\ + run_test_x86_64_riscv64,\ + $(TARGET_OUT_TESTCASES)/inline_asm_tests_riscv64.native_bridge/x86_64/inline_asm_tests_riscv64,\ + BERBERIS_MODE=heavy-optimize-or-interpret)) + +$(eval $(call add_test,inline_asm_tests_riscv64_two_gear,\ + run_test_x86_64_riscv64,\ + $(TARGET_OUT_TESTCASES)/inline_asm_tests_riscv64.native_bridge/x86_64/inline_asm_tests_riscv64,\ + BERBERIS_MODE=two-gear)) + # berberis_host_tests $(eval $(call add_test,berberis_host_tests,\ |