diff options
author | Lev Rumyantsev <levarum@google.com> | 2024-05-07 18:09:43 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2024-05-07 18:09:43 +0000 |
commit | 346ab9b6dc748755a14c614ee83f044352b7eff7 (patch) | |
tree | 3574c967c0d7b241fcb26fda18d74784a662e7bd | |
parent | 33e1504627596ebfe6b11b4a1c7b5e52bb4a1aed (diff) | |
parent | 877822aa0e56dc16afbb75e811ce7cabeaee816e (diff) | |
download | binary_translation-346ab9b6dc748755a14c614ee83f044352b7eff7.tar.gz |
Merge changes I535e1632,I959a383a into main
* changes:
inline_asm_tests: support testing masked insn
inline_asm_tests: pass tested insn as an arg
-rw-r--r-- | tests/inline_asm_tests/main_riscv64.cc | 158 |
1 files changed, 104 insertions, 54 deletions
diff --git a/tests/inline_asm_tests/main_riscv64.cc b/tests/inline_asm_tests/main_riscv64.cc index 6887be47..694909a4 100644 --- a/tests/inline_asm_tests/main_riscv64.cc +++ b/tests/inline_asm_tests/main_riscv64.cc @@ -16,6 +16,7 @@ #include "gtest/gtest.h" +#include <cstdint> #include <tuple> namespace { @@ -53,8 +54,80 @@ constexpr __v2du kVectorCalculationsSource[16] = { constexpr __v2du kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555}; constexpr __v2du kAgnosticResult = {~uint64_t{0U}, ~uint64_t{0U}}; +// Mask in form suitable for storing in v0 and use in v0.t form. +static constexpr __v2du kMask = {0xd5ad'd6b5'ad6b'b5ad, 0x6af7'57bb'deed'7bb5}; + +using ExecInsnFunc = void (*)(); + +void RunTwoVectorArgsOneRes(ExecInsnFunc exec_insn, + const __v2du* src, + __v2du* res, + uint64_t vtype, + uint64_t vlmax) { + uint64_t vstart, vl; + // Mask register is, unconditionally, v0, and we need 8, 16, or 24 to handle full 8-registers + // inputs thus we use v8..v15 for destination and place sources into v16..v23 and v24..v31. + asm( // Load arguments and undisturbed result. + "vsetvli t0, zero, e64, m8, ta, ma\n\t" + "vle64.v v8, (%[res])\n\t" + "vle64.v v16, (%[src])\n\t" + "addi t0, %[src], 128\n\t" + "vle64.v v24, (t0)\n\t" + // Load mask. + "vsetvli t0, zero, e64, m1, ta, ma\n\t" + "vle64.v v0, (%[mask])\n\t" + // Execute tested instruction. + "vsetvl t0, zero, %[vtype]\n\t" + "jalr %[exec_insn]\n\t" + // Save vstart and vl just after insn execution for checks. + "csrr %[vstart], vstart\n\t" + "csrr %[vl], vl\n\t" + // Store the result. + "vsetvli t0, zero, e64, m8, ta, ma\n\t" + "vse64.v v8, (%[res])\n\t" + : [vstart] "=&r"(vstart), [vl] "=&r"(vl) + : [exec_insn] "r"(exec_insn), + [src] "r"(src), + [res] "r"(res), + [vtype] "r"(vtype), + [mask] "r"(&kMask) + : "t0", + "ra", + "v0", + "v8", + "v9", + "v10", + "v11", + "v12", + "v13", + "v14", + "v15", + "v16", + "v17", + "v18", + "v19", + "v20", + "v21", + "v22", + "v23", + "v24", + "v25", + "v26", + "v27", + "v28", + "v29", + "v30", + "v31", + "memory"); + // Every vector instruction must set vstart to 0, but shouldn't touch vl. + EXPECT_EQ(vstart, 0); + EXPECT_EQ(vl, vlmax); +} + template <typename... ExpectedResultType> void TestVectorReductionInstruction( + ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, const __v2du (&source)[16], std::tuple<const ExpectedResultType (&)[8], const ExpectedResultType (&)[8]>... expected_result) { @@ -62,7 +135,10 @@ void TestVectorReductionInstruction( // of the possible vlmul, i.e. expected_result_vd0_int8[n] = vd[0], int8, no mask, vlmul=n. // // As vlmul=4 is reserved, expected_result_vd0_*[4] is ignored. - auto Verify = [&source](uint8_t vsew, uint8_t vlmul, const auto& expected_result) { + auto Verify = [&source](ExecInsnFunc exec_insn, + uint8_t vsew, + uint8_t vlmul, + const auto& expected_result) { for (uint8_t vta = 0; vta < 2; ++vta) { for (uint8_t vma = 0; vma < 2; ++vma) { uint64_t vtype = (vma << 7) | (vta << 6) | (vsew << 3) | vlmul; @@ -78,55 +154,7 @@ void TestVectorReductionInstruction( memcpy(&result[index], &kUndisturbedResult, sizeof(result[index])); } - uint64_t vstart, vl; - - asm( // Load arguments and undisturbed result. - "vsetvli t0, zero, e64, m8, ta, ma\n\t" - "vle64.v v8, (%[res])\n\t" - "vle64.v v16, (%[src])\n\t" - "addi t0, %[src], 128\n\t" - "vle64.v v24, (t0)\n\t" - // Execute tested instruction. - "vsetvl t0, zero, %[vtype]\n\t" - "vredsum.vs v8,v16,v24\n\t" - // Save vstart and vl just after insn execution for checks. - "csrr %[vstart], vstart\n\t" - "csrr %[vl], vl\n\t" - // Store the result. - "vsetvli t0, zero, e64, m8, ta, ma\n\t" - "vse64.v v8, (%[res])\n\t" - : [vstart] "=&r"(vstart), [vl] "=&r"(vl) - : [src] "r"(&kVectorCalculationsSource[0]), [res] "r"(&result[0]), [vtype] "r"(vtype) - : "t0", - "v8", - "v9", - "v10", - "v11", - "v12", - "v13", - "v14", - "v15", - "v16", - "v17", - "v18", - "v19", - "v20", - "v21", - "v22", - "v23", - "v24", - "v25", - "v26", - "v27", - "v28", - "v29", - "v30", - "v31", - "memory"); - - // Every vector instruction must set vstart to 0, but shouldn't touch vl. - EXPECT_EQ(vstart, 0); - EXPECT_EQ(vl, vlmax); + RunTwoVectorArgsOneRes(exec_insn, &kVectorCalculationsSource[0], &result[0], vtype, vlmax); // Reduction instructions are unique in that they produce a scalar // output to a single vector register as opposed to a register group. @@ -161,13 +189,21 @@ void TestVectorReductionInstruction( }; for (int vlmul = 0; vlmul < 8; vlmul++) { - // TODO(b/301577077): Also test masked versions. - ((Verify(BitUtilLog2(sizeof(ExpectedResultType)), vlmul, std::get<0>(expected_result)[vlmul]), - ...)); + ((Verify(exec_insn, + BitUtilLog2(sizeof(ExpectedResultType)), + vlmul, + std::get<0>(expected_result)[vlmul]), + Verify(exec_masked_insn, + BitUtilLog2(sizeof(ExpectedResultType)), + vlmul, + std::get<1>(expected_result)[vlmul])), + ...); } } -void TestVectorReductionInstruction(const uint8_t (&expected_result_vd0_int8)[8], +void TestVectorReductionInstruction(ExecInsnFunc exec_insn, + ExecInsnFunc exec_masked_insn, + const uint8_t (&expected_result_vd0_int8)[8], const uint16_t (&expected_result_vd0_int16)[8], const uint32_t (&expected_result_vd0_int32)[8], const uint64_t (&expected_result_vd0_int64)[8], @@ -177,6 +213,8 @@ void TestVectorReductionInstruction(const uint8_t (&expected_result_vd0_int8)[8] const uint64_t (&expected_result_vd0_with_mask_int64)[8], const __v2du (&source)[16]) { TestVectorReductionInstruction( + exec_insn, + exec_masked_insn, source, std::tuple<const uint8_t(&)[8], const uint8_t(&)[8]>{expected_result_vd0_int8, expected_result_vd0_with_mask_int8}, @@ -188,8 +226,20 @@ void TestVectorReductionInstruction(const uint8_t (&expected_result_vd0_int8)[8] expected_result_vd0_with_mask_int64}); } +[[gnu::naked]] void ExecVredsum() { + asm("vredsum.vs v8,v16,v24\n\t" + "ret\n\t"); +} + +[[gnu::naked]] void ExecMaskedVredsum() { + asm("vredsum.vs v8,v16,v24,v0.t\n\t" + "ret\n\t"); +} + TEST(InlineAsmTestRiscv64, TestVredsum) { TestVectorReductionInstruction( + ExecVredsum, + ExecMaskedVredsum, // expected_result_vd0_int8 {242, 228, 200, 144, /* unused */ 0, 146, 44, 121}, // expected_result_vd0_int16 |