author     Lev Rumyantsev <levarum@google.com>                        2024-05-07 05:06:23 +0000
committer  Gerrit Code Review <noreply-gerritcodereview@google.com>  2024-05-07 05:06:23 +0000
commit     1718fd24a0927253ab3428ee155e33b8172c8ce4 (patch)
tree       81b6c4830e49a832e08a7dbfa8c1f193b7a16e61
parent     d23006b7d52ba976a67623ea20c8f898bc7c1129 (diff)
parent     4459ca2577c44f3c9f8a2af1210aa73ba428e20b (diff)
download   binary_translation-1718fd24a0927253ab3428ee155e33b8172c8ce4.tar.gz
Merge "inline_asm_tests: first riscv test" into main
-rw-r--r--   interpreter/riscv64/interpreter.h          2
-rw-r--r--   tests/inline_asm_tests/Android.bp         15
-rw-r--r--   tests/inline_asm_tests/main_riscv64.cc   240
3 files changed, 256 insertions(+), 1 deletion(-)
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index 86d7697b..62de95d7 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -1039,7 +1039,7 @@ class Interpreter {
if (!IsAligned<kNumRegistersInGroup>(dst)) {
return Undefined();
}
- if (dst + kNumRegistersInGroup * kSegmentSize >= 32) {
+ if (dst + kNumRegistersInGroup * kSegmentSize > 32) {
return Undefined();
}
constexpr size_t kElementsCount = 16 / sizeof(ElementType);
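
The one-character change above fixes an off-by-one in the segment load/store register-group bounds check: the RISC-V vector file has registers v0..v31, so a group that ends exactly at index 32 (for example v24..v31) is legal, and only a group spilling past v31 should be rejected as Undefined(). A minimal sketch of the corrected predicate, with hypothetical names (kNumVectorRegisters and SegmentGroupFits are illustrative, not the interpreter's actual helpers):

#include <cstddef>

// Illustrative only; names are assumptions, not the interpreter's code.
constexpr size_t kNumVectorRegisters = 32;

constexpr bool SegmentGroupFits(size_t dst, size_t regs_per_group, size_t segments) {
  // A group ending exactly at index 32 is valid; only indices past v31 are not.
  return dst + regs_per_group * segments <= kNumVectorRegisters;
}

static_assert(SegmentGroupFits(24, 4, 2));   // v24..v31: wrongly rejected before this fix.
static_assert(!SegmentGroupFits(28, 4, 2));  // would need v28..v35: rejected either way.
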
diff --git a/tests/inline_asm_tests/Android.bp b/tests/inline_asm_tests/Android.bp
index bba729f6..90e082e5 100644
--- a/tests/inline_asm_tests/Android.bp
+++ b/tests/inline_asm_tests/Android.bp
@@ -48,3 +48,18 @@ cc_test {
},
static_executable: true,
}
+
+cc_test {
+ name: "inline_asm_tests_riscv64",
+ native_bridge_supported: true,
+ enabled: false,
+ arch: {
+ riscv64: {
+ enabled: true,
+ srcs: [
+ "main_riscv64.cc",
+ ],
+ },
+ },
+ static_executable: true,
+}
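
Note that the new module follows the same pattern as the existing test module above it: it is disabled by default and re-enabled only inside the riscv64 arch block, so it builds solely for riscv64 targets, while native_bridge_supported: true lets the resulting static binary run under the binary translator.
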
diff --git a/tests/inline_asm_tests/main_riscv64.cc b/tests/inline_asm_tests/main_riscv64.cc
new file mode 100644
index 00000000..6887be47
--- /dev/null
+++ b/tests/inline_asm_tests/main_riscv64.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <tuple>
+
+namespace {
+
+template <typename T>
+constexpr T BitUtilLog2(T x) {
+ return __builtin_ctz(x);
+}
+
+// TODO(b/301577077): Maybe use __uint128_t instead, or provide a more versatile
+// wrapper that one can easily init, copy, and compare.
+using __v2du = uint64_t[2];
+
+constexpr __v2du kVectorCalculationsSource[16] = {
+ {0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8908},
+ {0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9918},
+ {0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a928},
+ {0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b938},
+ {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948},
+ {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958},
+ {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
+ {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978},
+
+ {0x9e0c'9a09'9604'9200, 0x8e1c'8a18'8614'8211},
+ {0xbe2c'ba29'b624'b220, 0xae3c'aa38'a634'a231},
+ {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251},
+ {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271},
+ {0x1e8c'1a89'1684'1280, 0x0e9c'0a98'0694'0291},
+ {0x3eac'3aa9'36a4'32a0, 0x2ebc'2ab8'26b4'22b1},
+ {0x5ecc'5ac9'56c4'52c0, 0x4edc'4ad8'46d4'42d1},
+ {0x7eec'7ae9'76e4'72e0, 0x6efc'6af8'66f4'62f1},
+};
+
+// Easily recognizable bit pattern for target register.
+constexpr __v2du kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
+constexpr __v2du kAgnosticResult = {~uint64_t{0U}, ~uint64_t{0U}};
+
+template <typename... ExpectedResultType>
+void TestVectorReductionInstruction(
+ const __v2du (&source)[16],
+ std::tuple<const ExpectedResultType (&)[8],
+ const ExpectedResultType (&)[8]>... expected_result) {
+  // Each expected_result input to this function is the vd[0] value of the reduction for each
+  // of the possible vlmul settings, i.e. expected_result_vd0_int8[n] == vd[0] for int8, no
+  // mask, vlmul=n.
+ //
+ // As vlmul=4 is reserved, expected_result_vd0_*[4] is ignored.
+ auto Verify = [&source](uint8_t vsew, uint8_t vlmul, const auto& expected_result) {
+ for (uint8_t vta = 0; vta < 2; ++vta) {
+ for (uint8_t vma = 0; vma < 2; ++vma) {
+ uint64_t vtype = (vma << 7) | (vta << 6) | (vsew << 3) | vlmul;
+ uint64_t vlmax = 0;
+ asm("vsetvl %0, zero, %1" : "=r"(vlmax) : "r"(vtype));
+ if (vlmax == 0) {
+ continue;
+ }
+
+ __v2du result[8];
+ // Set undisturbed result vector registers.
+ for (size_t index = 0; index < 8; ++index) {
+ memcpy(&result[index], &kUndisturbedResult, sizeof(result[index]));
+ }
+
+ uint64_t vstart, vl;
+
+ asm( // Load arguments and undisturbed result.
+ "vsetvli t0, zero, e64, m8, ta, ma\n\t"
+ "vle64.v v8, (%[res])\n\t"
+ "vle64.v v16, (%[src])\n\t"
+ "addi t0, %[src], 128\n\t"
+ "vle64.v v24, (t0)\n\t"
+ // Execute tested instruction.
+ "vsetvl t0, zero, %[vtype]\n\t"
+ "vredsum.vs v8,v16,v24\n\t"
+ // Save vstart and vl just after insn execution for checks.
+ "csrr %[vstart], vstart\n\t"
+ "csrr %[vl], vl\n\t"
+ // Store the result.
+ "vsetvli t0, zero, e64, m8, ta, ma\n\t"
+ "vse64.v v8, (%[res])\n\t"
+ : [vstart] "=&r"(vstart), [vl] "=&r"(vl)
+          : [src] "r"(&source[0]), [res] "r"(&result[0]), [vtype] "r"(vtype)
+ : "t0",
+ "v8",
+ "v9",
+ "v10",
+ "v11",
+ "v12",
+ "v13",
+ "v14",
+ "v15",
+ "v16",
+ "v17",
+ "v18",
+ "v19",
+ "v20",
+ "v21",
+ "v22",
+ "v23",
+ "v24",
+ "v25",
+ "v26",
+ "v27",
+ "v28",
+ "v29",
+ "v30",
+ "v31",
+ "memory");
+
+ // Every vector instruction must set vstart to 0, but shouldn't touch vl.
+ EXPECT_EQ(vstart, 0);
+ EXPECT_EQ(vl, vlmax);
+
+ // Reduction instructions are unique in that they produce a scalar
+ // output to a single vector register as opposed to a register group.
+ // This allows us to take some short-cuts when validating:
+ //
+ // - The mask setting is only useful during computation, as the body
+ // of the destination is always only element 0, which will always be
+ // written to, regardless of mask setting.
+ // - The tail is guaranteed to be 1..VLEN/SEW, so the vlmul setting
+ // does not affect the elements that the tail policy applies to in the
+ // destination register.
+
+ // Verify that the destination register holds the reduction in the
+ // first element and the tail policy applies to the remaining.
+ __uint128_t expected_result_register;
+ if (vta) {
+ memcpy(&expected_result_register, &kAgnosticResult, sizeof(expected_result_register));
+ } else {
+ memcpy(&expected_result_register, &kUndisturbedResult, sizeof(expected_result_register));
+ }
+ size_t vsew_bits = 8 << vsew;
+ expected_result_register = (expected_result_register >> vsew_bits) << vsew_bits;
+ expected_result_register |= expected_result;
+ EXPECT_TRUE(memcmp(&result[0], &expected_result_register, sizeof(result[0])) == 0);
+
+ // Verify all non-destination registers are undisturbed.
+ for (size_t index = 1; index < 8; ++index) {
+ EXPECT_TRUE(memcmp(&result[index], &kUndisturbedResult, sizeof(result[index])) == 0);
+ }
+ }
+ }
+ };
+
+ for (int vlmul = 0; vlmul < 8; vlmul++) {
+ // TODO(b/301577077): Also test masked versions.
+ ((Verify(BitUtilLog2(sizeof(ExpectedResultType)), vlmul, std::get<0>(expected_result)[vlmul]),
+ ...));
+ }
+}
+
+void TestVectorReductionInstruction(const uint8_t (&expected_result_vd0_int8)[8],
+ const uint16_t (&expected_result_vd0_int16)[8],
+ const uint32_t (&expected_result_vd0_int32)[8],
+ const uint64_t (&expected_result_vd0_int64)[8],
+ const uint8_t (&expected_result_vd0_with_mask_int8)[8],
+ const uint16_t (&expected_result_vd0_with_mask_int16)[8],
+ const uint32_t (&expected_result_vd0_with_mask_int32)[8],
+ const uint64_t (&expected_result_vd0_with_mask_int64)[8],
+ const __v2du (&source)[16]) {
+ TestVectorReductionInstruction(
+ source,
+ std::tuple<const uint8_t(&)[8], const uint8_t(&)[8]>{expected_result_vd0_int8,
+ expected_result_vd0_with_mask_int8},
+ std::tuple<const uint16_t(&)[8], const uint16_t(&)[8]>{expected_result_vd0_int16,
+ expected_result_vd0_with_mask_int16},
+ std::tuple<const uint32_t(&)[8], const uint32_t(&)[8]>{expected_result_vd0_int32,
+ expected_result_vd0_with_mask_int32},
+ std::tuple<const uint64_t(&)[8], const uint64_t(&)[8]>{expected_result_vd0_int64,
+ expected_result_vd0_with_mask_int64});
+}
+
+TEST(InlineAsmTestRiscv64, TestVredsum) {
+ TestVectorReductionInstruction(
+ // expected_result_vd0_int8
+ {242, 228, 200, 144, /* unused */ 0, 146, 44, 121},
+ // expected_result_vd0_int16
+ {0x0172, 0x82e4, 0x88c8, 0xa090, /* unused */ 0, 0x1300, 0xa904, 0xe119},
+ // expected_result_vd0_int32
+ {0xcb44'b932,
+ 0x9407'71e4,
+ 0xa70e'64c8,
+ 0xd312'5090,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1907'1300,
+ 0xb713'ad09},
+ // expected_result_vd0_int64
+ {0xb32f'a926'9f1b'9511,
+ 0x1f99'0d88'fb74'e962,
+ 0xb92c'970e'74e8'52c4,
+ 0xef4e'ad14'6aca'2888,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x2513'1f0e'1907'1300},
+ // expected_result_vd0_with_mask_int8
+ {39, 248, 142, 27, /* unused */ 0, 0, 154, 210},
+ // expected_result_vd0_with_mask_int16
+ {0x5f45, 0xc22f, 0x99d0, 0x98bf, /* unused */ 0, 0x1300, 0x1300, 0x4b15},
+ // expected_result_vd0_with_mask_int32
+ {0x2d38'1f29,
+ 0x99a1'838a,
+ 0x1989'ef5c,
+ 0x9cf4'4aa1,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1907'1300,
+ 0x1907'1300},
+ // expected_result_vd0_with_mask_int64
+ {0x2513'1f0e'1907'1300,
+ 0x917c'8370'7560'6751,
+ 0x4e56'3842'222a'0c13,
+ 0xc833'9e0e'73df'49b5,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x2513'1f0e'1907'1300},
+ kVectorCalculationsSource);
+}
+
+} // namespace
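
A closing note on the hand-assembled vtype in the Verify lambda above: the expression (vma << 7) | (vta << 6) | (vsew << 3) | vlmul matches the RVV 1.0 vtype CSR layout, with vlmul in bits 2:0, vsew in bits 5:3, vta at bit 6, and vma at bit 7. A small sketch under that assumption (EncodeVtype is a hypothetical helper, not part of the commit):

#include <cstdint>

// Hypothetical helper mirroring the test's manual vtype encoding.
constexpr uint64_t EncodeVtype(uint8_t vma, uint8_t vta, uint8_t vsew, uint8_t vlmul) {
  return (uint64_t{vma} << 7) | (uint64_t{vta} << 6) | (uint64_t{vsew} << 3) | vlmul;
}

// "vsetvli t0, zero, e64, m8, ta, ma" in the inline asm corresponds to:
static_assert(EncodeVtype(/*vma=*/1, /*vta=*/1, /*vsew=*/3, /*vlmul=*/3) == 0xdb);
// e8, m1, tail-undisturbed, mask-undisturbed is the all-zero encoding:
static_assert(EncodeVtype(0, 0, 0, 0) == 0);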