aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Prashanth Swaminathan <prashanthsw@google.com> 2023-12-13 19:12:45 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2023-12-13 19:12:45 +0000
commit 8d3d05e15a263fd548eea28bbd5c40e3c96dae2c (patch)
tree 19f5b6cd2b66a039114ab1925d56e1376f3fee6a
parent cdbf77f85400050bbb295c256f3ccc74f11158ae (diff)
parent 4140c70518a16a73845ff8c5cf1fa29323e67354 (diff)
download binary_translation-8d3d05e15a263fd548eea28bbd5c40e3c96dae2c.tar.gz
Merge "Enable vmacc.[vx|vv] instructions" into main
-rw-r--r-- decoder/include/berberis/decoder/riscv64/decoder.h 46
-rw-r--r-- decoder/include/berberis/decoder/riscv64/semantics_player.h 13
-rw-r--r-- interpreter/riscv64/interpreter.cc 56
-rw-r--r-- interpreter/riscv64/interpreter_test.cc 68
-rw-r--r-- intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h 18
5 files changed, 195 insertions, 6 deletions
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index f4c6021f..7ad556c1 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -340,6 +340,11 @@ class Decoder {
kMaxValue = 0b111111
};
+ enum class VOpMVvOpcode : uint8_t {  // Values stored in VOpMVvArgs::opcode.
+ kVmaccvv = 0b101101,  // vmacc.vv
+ kMaxValue = 0b111111  // All six low bits set.
+ };
+
enum class VOpIVxOpcode : uint8_t {
kVaddvx = 0b000000,
kVsubvx = 0b000010,
@@ -384,6 +389,11 @@ class Decoder {
kMaxValue = 0b111111
};
+ enum class VOpMVxOpcode : uint8_t {  // Values stored in VOpMVxArgs::opcode.
+ kVmaccvx = 0b101101,  // vmacc.vx
+ kMaxValue = 0b111111  // All six low bits set.
+ };
+
// Load/Store instruction include 3bit “width” field while all other floating-point instructions
// include 2bit “fmt” field.
//
@@ -636,6 +646,14 @@ class Decoder {
uint8_t src2;
};
+ struct VOpMVvArgs {  // Argument bundle the decoder passes to OpVector() in its 0b010 case.
+ VOpMVvOpcode opcode;
+ bool vm;  // NOTE(review): presumably the mask-enable bit; its decoding is outside this hunk.
+ uint8_t dst;  // The interpreter passes dst, src1, src2 to OpVectorvv in this order.
+ uint8_t src1;
+ uint8_t src2;
+ };
+
struct VOpIVxArgs {
VOpIVxOpcode opcode;
bool vm;
@@ -644,6 +662,14 @@ class Decoder {
uint8_t src2;
};
+ struct VOpMVxArgs {  // Argument bundle the decoder passes to OpVector() in its 0b110 case.
+ VOpMVxOpcode opcode;
+ bool vm;  // NOTE(review): presumably the mask-enable bit; its decoding is outside this hunk.
+ uint8_t dst;
+ uint8_t src1;
+ uint8_t src2;  // SemanticsPlayer resolves this with GetRegOrZero() before calling the listener.
+ };
+
struct VsetivliArgs {
uint8_t dst;
uint8_t avl;
@@ -1721,6 +1747,16 @@ class Decoder {
};
return insn_consumer_->OpVector(args);
}
+ case 0b010: {
+ const VOpMVvArgs args = {
+ .opcode = VOpMVvOpcode(opcode),
+ .vm = vm,
+ .dst = dst,
+ .src1 = src1,
+ .src2 = src2,
+ };
+ return insn_consumer_->OpVector(args);
+ }
case 0b011: {
const VOpIViArgs args = {
.opcode = VOpIViOpcode(opcode),
@@ -1741,6 +1777,16 @@ class Decoder {
};
return insn_consumer_->OpVector(args);
}
+ case 0b110: {
+ const VOpMVxArgs args = {
+ .opcode = VOpMVxOpcode(opcode),
+ .vm = vm,
+ .dst = dst,
+ .src1 = src1,
+ .src2 = src2,
+ };
+ return insn_consumer_->OpVector(args);
+ }
case 0b111:
if (GetBits<uint8_t, 31, 1>() == 0) {
const VsetvliArgs args = {
diff --git a/decoder/include/berberis/decoder/riscv64/semantics_player.h b/decoder/include/berberis/decoder/riscv64/semantics_player.h
index cfa707c7..faddb48c 100644
--- a/decoder/include/berberis/decoder/riscv64/semantics_player.h
+++ b/decoder/include/berberis/decoder/riscv64/semantics_player.h
@@ -801,6 +801,12 @@ class SemanticsPlayer {
listener_->OpVector(args);
}
+ void OpVector(const typename Decoder::VOpMVvArgs& args) {  // Forwards args to listener_ as is.
+ // TODO(300690740): develop and implement strategy which would allow us to support vector
+ // intrinsics not just in the interpreter.
+ listener_->OpVector(args);
+ }
+
void OpVector(const typename Decoder::VOpIVxArgs& args) {
// TODO(300690740): develop and implement strategy which would allow us to support vector
// intrinsics not just in the interpreter.
@@ -808,6 +814,13 @@ class SemanticsPlayer {
listener_->OpVector(args, arg2);
}
+ void OpVector(const typename Decoder::VOpMVxArgs& args) {  // Forwards args plus one Register.
+ // TODO(300690740): develop and implement strategy which would allow us to support vector
+ // intrinsics not just in the interpreter.
+ Register arg2 = GetRegOrZero(args.src2);  // Resolve src2 before handing off to the listener.
+ listener_->OpVector(args, arg2);
+ }
+
void Vsetivli(const typename Decoder::VsetivliArgs& args) {
// Note: it's unclear whether args.avl should be treated similarly to x0 in Vsetvli or not.
// Keep implementation separate from Vsetvli to make it easier to adjust that code.
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index 6785b4fa..b48d9349 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -652,6 +652,17 @@ class Interpreter {
}
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
+ void OpVector(const Decoder::VOpMVvArgs& args) {  // Overload without an InactiveProcessing parameter.
+ switch (args.opcode) {
+ case Decoder::VOpMVvOpcode::kVmaccvv:  // Hands intrinsics::Vmaccvv and (dst, src1, src2) to OpVectorvv.
+ return OpVectorvv<intrinsics::Vmaccvv<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, args.src2);
+ default:  // Every other opcode value goes to Unimplemented().
+ Unimplemented();
+ }
+ }
+
+ template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
void OpVector(const Decoder::VOpIVxArgs& args, Register arg2) {
switch (args.opcode) {
case Decoder::VOpIVxOpcode::kVaddvx:
@@ -716,6 +727,17 @@ class Interpreter {
}
}
+ template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
+ void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) {  // Overload without an InactiveProcessing parameter.
+ switch (args.opcode) {
+ case Decoder::VOpMVxOpcode::kVmaccvx:  // Hands intrinsics::Vmaccvx and (dst, src1, arg2) to OpVectorvx.
+ return OpVectorvx<intrinsics::Vmaccvx<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, arg2);
+ default:  // Every other opcode value goes to Unimplemented().
+ Unimplemented();
+ }
+ }
+
template <auto Intrinsic,
typename ElementType,
VectorRegisterGroupMultiplier vlmul,
@@ -902,6 +924,23 @@ class Interpreter {
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
InactiveProcessing vma>
+ void OpVector(const Decoder::VOpMVvArgs& args) {
+ switch (args.opcode) {
+ case Decoder::VOpMVvOpcode::kVmaccvv:
+ return OpVectorvv<intrinsics::Vmaccvvm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, args.src2);
+ default:
+ Unimplemented();
+ }
+ }
+
+ template <typename ElementType,
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ InactiveProcessing vma>
void OpVector(const Decoder::VOpIVxArgs& args, Register arg2) {
switch (args.opcode) {
case Decoder::VOpIVxOpcode::kVaddvx:
@@ -978,6 +1017,23 @@ class Interpreter {
}
}
+ template <typename ElementType,
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ InactiveProcessing vma>
+ void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) {  // Overload that also takes vma.
+ switch (args.opcode) {
+ case Decoder::VOpMVxOpcode::kVmaccvx:  // Hands intrinsics::Vmaccvxm and (dst, src1, arg2) to OpVectorvx.
+ return OpVectorvx<intrinsics::Vmaccvxm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, arg2);
+ default:  // Every other opcode value goes to Unimplemented().
+ Unimplemented();
+ }
+ }
+
template <auto Intrinsic,
typename ElementType,
VectorRegisterGroupMultiplier vlmul,
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 49ed2776..724c4892 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -1785,6 +1785,74 @@ TEST_F(Riscv64InterpreterTest, TestVsll) {
{0xb3ab'a39b'938b'8000, 0xf3eb'e3db'd3cb'c000}});
}
+TEST_F(Riscv64InterpreterTest, TestVmacc) {  // One TestVectorInstruction call each for vmacc.vv and vmacc.vx.
+ TestVectorInstruction(0xb5882457, // vmacc.vv v8, v16, v24, v0.t
+ {{85, 87, 93, 103, 121, 135, 157, 183, 221, 247, 29, 71, 117, 167, 221, 23},  // 16 values per row
+ {85, 151, 221, 39, 137, 199, 29, 119, 237, 55, 157, 7, 117, 231, 93, 215},
+ {85, 215, 93, 231, 153, 7, 157, 55, 253, 119, 29, 199, 117, 39, 221, 151},
+ {85, 23, 221, 167, 169, 71, 29, 247, 13, 183, 157, 135, 117, 103, 93, 87},
+ {85, 87, 93, 103, 185, 135, 157, 183, 29, 247, 29, 71, 117, 167, 221, 23},
+ {85, 151, 221, 39, 201, 199, 29, 119, 45, 55, 157, 7, 117, 231, 93, 215},
+ {85, 215, 93, 231, 217, 7, 157, 55, 61, 119, 29, 199, 117, 39, 221, 151},
+ {85, 23, 221, 167, 233, 71, 29, 247, 77, 183, 157, 135, 117, 103, 93, 87}},
+ {{0x5555, 0x6d5d, 0xaa79, 0xfd9d, 0x7edd, 0x0e1d, 0xc675, 0x9edd},  // 8 values per row
+ {0x9755, 0xafdd, 0xfd89, 0x411d, 0xd2ed, 0x529d, 0x0b75, 0xe45d},
+ {0xdd55, 0xf65d, 0x5499, 0x889d, 0x2afd, 0x9b1d, 0x5475, 0x2ddd},
+ {0x2755, 0x40dd, 0xafa9, 0xd41d, 0x870d, 0xe79d, 0xa175, 0x7b5d},
+ {0x7555, 0x8f5d, 0x0eb9, 0x239d, 0xe71d, 0x381d, 0xf275, 0xccdd},
+ {0xc755, 0xe1dd, 0x71c9, 0x771d, 0x4b2d, 0x8c9d, 0x4775, 0x225d},
+ {0x1d55, 0x385d, 0xd8d9, 0xce9d, 0xb33d, 0xe51d, 0xa075, 0x7bdd},
+ {0x7755, 0x92dd, 0x43e9, 0x2a1d, 0x1f4d, 0x419d, 0xfd75, 0xd95d}},
+ {{0x5d57'5555, 0x44ed'aa79, 0x2a42'7edd, 0x0149'c675},  // 4 values per row
+ {0xe41b'9755, 0xdec3'fd89, 0xc71a'd2ed, 0x9114'0b75},
+ {0x76e7'dd55, 0x84a2'5499, 0x6ffb'2afd, 0x2ce6'5475},
+ {0x15bc'2755, 0x3688'afa9, 0x24e3'870d, 0xd4c0'a175},
+ {0xc098'7555, 0xf477'0eb9, 0xe5d3'e71d, 0x88a2'f275},
+ {0x777c'c755, 0xbe6d'71c9, 0xb2cc'4b2d, 0x488d'4775},
+ {0x3a69'1d55, 0x946b'd8d9, 0x8bcc'b33d, 0x147f'a075},
+ {0x095d'7755, 0x7672'43e9, 0x70d5'1f4d, 0xec79'fd75}},
+ {{0xc89d'7e69'5d57'5555, 0x5ace'6e38'2a42'7edd},  // 2 values per row
+ {0xebfd'5b02'e41b'9755, 0x8c3a'54d9'c71a'd2ed},
+ {0x2b75'4bac'76e7'dd55, 0xd9be'4f8b'6ffb'2afd},
+ {0x8705'5066'15bc'2755, 0x435a'5e4d'24e3'870d},
+ {0xfead'692f'c098'7555, 0xc90e'811e'e5d3'e71d},
+ {0x926d'9609'777c'c755, 0x6ada'b800'b2cc'4b2d},
+ {0x4245'd6f3'3a69'1d55, 0x28bf'02f2'8bcc'b33d},
+ {0x0e36'2bed'095d'7755, 0x02bb'61f4'70d5'1f4d}});
+ TestVectorInstruction(0xb500e457, // vmacc.vx v8, x1, v16, v0.t
+ {{85, 255, 169, 83, 253, 167, 81, 251, 165, 79, 249, 163, 77, 247, 161, 75},
+ {245, 159, 73, 243, 157, 71, 241, 155, 69, 239, 153, 67, 237, 151, 65, 235},
+ {149, 63, 233, 147, 61, 231, 145, 59, 229, 143, 57, 227, 141, 55, 225, 139},
+ {53, 223, 137, 51, 221, 135, 49, 219, 133, 47, 217, 131, 45, 215, 129, 43},
+ {213, 127, 41, 211, 125, 39, 209, 123, 37, 207, 121, 35, 205, 119, 33, 203},
+ {117, 31, 201, 115, 29, 199, 113, 27, 197, 111, 25, 195, 109, 23, 193, 107},
+ {21, 191, 105, 19, 189, 103, 17, 187, 101, 15, 185, 99, 13, 183, 97, 11},
+ {181, 95, 9, 179, 93, 7, 177, 91, 5, 175, 89, 3, 173, 87, 1, 171}},
+ {{0xff55, 0xa8a9, 0x51fd, 0xfb51, 0xa4a5, 0x4df9, 0xf74d, 0xa0a1},
+ {0x49f5, 0xf349, 0x9c9d, 0x45f1, 0xef45, 0x9899, 0x41ed, 0xeb41},
+ {0x9495, 0x3de9, 0xe73d, 0x9091, 0x39e5, 0xe339, 0x8c8d, 0x35e1},
+ {0xdf35, 0x8889, 0x31dd, 0xdb31, 0x8485, 0x2dd9, 0xd72d, 0x8081},
+ {0x29d5, 0xd329, 0x7c7d, 0x25d1, 0xcf25, 0x7879, 0x21cd, 0xcb21},
+ {0x7475, 0x1dc9, 0xc71d, 0x7071, 0x19c5, 0xc319, 0x6c6d, 0x15c1},
+ {0xbf15, 0x6869, 0x11bd, 0xbb11, 0x6465, 0x0db9, 0xb70d, 0x6061},
+ {0x09b5, 0xb309, 0x5c5d, 0x05b1, 0xaf05, 0x5859, 0x01ad, 0xab01}},
+ {{0x5353'ff55, 0xfb51'51fd, 0xa34e'a4a5, 0x4b4b'f74d},
+ {0xf349'49f5, 0x9b46'9c9d, 0x4343'ef45, 0xeb41'41ed},
+ {0x933e'9495, 0x3b3b'e73d, 0xe339'39e5, 0x8b36'8c8d},
+ {0x3333'df35, 0xdb31'31dd, 0x832e'8485, 0x2b2b'd72d},
+ {0xd329'29d5, 0x7b26'7c7d, 0x2323'cf25, 0xcb21'21cd},
+ {0x731e'7475, 0x1b1b'c71d, 0xc319'19c5, 0x6b16'6c6d},
+ {0x1313'bf15, 0xbb11'11bd, 0x630e'6465, 0x0b0b'b70d},
+ {0xb309'09b5, 0x5b06'5c5d, 0x0303'af05, 0xab01'01ad}},
+ {{0xfb51'51fd'5353'ff55, 0xa0a1'4ca2'a34e'a4a5},
+ {0x45f1'4747'f349'49f5, 0xeb41'41ed'4343'ef45},
+ {0x9091'3c92'933e'9495, 0x35e1'3737'e339'39e5},
+ {0xdb31'31dd'3333'df35, 0x8081'2c82'832e'8485},
+ {0x25d1'2727'd329'29d5, 0xcb21'21cd'2323'cf25},
+ {0x7071'1c72'731e'7475, 0x15c1'1717'c319'19c5},
+ {0xbb11'11bd'1313'bf15, 0x6061'0c62'630e'6465},
+ {0x05b1'0707'b309'09b5, 0xab01'01ad'0303'af05}});
+}
} // namespace
} // namespace berberis
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index 48c32d1a..bb3b19d9 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -90,12 +90,14 @@ inline std::tuple<SIMD128Register> VectorArithmetic(Lambda lambda,
}
if (vstart == 0 && vl == static_cast<int>(16 / sizeof(ElementType))) {
for (int index = vstart; index < vl; ++index) {
- result.Set<ElementType>(lambda(VectorElement<ElementType>(source, index)...), index);
+ result.Set<ElementType>(lambda(VectorElement<ElementType>(result, index),
+ VectorElement<ElementType>(source, index)...), index);
}
} else {
#pragma clang loop unroll(disable)
for (int index = vstart; index < vl; ++index) {
- result.Set<ElementType>(lambda(VectorElement<ElementType>(source, index)...), index);
+ result.Set<ElementType>(lambda(VectorElement<ElementType>(result, index),
+ VectorElement<ElementType>(source, index)...), index);
}
if constexpr (vta == TailProcessing::kAgnostic) {
if (vl < static_cast<int>(16 / sizeof(ElementType))) {
@@ -140,7 +142,8 @@ inline std::tuple<SIMD128Register> VectorArithmetic(Lambda lambda,
#pragma clang loop unroll(disable)
for (int index = vstart; index < vl; ++index) {
if (mask & (1 << index)) {
- result.Set<ElementType>(lambda(VectorElement<ElementType>(source, index)...), index);
+ result.Set<ElementType>(lambda(VectorElement<ElementType>(result, index),
+ VectorElement<ElementType>(source, index)...), index);
} else if constexpr (vma == InactiveProcessing::kAgnostic) {
result.Set<ElementType>(fill_value, index);
}
@@ -183,7 +186,7 @@ inline ElementType mask_bits(ElementType val) {
SIMD128Register result, \
DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS parameters) { \
return VectorArithmetic<ElementType, vta>( \
- [](auto... args) { \
+ []([[maybe_unused]] auto vd, auto... args) { \
static_assert((std::is_same_v<decltype(args), ElementType> && ...)); \
arithmetic; \
}, \
@@ -204,7 +207,7 @@ inline ElementType mask_bits(ElementType val) {
SIMD128Register result, \
DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS parameters) { \
return VectorArithmetic<ElementType, vta, vma>( \
- [](auto... args) { \
+ []([[maybe_unused]] auto vd, auto... args) { \
static_assert((std::is_same_v<decltype(args), ElementType> && ...)); \
arithmetic; \
}, \
@@ -246,7 +249,10 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(sll, auto [arg1, arg2] = std::tuple{args...};
(arg1 << mask_bits(arg2)))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(sll, auto [arg1, arg2] = std::tuple{args...};
(arg1 << mask_bits(arg2)))
-
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(macc, auto [arg1, arg2] = std::tuple{args...};
+ ((arg1 * arg2) + vd))  // vd: lambda's first argument, read from result at the same index.
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(macc, auto [arg1, arg2] = std::tuple{args...};
+ ((arg1 * arg2) + vd))  // No trailing ';', matching the sll invocations of these macros.
#undef DEFINE_ARITHMETIC_INTRINSIC
#undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS