aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mohamed Elbashir Younes Snosy <mohamedsamir151298@gmail.com> 2024-03-06 01:49:08 +0000
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2024-03-06 01:49:08 +0000
commit 04f2e59514f902c0535b11cb7aca28a48da8c2ea (patch)
tree 6ae93ff449d93bdec19507acaecb76ab7324aacb
parent c71f5d038c3bac654c9e47f1d97210d757958e8b (diff)
parent f6483bc069f2d96d809a3523f15c954e12ba3beb (diff)
download binary_translation-temp_319669529.tar.gz
Implement vfmul and vfdiv instructions. am: f6483bc069 temp_319669529
Original change: https://android-review.googlesource.com/c/platform/frameworks/libs/binary_translation/+/2986213
Change-Id: I849b187f91ae0fe64240c431b4fc882e97ae73a7
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- interpreter/riscv64/interpreter.h 78
-rw-r--r-- interpreter/riscv64/interpreter_test.cc 93
-rw-r--r-- intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h 88
3 files changed, 218 insertions, 41 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index 6ce41660..09df485b 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -1227,6 +1227,27 @@ class Interpreter {
case Decoder::VOpFVfOpcode::kVmfgevf:
return OpVectormvx<intrinsics::Vfgevx<ElementType>, ElementType, vlmul, vma>(
args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfdivvf:
+ return OpVectorvx<intrinsics::Vfdivvf<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfrdivvf:
+ return OpVectorvx<intrinsics::Vfrdivvf<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfmulvf:
+ return OpVectorvx<intrinsics::Vfmulvf<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1, arg2);
default:
return Unimplemented();
}
@@ -1478,6 +1499,20 @@ class Interpreter {
case Decoder::VOpFVvOpcode::kVmfnevv:
return OpVectormvv<intrinsics::Vfnevv<ElementType>, ElementType, vlmul, vma>(
args.dst, args.src1, args.src2);
+ case Decoder::VOpFVvOpcode::kVfdivvv:
+ return OpVectorvv<intrinsics::Vfdivvv<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1, args.src2);
+ case Decoder::VOpFVvOpcode::kVfmulvv:
+ return OpVectorvv<intrinsics::Vfmulvv<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ CsrName::kFrm>(args.dst, args.src1, args.src2);
default:
break; // Make compiler happy.
}
@@ -2568,17 +2603,23 @@ class Interpreter {
typename ElementType,
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
- auto vma>
+ auto vma,
+ CsrName... kExtraCsrs>
void OpVectorvv(uint8_t dst, uint8_t src1, uint8_t src2) {
- return OpVectorvv<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>(
- dst, src1, src2);
+ return OpVectorvv<Intrinsic,
+ ElementType,
+ NumberOfRegistersInvolved(vlmul),
+ vta,
+ vma,
+ kExtraCsrs...>(dst, src1, src2);
}
template <auto Intrinsic,
typename ElementType,
size_t kRegistersInvolved,
TailProcessing vta,
- auto vma>
+ auto vma,
+ CsrName... kExtraCsrs>
void OpVectorvv(uint8_t dst, uint8_t src1, uint8_t src2) {
if (!IsAligned<kRegistersInvolved>(dst | src1 | src2)) {
return Unimplemented();
@@ -2597,7 +2638,12 @@ class Interpreter {
SIMD128Register arg1{state_->cpu.v[src1 + index]};
SIMD128Register arg2{state_->cpu.v[src2 + index]};
result = VectorMasking<ElementType, vta, vma>(
- result, std::get<0>(Intrinsic(arg1, arg2)), vstart, vl, index, mask);
+ result,
+ std::get<0>(Intrinsic(GetCsr<kExtraCsrs>()..., arg1, arg2)),
+ vstart,
+ vl,
+ index,
+ mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
}
@@ -2751,17 +2797,22 @@ class Interpreter {
VectorRegisterGroupMultiplier vlmul,
TailProcessing vta,
auto vma,
- typename... DstMaskType>
- void OpVectorvx(uint8_t dst, uint8_t src1, ElementType arg2, DstMaskType... dst_mask) {
- return OpVectorvx<Intrinsic, ElementType, NumberOfRegistersInvolved(vlmul), vta, vma>(
- dst, src1, arg2, dst_mask...);
+ CsrName... kExtraCsrs>
+ void OpVectorvx(uint8_t dst, uint8_t src1, ElementType arg2) {
+ return OpVectorvx<Intrinsic,
+ ElementType,
+ NumberOfRegistersInvolved(vlmul),
+ vta,
+ vma,
+ kExtraCsrs...>(dst, src1, arg2);
}
template <auto Intrinsic,
typename ElementType,
size_t kRegistersInvolved,
TailProcessing vta,
- auto vma>
+ auto vma,
+ CsrName... kExtraCsrs>
void OpVectorvx(uint8_t dst, uint8_t src1, ElementType arg2) {
if (!IsAligned<kRegistersInvolved>(dst | src1)) {
return Unimplemented();
@@ -2779,7 +2830,12 @@ class Interpreter {
SIMD128Register result(state_->cpu.v[dst + index]);
SIMD128Register arg1(state_->cpu.v[src1 + index]);
result = VectorMasking<ElementType, vta, vma>(
- result, std::get<0>(Intrinsic(arg1, arg2)), vstart, vl, index, mask);
+ result,
+ std::get<0>(Intrinsic(GetCsr<kExtraCsrs>()..., arg1, arg2)),
+ vstart,
+ vl,
+ index,
+ mask);
state_->cpu.v[dst + index] = result.Get<__uint128_t>();
}
}
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index ba5fe0a5..46f9a448 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -7890,6 +7890,42 @@ TEST_F(Riscv64InterpreterTest, TestVmul) {
{0x65bb'6712'1313'69c0, 0x0b0b'61b7'630e'0f10},
{0xb05b'5c5c'b308'b460, 0x55ab'5702'0303'59b0}},
kVectorCalculationsSourceLegacy);
+ TestVectorFloatInstruction(0x910c1457, // vfmul.vv v8, v16, v24, v0.t
+ {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x8000'02f0, 0x85ca'89ec, 0x91d9'a3e9, 0x9de9'3ee6},
+ {0xa9f9'5ae5, 0xb604'fbf4, 0xc20d'8af5, 0xce16'5a77},
+ {0xda1f'6a7a, 0xe628'bafe, 0xf232'4c02, 0xfe3c'1d87},
+ {0x0a49'9dd9, 0x165a'3ee4, 0x226b'60ef, 0x2e7d'03f9},
+ {0x3a87'9403, 0x4690'e68c, 0x529a'7994, 0x5ea4'4d1d},
+ {0x6aae'6126, 0x76b8'b5b2, 0x7f80'0000, 0x7f80'0000},
+ {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}},
+ {{0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
+ {0x8553'e032'b59e'2bf7, 0x9d6b'012b'925d'8532},
+ {0xb584'0511'cdec'af2c, 0xcd9b'2e22'd263'd03f},
+ {0xe5b4'2a11'269b'b302, 0xfdcb'5b3a'52ca'9bed},
+ {0x15e4'4f30'bfab'3779, 0x2dfb'8872'1391'e83b},
+ {0x4614'7470'991b'3c90, 0x5e2b'b5ca'14b9'b52b},
+ {0x7644'99d0'b2eb'c249, 0x7ff0'0000'0000'0000},
+ {0x7ff0'0000'0000'0000, 0x7ff0'0000'0000'0000}},
+ kVectorCalculationsSourceLegacy);
+ TestVectorFloatInstruction(0x9100d457, // vfmul.vf v8, v16, f1, v0.t
+ {{0x8437'8568, 0x883d'2b0e, 0x8c42'd0b3, 0x9048'7659},
+ {0x944e'1bfe, 0x9853'c1a4, 0x9c59'674a, 0xa05f'0cef},
+ {0xa464'b295, 0xa86a'583b, 0xac6f'fde0, 0xb075'a386},
+ {0xb47b'492c, 0xb880'7769, 0xbc83'4a3b, 0xc086'1d0e},
+ {0xc488'efe1, 0xc88b'c2b4, 0xcc8e'9587, 0xd091'6859},
+ {0xd494'3b2c, 0xd897'0dff, 0xdc99'e0d2, 0xe09c'b3a5},
+ {0xe49f'8678, 0xe8a2'594a, 0xeca5'2c1d, 0xf0a7'fef0},
+ {0xf4aa'd1c3, 0xf8ad'a496, 0xfcb0'7768, 0xff80'0000}},
+ {{0x872f'ab0e'583b'8568, 0x8f35'7b2c'd1c3'685a},
+ {0x973f'c1a4'eed2'1bfe, 0x9f45'8678'1d0e'b3a5},
+ {0xa74f'd83b'8568'b295, 0xaf55'91c3'6859'fef0},
+ {0xb75f'eed2'1bff'492c, 0xbf65'9d0e'b3a5'4a3b},
+ {0xc770'02b4'594a'efe1, 0xcf75'a859'fef0'9587},
+ {0xd780'0dff'a496'3b2c, 0xdf85'b3a5'4a3b'e0d2},
+ {0xe790'194a'efe1'8678, 0xef95'bef0'9587'2c1d},
+ {0xf7a0'2496'3b2c'd1c3, 0xffa5'ca3b'e0d2'7768}},
+ kVectorCalculationsSourceLegacy);
}
TEST_F(Riscv64InterpreterTest, TestVmulh) {
@@ -8111,6 +8147,63 @@ TEST_F(Riscv64InterpreterTest, TestVmulhsu) {
kVectorCalculationsSourceLegacy);
}
+TEST_F(Riscv64InterpreterTest, TestVdiv) {
+ TestVectorFloatInstruction(0x810c1457, // vfdiv.vv v8, v16, v24, v0.t
+ {{0xbc7d'1561, 0xb875'e8eb, 0xb46f'83ae, 0xb069'c6ec},
+ {0xac64'99e5, 0xa85f'e87d, 0xa45b'a22e, 0xa057'b943},
+ {0x9c54'2241, 0x9850'd382, 0x944d'c4d2, 0x904a'ef31},
+ {0x8c48'4c98, 0x8845'd7d4, 0x8443'8c62, 0x8060'b328},
+ {0x7c3c'206f, 0x7835'0888, 0x742e'b4f5, 0x7029'0782},
+ {0x6c23'e7dc, 0x681f'423a, 0x641b'0659, 0x6017'26b8},
+ {0x5c13'980b, 0x5810'50ca, 0x540d'48e2, 0x500a'7968},
+ {0x4c07'dc6c, 0x4805'6ccb, 0x4403'260f, 0x4001'0454}},
+ {{0xb8e9'b361'617b'3332, 0xb0e1'64f5'e24e'7813},
+ {0xa8d9'a850'c33d'b3c7, 0xa0d1'5a44'cf64'f786},
+ {0x98c9'9d59'4646'6ce0, 0x90c1'4fab'f702'438e},
+ {0x88b9'927a'9559'd99b, 0x80b1'452b'0727'cc70},
+ {0x78a9'87b4'5cbc'33ee, 0x70a1'3ac1'af47'5dc7},
+ {0x6899'7d06'4a29'0e6f, 0x6091'306f'a03b'130b},
+ {0x5889'7270'0ccb'2650, 0x5081'2634'8c3d'81c9},
+ {0x4879'67f1'5534'6be6, 0x4071'1c10'26e2'17fd}},
+ kVectorCalculationsSourceLegacy);
+ TestVectorFloatInstruction(0x8100d457, // vfdiv.vf v8, v16, f1, v0.t
+ {{0x81b9'9b06, 0x85bf'5117, 0x89c5'0728, 0x8dca'bd39},
+ {0x91d0'734a, 0x95d6'295b, 0x99db'df6c, 0x9de1'957d},
+ {0xa1e7'4b8e, 0xa5ed'019f, 0xa9f2'b7b0, 0xadf8'6dc1},
+ {0xb1fe'23d2, 0xb601'ecf2, 0xba04'c7fa, 0xbe07'a303},
+ {0xc20a'7e0b, 0xc60d'5914, 0xca10'341c, 0xce13'0f25},
+ {0xd215'ea2e, 0xd618'c536, 0xda1b'a03f, 0xde1e'7b47},
+ {0xe221'5650, 0xe624'3158, 0xea27'0c61, 0xee29'e769},
+ {0xf22c'c272, 0xf62f'9d7a, 0xfa32'7883, 0xfe35'538b}},
+ {{0x86e0'0391'6e3a'ab61, 0x8ee5'b9a2'8501'cd89},
+ {0x96f0'0efd'9068'39a5, 0x9ef5'c50e'a72f'5bcd},
+ {0xa700'1a69'b295'c7e9, 0xaf05'd07a'c95c'ea11},
+ {0xb710'25d5'd4c3'562e, 0xbf15'dbe6'eb8a'7855},
+ {0xc720'3141'f6f0'e472, 0xcf25'e753'0db8'069a},
+ {0xd730'3cae'191e'72b6, 0xdf35'f2bf'2fe5'94de},
+ {0xe740'481a'3b4c'00fa, 0xef45'fe2b'5213'2322},
+ {0xf750'5386'5d79'8f3f, 0xff56'0997'7440'b166}},
+ kVectorCalculationsSourceLegacy);
+ TestVectorFloatInstruction(0x8500d457, // vfrdiv.vf v8, v16, f1, v0.t
+ {{0xfd30'8be3, 0xf92b'46b3, 0xf526'4fba, 0xf121'a05c},
+ {0xed1d'32b7, 0xe919'0189, 0xe515'081b, 0xe111'4231},
+ {0xdd0d'abfb, 0xd90a'4206, 0xd507'0132, 0xd103'e6a8},
+ {0xcd00'efd5, 0xc8fc'34b9, 0xc4f6'c831, 0xc0f1'9620},
+ {0xbcec'9ae8, 0xb8e7'd337, 0xb4e3'3bfd, 0xb0de'd268},
+ {0xacda'93da, 0xa8d6'7deb, 0xa4d2'8e5f, 0xa0ce'c322},
+ {0x9ccb'1a46, 0x98c7'9200, 0x94c4'28a6, 0x90c0'dca6},
+ {0x8cbd'ac8d, 0x88ba'96ff, 0x84b7'9ab5, 0x80b4'b67d}},
+ {{0xf8ff'f8de'ba96'50ff, 0xf0f7'9132'204d'3f73},
+ {0xe8ef'e220'dbd5'38e4, 0xe0e7'84d4'8fe3'51e3},
+ {0xd8df'cb83'4048'7bb7, 0xd0d7'7883'f290'f6d8},
+ {0xc8cf'b505'a379'43a3, 0xc0c7'6c40'3409'4932},
+ {0xb8bf'9ea7'c1b1'e9b5, 0xb0b7'6009'4029'bc36},
+ {0xa8af'8869'57fb'4e5c, 0xa0a7'53df'02f9'ad62},
+ {0x989f'724a'241a'3d11, 0x9097'47c1'68a9'f793},
+ {0x888f'5c49'e48c'db01, 0x8087'3bb0'5d94'877b}},
+ kVectorCalculationsSourceLegacy);
+}
+
TEST_F(Riscv64InterpreterTest, TestVslideup) {
// With slide offset equal zero, this is equivalent to Vmv.
TestVectorInstruction(
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index b7dc8de1..d3c91b9a 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -508,13 +508,13 @@ inline std::tuple<SIMD128Register> Vfcvtv(int8_t rm, int8_t frm, SIMD128Register
}
#define DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS(...) __VA_ARGS__
-#define DEFINE_ARITHMETIC_INTRINSIC(Name, arithmetic, parameters, arguments) \
+#define DEFINE_ARITHMETIC_INTRINSIC(Name, arithmetic, parameters, capture, arguments) \
\
template <typename ElementType, \
enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible> \
inline std::tuple<SIMD128Register> Name(DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS parameters) { \
return VectorProcessing<ElementType>( \
- [](auto... args) { \
+ [DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS capture](auto... args) { \
static_assert((std::is_same_v<decltype(args), ElementType> && ...)); \
arithmetic; \
}, \
@@ -522,10 +522,48 @@ inline std::tuple<SIMD128Register> Vfcvtv(int8_t rm, int8_t frm, SIMD128Register
}
#define DEFINE_1OP_ARITHMETIC_INTRINSIC_M(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC(V##name##m, return ({ __VA_ARGS__; });, (Int128 src), (src))
+ DEFINE_ARITHMETIC_INTRINSIC(V##name##m, return ({ __VA_ARGS__; });, (Int128 src), (), (src))
+
+#define DEFINE_1OP_ARITHMETIC_INTRINSIC_V(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC(V##name##v, return ({ __VA_ARGS__; }); \
+ , (SIMD128Register src), (), (src))
+
+#define DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC(V##name##vv, return ({ __VA_ARGS__; }); \
+ , (SIMD128Register src1, SIMD128Register src2), (), (src1, src2))
+
+#define DEFINE_3OP_ARITHMETIC_INTRINSIC_VV(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC(V##name##vv, return ({ __VA_ARGS__; }); \
+ , \
+ (SIMD128Register src1, SIMD128Register src2, SIMD128Register src3), \
+ (), \
+ (src1, src2, src3))
+
#define DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(name, ...) \
DEFINE_ARITHMETIC_INTRINSIC(V##name##vs, return ({ __VA_ARGS__; }); \
- , (ElementType src1, ElementType src2), (src1, src2))
+ , (ElementType src1, ElementType src2), (), (src1, src2))
+
+#define DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC(V##name##vx, return ({ __VA_ARGS__; }); \
+ , (SIMD128Register src1, ElementType src2), (), (src1, src2))
+
+#define DEFINE_3OP_ARITHMETIC_INTRINSIC_VX(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC( \
+ V##name##vx, return ({ __VA_ARGS__; }); \
+ , (SIMD128Register src1, ElementType src2, SIMD128Register src3), (), (src1, src2, src3))
+
+#define DEFINE_1OP_ARITHMETIC_INTRINSIC_X(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC(V##name##x, return ({ __VA_ARGS__; });, (ElementType src), (), (src))
+
+#define DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VF(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC( \
+ Vf##name##vf, return ({ __VA_ARGS__; }); \
+ , (int8_t frm, SIMD128Register src1, ElementType src2), (frm), (src1, src2))
+
+#define DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VV(name, ...) \
+ DEFINE_ARITHMETIC_INTRINSIC( \
+ Vf##name##vv, return ({ __VA_ARGS__; }); \
+ , (int8_t frm, SIMD128Register src1, SIMD128Register src2), (frm), (src1, src2))
#define DEFINE_W_ARITHMETIC_INTRINSIC(Name, Pattern, arithmetic, parameters, arguments) \
\
@@ -540,33 +578,14 @@ inline std::tuple<SIMD128Register> Vfcvtv(int8_t rm, int8_t frm, SIMD128Register
DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS arguments); \
}
-#define DEFINE_1OP_ARITHMETIC_INTRINSIC_V(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC(V##name##v, return ({ __VA_ARGS__; });, (SIMD128Register src), (src))
-#define DEFINE_1OP_ARITHMETIC_INTRINSIC_X(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC(V##name##x, return ({ __VA_ARGS__; });, (ElementType src), (src))
-#define DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC(V##name##vv, return ({ __VA_ARGS__; }); \
- , (SIMD128Register src1, SIMD128Register src2), (src1, src2))
-#define DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC(V##name##vx, return ({ __VA_ARGS__; }); \
- , (SIMD128Register src1, ElementType src2), (src1, src2))
-#define DEFINE_3OP_ARITHMETIC_INTRINSIC_VV(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC( \
- V##name##vv, return ({ __VA_ARGS__; }); \
- , (SIMD128Register src1, SIMD128Register src2, SIMD128Register src3), (src1, src2, src3))
-#define DEFINE_3OP_ARITHMETIC_INTRINSIC_VX(name, ...) \
- DEFINE_ARITHMETIC_INTRINSIC( \
- V##name##vx, return ({ __VA_ARGS__; }); \
- , (SIMD128Register src1, ElementType src2, SIMD128Register src3), (src1, src2, src3))
+#define DEFINE_2OP_ARITHMETIC_INTRINSIC_WV(name, pattern, ...) \
+ DEFINE_W_ARITHMETIC_INTRINSIC(V##name##wv, pattern, return ({ __VA_ARGS__; }); \
+ , (SIMD128Register src1, SIMD128Register src2), (src1, src2))
#define DEFINE_2OP_ARITHMETIC_INTRINSIC_WVV(name, pattern, ...) \
DEFINE_W_ARITHMETIC_INTRINSIC(V##name##vv, pattern, return ({ __VA_ARGS__; }); \
, (SIMD128Register src1, SIMD128Register src2), (src1, src2))
-#define DEFINE_2OP_ARITHMETIC_INTRINSIC_WV(name, pattern, ...) \
- DEFINE_W_ARITHMETIC_INTRINSIC(V##name##wv, pattern, return ({ __VA_ARGS__; }); \
- , (SIMD128Register src1, SIMD128Register src2), (src1, src2))
-
#define DEFINE_2OP_ARITHMETIC_INTRINSIC_WX(name, pattern, ...) \
DEFINE_W_ARITHMETIC_INTRINSIC(V##name##wx, pattern, return ({ __VA_ARGS__; }); \
, (SIMD128Register src1, ElementType src2), (src1, src2))
@@ -584,6 +603,12 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(or, (args | ...))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(or, (args | ...))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(xor, (args ^ ...))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(xor, (args ^ ...))
+DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VF(mul, std::get<0>(FMul(FPFlags::DYN, frm, args...)))
+DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VV(mul, std::get<0>(FMul(FPFlags::DYN, frm, args...)))
+DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VF(div, std::get<0>(FDiv(FPFlags::DYN, frm, args...)))
+DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VV(div, std::get<0>(FDiv(FPFlags::DYN, frm, args...)))
+DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VF(rdiv, auto [arg1, arg2] = std::tuple{args...};
+ std::get<0>(FDiv(FPFlags::DYN, frm, arg2, arg1)))
// SIMD mask either includes results with all bits set to 0 or all bits set to 1.
// This way it may be used with VAnd and VAndN operations to perform masking.
// Such comparison is effectively one instruction of x86-64 (via SSE or AVX) but
@@ -705,14 +730,17 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_WX(nsr, Narrowwv, auto [arg1, arg2] = std::tuple
#undef DEFINE_W_ARITHMETIC_INTRINSIC
#undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS
#undef DEFINE_1OP_ARITHMETIC_INTRINSIC_M
-#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_VS
+#undef DEFINE_1OP_ARITHMETIC_INTRINSIC_V
#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_VV
-#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_VX
#undef DEFINE_3OP_ARITHMETIC_INTRINSIC_VV
+#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_VS
+#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_VX
#undef DEFINE_3OP_ARITHMETIC_INTRINSIC_VX
-#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_VV
-#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_WVV
+#undef DEFINE_1OP_ARITHMETIC_INTRINSIC_X
+#undef DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VF
+#undef DEFINE_2OP_FMR_ARITHMETIC_INTRINSIC_VV
#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_WV
+#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_WVV
#undef DEFINE_2OP_ARITHMETIC_INTRINSIC_WX
} // namespace berberis::intrinsics