about summary refs log tree commit diff
diff options
context:
space:
mode:
author Haines Sy <hainesy@google.com> 2024-05-01 18:54:47 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2024-05-01 18:54:47 +0000
commit 49aeffee4d7835072e4dfc085f27c2ce7bec381f (patch)
tree a8ec2e83dd982eb264060162ecb6d6b75a746c8f
parent 07b7c15db5bf1e8d708331b32ed325353e2fac28 (diff)
parent 7740ffae74aa08ff978821d94f48fa0c7e8cf32f (diff)
download binary_translation-49aeffee4d7835072e4dfc085f27c2ce7bec381f.tar.gz
Merge "Implement vfwadd/sub wv/wf as intrinsics" into main
-rw-r--r-- interpreter/riscv64/interpreter.h 28
-rw-r--r-- interpreter/riscv64/interpreter_test.cc 44
-rw-r--r-- intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h 16
3 files changed, 82 insertions, 6 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index 4458bd4a..0c7bd140 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -1288,6 +1288,20 @@ class Interpreter {
vta,
vma,
kFrm>(args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfwaddwf:
+ return OpVectorWidenwx<intrinsics::Vfwaddwf<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ kFrm>(args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfwsubwf:
+ return OpVectorWidenwx<intrinsics::Vfwsubwf<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ kFrm>(args.dst, args.src1, arg2);
default:
break;
}
@@ -1507,6 +1521,20 @@ class Interpreter {
vta,
vma,
kFrm>(args.dst, args.src1, args.src2);
+ case Decoder::VOpFVvOpcode::kVfwaddwv:
+ return OpVectorWidenwv<intrinsics::Vfwaddwv<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ kFrm>(args.dst, args.src1, args.src2);
+ case Decoder::VOpFVvOpcode::kVfwsubwv:
+ return OpVectorWidenwv<intrinsics::Vfwsubwv<ElementType>,
+ ElementType,
+ vlmul,
+ vta,
+ vma,
+ kFrm>(args.dst, args.src1, args.src2);
case Decoder::VOpFVvOpcode::kVFUnary0:
switch (args.vfunary0_opcode) {
case Decoder::VFUnary0Opcode::kVfwcvtxufv:
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 22a8f772..2feb230d 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -7679,15 +7679,26 @@ TEST_F(Riscv64InterpreterTest, TestVadd) {
{0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}},
kVectorCalculationsSource);
- TestWideningVectorFloatInstruction(0xc100d457, // vfwadd.vf v8, v16, f1, v0.t
+ TestWideningVectorFloatInstruction(0xd10c1457, // vfwadd.wv v8, v16, v24, v0.t
+ {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000},
+ {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000},
+ {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000},
+ {0xbcc6'9446'2000'0000, 0xbf3e'bd3c'ea65'4738},
+ {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948},
+ {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958},
+ {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
+ {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}},
+ kVectorCalculationsSource);
+
+ TestWideningVectorFloatInstruction(0xd100d457, // vfwadd.wf v8, v16, f1, v0.t
{{0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
{0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
{0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
- {0x4016'8000'0000'0000, 0x4016'8000'0000'0000},
- {0x4016'8000'0000'0000, 0x4016'7fff'ffff'fffd},
- {0x4016'7fff'ffff'fd55, 0x4016'7fff'fffd'454b},
- {0x4016'7fff'fd35'3b40, 0x4016'7ffd'252b'3000},
- {0x4016'7d15'1b20'0000, 0x4013'850b'1000'0000}},
+ {0x4016'8000'0000'0000, 0x4016'7f85'0b0d'1315},
+ {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948},
+ {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958},
+ {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
+ {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}},
kVectorCalculationsSource);
}
@@ -8223,6 +8234,27 @@ TEST_F(Riscv64InterpreterTest, TestVsub) {
{0xc016'8000'02ca'c4c0, 0xc016'8002'dad4'd000},
{0xc016'82ea'e4e0'0000, 0xc019'7af4'f000'0000}},
kVectorCalculationsSource);
+
+ TestWideningVectorFloatInstruction(0xd90c1457, // vfwsub.wv v8, v16, v24, v0.t
+ {{0x3ac0'9240'0000'0000, 0x3bc1'9341'2000'0000},
+ {0x38c2'9042'2000'0000, 0x39c3'9143'0000'0000},
+ {0x3ec4'9644'0000'0000, 0x3fc5'9745'2000'0000},
+ {0x3cc6'9446'2000'0000, 0xbf3e'bd3c'8c10'2b38},
+ {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948},
+ {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958},
+ {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
+ {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}},
+ kVectorCalculationsSource);
+ TestWideningVectorFloatInstruction(0xd900d457, // vfwsub.wf v8, v16, f1, v0.t
+ {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000},
+ {0xc016'8000'0000'0000, 0xc016'8000'0000'0000},
+ {0xc016'8000'0000'0000, 0xc016'8000'0000'0000},
+ {0xc016'8000'0000'0000, 0xc016'807a'f4f2'eceb},
+ {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948},
+ {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958},
+ {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
+ {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVand) {
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index 5e99d744..27353bf3 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -838,6 +838,16 @@ std::tuple<ElementType> WideMultiplySignedUnsigned(ElementType arg1, ElementType
Vfw##name##vf, Widenvv, return ({ __VA_ARGS__; }); \
, (int8_t csr, SIMD128Register src1, ElementType src2), (csr), (src1, src2))
+#define DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(name, ...) \
+ DEFINE_W_ARITHMETIC_INTRINSIC( \
+ Vfw##name##wv, Widenwv, return ({ __VA_ARGS__; }); \
+ , (int8_t csr, SIMD128Register src1, SIMD128Register src2), (csr), (src1, src2))
+
+#define DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(name, ...) \
+ DEFINE_W_ARITHMETIC_INTRINSIC( \
+ Vfw##name##wf, Widenwv, return ({ __VA_ARGS__; }); \
+ , (int8_t csr, SIMD128Register src1, ElementType src2), (csr), (src1, src2))
+
#define DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VVW(name, ...) \
DEFINE_W_ARITHMETIC_INTRINSIC( \
Vw##name##vv, Widenvvw, return ({ __VA_ARGS__; }); \
@@ -917,6 +927,10 @@ DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(sub, std::get<0>(FSub(FPFlags::DYN
DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...)))
DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(mul, std::get<0>(FMul(FPFlags::DYN, csr, args...)))
DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(mul, std::get<0>(FMul(FPFlags::DYN, csr, args...)))
+DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...)))
+DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...)))
+DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...)))
+DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...)))
DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fsub, std::get<0>(FSub(FPFlags::DYN, csr, args...)))
DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fsub, std::get<0>(FSub(FPFlags::DYN, csr, args...)))
@@ -1114,6 +1128,8 @@ DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WX(
#undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV
#undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV
#undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF
+#undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV
+#undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF
#undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VVW
#undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WV
#undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WX