diff options
author | Haines Sy <hainesy@google.com> | 2024-04-30 16:59:49 -0700 |
---|---|---|
committer | Haines Sy <hainesy@google.com> | 2024-04-30 17:00:20 -0700 |
commit | 7740ffae74aa08ff978821d94f48fa0c7e8cf32f (patch) | |
tree | ed1ffb505b20cc8aaba63e3e9345649b6d82a58e | |
parent | 906d91fe1083b1c906c6d37fbed1ae7061670247 (diff) | |
download | binary_translation-7740ffae74aa08ff978821d94f48fa0c7e8cf32f.tar.gz |
Implement vfwadd/vfwsub .wv/.wf as intrinsics
Test: m berberis_all
Change-Id: I9ed8aeb334e63544580aaabba49e272d77a35043
-rw-r--r-- | interpreter/riscv64/interpreter.h | 28 |
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 44 |
-rw-r--r-- | intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h | 16 |
3 files changed, 82 insertions, 6 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h index 4458bd4a..0c7bd140 100644 --- a/interpreter/riscv64/interpreter.h +++ b/interpreter/riscv64/interpreter.h @@ -1288,6 +1288,20 @@ class Interpreter { vta, vma, kFrm>(args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfwaddwf: + return OpVectorWidenwx<intrinsics::Vfwaddwf<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVfwsubwf: + return OpVectorWidenwx<intrinsics::Vfwsubwf<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, arg2); default: break; } @@ -1507,6 +1521,20 @@ class Interpreter { vta, vma, kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfwaddwv: + return OpVectorWidenwv<intrinsics::Vfwaddwv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVfwsubwv: + return OpVectorWidenwv<intrinsics::Vfwsubwv<ElementType>, + ElementType, + vlmul, + vta, + vma, + kFrm>(args.dst, args.src1, args.src2); case Decoder::VOpFVvOpcode::kVFUnary0: switch (args.vfunary0_opcode) { case Decoder::VFUnary0Opcode::kVfwcvtxufv: diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 22a8f772..2feb230d 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -7679,15 +7679,26 @@ TEST_F(Riscv64InterpreterTest, TestVadd) { {0xc4ce'9c4e'2000'0000, 0xc5cf'9d4f'0000'0000}}, kVectorCalculationsSource); - TestWideningVectorFloatInstruction(0xc100d457, // vfwadd.vf v8, v16, f1, v0.t + TestWideningVectorFloatInstruction(0xd10c1457, // vfwadd.wv v8, v16, v24, v0.t + {{0xbac0'9240'0000'0000, 0xbbc1'9341'2000'0000}, + {0xb8c2'9042'2000'0000, 0xb9c3'9143'0000'0000}, + {0xbec4'9644'0000'0000, 0xbfc5'9745'2000'0000}, + {0xbcc6'9446'2000'0000, 0xbf3e'bd3c'ea65'4738}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + 
{0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(0xd100d457, // vfwadd.wf v8, v16, f1, v0.t {{0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'7fff'ffff'fffd}, - {0x4016'7fff'ffff'fd55, 0x4016'7fff'fffd'454b}, - {0x4016'7fff'fd35'3b40, 0x4016'7ffd'252b'3000}, - {0x4016'7d15'1b20'0000, 0x4013'850b'1000'0000}}, + {0x4016'8000'0000'0000, 0x4016'7f85'0b0d'1315}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, kVectorCalculationsSource); } @@ -8223,6 +8234,27 @@ TEST_F(Riscv64InterpreterTest, TestVsub) { {0xc016'8000'02ca'c4c0, 0xc016'8002'dad4'd000}, {0xc016'82ea'e4e0'0000, 0xc019'7af4'f000'0000}}, kVectorCalculationsSource); + + TestWideningVectorFloatInstruction(0xd90c1457, // vfwsub.wv v8, v16, v24, v0.t + {{0x3ac0'9240'0000'0000, 0x3bc1'9341'2000'0000}, + {0x38c2'9042'2000'0000, 0x39c3'9143'0000'0000}, + {0x3ec4'9644'0000'0000, 0x3fc5'9745'2000'0000}, + {0x3cc6'9446'2000'0000, 0xbf3e'bd3c'8c10'2b38}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); + TestWideningVectorFloatInstruction(0xd900d457, // vfwsub.wf v8, v16, f1, v0.t + {{0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'8000'0000'0000}, + {0xc016'8000'0000'0000, 0xc016'807a'f4f2'eceb}, + {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948}, + 
{0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958}, + {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968}, + {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, + kVectorCalculationsSource); } TEST_F(Riscv64InterpreterTest, TestVand) { diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index 5e99d744..27353bf3 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -838,6 +838,16 @@ std::tuple<ElementType> WideMultiplySignedUnsigned(ElementType arg1, ElementType Vfw##name##vf, Widenvv, return ({ __VA_ARGS__; }); \ , (int8_t csr, SIMD128Register src1, ElementType src2), (csr), (src1, src2)) +#define DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(name, ...) \ + DEFINE_W_ARITHMETIC_INTRINSIC( \ + Vfw##name##wv, Widenwv, return ({ __VA_ARGS__; }); \ + , (int8_t csr, SIMD128Register src1, SIMD128Register src2), (csr), (src1, src2)) + +#define DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(name, ...) \ + DEFINE_W_ARITHMETIC_INTRINSIC( \ + Vfw##name##wf, Widenwv, return ({ __VA_ARGS__; }); \ + , (int8_t csr, SIMD128Register src1, ElementType src2), (csr), (src1, src2)) + #define DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VVW(name, ...) 
\ DEFINE_W_ARITHMETIC_INTRINSIC( \ Vw##name##vv, Widenvvw, return ({ __VA_ARGS__; }); \ @@ -917,6 +927,10 @@ DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(sub, std::get<0>(FSub(FPFlags::DYN DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV(mul, std::get<0>(FMul(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF(mul, std::get<0>(FMul(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(add, std::get<0>(FAdd(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) +DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF(sub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VV(fsub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) DEFINE_2OP_1CSR_ARITHMETIC_INTRINSIC_VF(fsub, std::get<0>(FSub(FPFlags::DYN, csr, args...))) @@ -1114,6 +1128,8 @@ DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WX( #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV #undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VV #undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_VF +#undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WV +#undef DEFINE_2OP_1CSR_WIDEN_ARITHMETIC_INTRINSIC_WF #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VVW #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WV #undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WX |