diff options
author | Mohamed Elbashir Younes Snosy <mohamedsamir151298@gmail.com> | 2024-03-05 01:04:57 +0200 |
---|---|---|
committer | Victor Khimenko <khim@google.com> | 2024-03-05 22:48:44 +0000 |
commit | 15acc49da4da82770e73d519d5cf11baa24b4892 (patch) | |
tree | bc3d61ddfb35183a8df585a24f76d614bc22f30b | |
parent | 51da4ecd54b7443dd17a6230242eabce8c250667 (diff) | |
download | binary_translation-15acc49da4da82770e73d519d5cf11baa24b4892.tar.gz |
Implement vector floating-point comparison operators.
Test: berberis_all
Change-Id: I0c73af3b9cdb89844d5475a946cebf70094fa422
-rw-r--r-- | interpreter/riscv64/interpreter.h | 34 | ||||
-rw-r--r-- | interpreter/riscv64/interpreter_test.cc | 319 | ||||
-rw-r--r-- | intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h | 30 |
3 files changed, 263 insertions, 120 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h index 586551c5..6ce41660 100644 --- a/interpreter/riscv64/interpreter.h +++ b/interpreter/riscv64/interpreter.h @@ -1203,11 +1203,29 @@ class Interpreter { InactiveProcessing::kUndisturbed>( args.dst, arg2, /*dst_mask=*/args.src1); } + case Decoder::VOpFVfOpcode::kVfminvf: + return OpVectorvx<intrinsics::Vfminvx<ElementType>, ElementType, vlmul, vta, vma>( + args.dst, args.src1, arg2); case Decoder::VOpFVfOpcode::kVfmaxvf: return OpVectorvx<intrinsics::Vfmaxvx<ElementType>, ElementType, vlmul, vta, vma>( args.dst, args.src1, arg2); - case Decoder::VOpFVfOpcode::kVfminvf: - return OpVectorvx<intrinsics::Vfminvx<ElementType>, ElementType, vlmul, vta, vma>( + case Decoder::VOpFVfOpcode::kVmfeqvf: + return OpVectormvx<intrinsics::Vfeqvx<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVmflevf: + return OpVectormvx<intrinsics::Vflevx<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVmfltvf: + return OpVectormvx<intrinsics::Vfltvx<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVmfnevf: + return OpVectormvx<intrinsics::Vfnevx<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVmfgtvf: + return OpVectormvx<intrinsics::Vfgtvx<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, arg2); + case Decoder::VOpFVfOpcode::kVmfgevf: + return OpVectormvx<intrinsics::Vfgevx<ElementType>, ElementType, vlmul, vma>( args.dst, args.src1, arg2); default: return Unimplemented(); @@ -1448,6 +1466,18 @@ class Interpreter { break; // Make compiler happy. } break; + case Decoder::VOpFVvOpcode::kVmfeqvv: + return OpVectormvv<intrinsics::Vfeqvv<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVmflevv: + return OpVectormvv<intrinsics::Vflevv<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVmfltvv: + return OpVectormvv<intrinsics::Vfltvv<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, args.src2); + case Decoder::VOpFVvOpcode::kVmfnevv: + return OpVectormvv<intrinsics::Vfnevv<ElementType>, ElementType, vlmul, vma>( + args.dst, args.src1, args.src2); default: break; // Make compiler happy. } diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc index 73679623..ba5fe0a5 100644 --- a/interpreter/riscv64/interpreter_test.cc +++ b/interpreter/riscv64/interpreter_test.cc @@ -1363,11 +1363,31 @@ class Riscv64InterpreterTest : public ::testing::Test { } void TestVectorMaskTargetInstruction(uint32_t insn_bytes, - const __v16qu(&expected_result_int8), + const uint32_t expected_result_int32, + const uint16_t expected_result_int64, + const __v2du (&source)[16]) { + TestVectorMaskTargetInstruction( + insn_bytes, source, expected_result_int32, expected_result_int64); + } + + void TestVectorMaskTargetInstruction(uint32_t insn_bytes, + const uint8_t (&expected_result_int8)[16], const uint64_t expected_result_int16, const uint32_t expected_result_int32, - const uint64_t expected_result_int64, + const uint16_t expected_result_int64, const __v2du (&source)[16]) { + TestVectorMaskTargetInstruction(insn_bytes, + source, + expected_result_int8, + expected_result_int16, + expected_result_int32, + expected_result_int64); + } + + template <typename... ExpectedResultType> + void TestVectorMaskTargetInstruction(uint32_t insn_bytes, + const __v2du (&source)[16], + const ExpectedResultType(&... expected_result)) { auto Verify = [this, &source]( uint32_t insn_bytes, uint8_t vsew, const auto& expected_result, auto mask) { // Mask register is, unconditionally, v0, and we need 8, 16, or 24 to handle full 8-registers @@ -1378,6 +1398,12 @@ class Riscv64InterpreterTest : public ::testing::Test { } // Set x1 for vx instructions. SetXReg<1>(state_.cpu, 0xaaaa'aaaa'aaaa'aaaa); + // Set f1 for vf instructions. + if (vsew == 2) { + SetFReg<1>(state_.cpu, 0xffff'ffff'40b4'0000); // float 5.625 + } else if (vsew == 3) { + SetFReg<1>(state_.cpu, 0x4016'8000'0000'0000); // double 5.625 + } for (uint8_t vlmul = 0; vlmul < 8; ++vlmul) { for (uint8_t vta = 0; vta < 2; ++vta) { // vta should be ignored but we test both values! for (uint8_t vma = 0; vma < 2; ++vma) { @@ -1438,14 +1464,15 @@ class Riscv64InterpreterTest : public ::testing::Test { } }; - Verify(insn_bytes, 0, expected_result_int8, kMask); - Verify(insn_bytes, 1, expected_result_int16, kMask); - Verify(insn_bytes, 2, expected_result_int32, kMask); - Verify(insn_bytes, 3, expected_result_int64, kMask); - Verify(insn_bytes | (1 << 25), 0, expected_result_int8, kNoMask[0]); - Verify(insn_bytes | (1 << 25), 1, expected_result_int16, kNoMask[0]); - Verify(insn_bytes | (1 << 25), 2, expected_result_int32, kNoMask[0]); - Verify(insn_bytes | (1 << 25), 3, expected_result_int64, kNoMask[0]); + ((Verify(insn_bytes, + BitUtilLog2(sizeof(SIMD128Register) / sizeof(ExpectedResultType)), + expected_result, + kMask), + Verify(insn_bytes | (1 << 25), + BitUtilLog2(sizeof(SIMD128Register) / sizeof(ExpectedResultType)), + expected_result, + kNoMask[0])), + ...); } void TestVXmXXsInstruction(uint32_t insn_bytes, @@ -1751,16 +1778,16 @@ class Riscv64InterpreterTest : public ::testing::Test { }; static constexpr __v2du kVectorComparisonSource[16] = { - {0xfff5'fff5'fff5'fff5, 0xfff5'fff5'fff5'fff5}, - {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}, - {0xbbbb'bbbb'bbbb'bbbb, 0xaaaa'aaaa'aaaa'aaaa}, + {0xf005'f005'f005'f005, 0xffff'ffff'4040'4040}, + {0xffff'ffff'40b4'40b4, 0xffff'ffff'40b4'0000}, + {0x4016'4016'4016'4016, 0x4016'8000'0000'0000}, {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111}, {0xfff4'fff4'fff4'fff4, 0xfff6'fff6'fff6'fff6}, {0xfff8'fff8'fff4'fff4, 0xfff5'fff5'fff5'fff5}, {0xa9bb'bbbb'a9bb'bbbb, 0xa9bb'bbbb'a9bb'bbbb}, {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}, - {0xfff5'fff5'fff5'fff5, 0xfff5'fff5'fff5'fff5}, + {0xf005'f005'f005'f005, 0xffff'ffff'4040'4040}, {0x1111'1111'1111'1111, 0x1111'1111'1111'1111}, {0xfff1'fff1'fff1'fff1, 0xfff1'fff1'fff1'fff1}, {0x6e6c'6a69'6664'6260, 0x7e7c'7a78'7674'7271}, @@ -6280,158 +6307,216 @@ TEST_F(Riscv64InterpreterTest, TestVxor) { kVectorCalculationsSourceLegacy); } +TEST_F(Riscv64InterpreterTest, TestVmfeq) { + TestVectorMaskTargetInstruction(0x610c1457, // Vmfeq.vv v8, v16, v24, v0.t + 0x0000'0007, + 0x0001, + kVectorComparisonSource); + TestVectorMaskTargetInstruction(0x6100d457, // Vmfeq.vf v8, v16, f1, v0.t + 0x0000'0040, + 0x0020, + kVectorComparisonSource); +} + TEST_F(Riscv64InterpreterTest, TestVmseq) { - TestVectorMaskTargetInstruction(0x610c0457, // Vmseq.vv v8, v16, v24 + TestVectorMaskTargetInstruction(0x610c0457, // Vmseq.vv v8, v16, v24, v0.t {255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000'0000'0000'00ff, 0x0000'000f, 0x0003, kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x6100c457, // Vmseq.vx v8, v16, x1 - {0, 0, 255, 255, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - 0x0000'0000'0ff0'ff00, - 0x0000'3cf0, - 0x006c, + TestVectorMaskTargetInstruction(0x6100c457, // Vmseq.vx v8, v16, x1, v0.t + {0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + 0x0000'0000'0f00'0000, + 0x0000'3000, + 0x0040, kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x610ab457, // Vmseq.vi v8, v16, -0xb - {85, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 0, 0, 0}, - 0x0000'f000'0000'00ff, + TestVectorMaskTargetInstruction(0x610ab457, // Vmseq.vi v8, v16, -0xb, v0.t + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 0, 0, 0}, + 0x0000'f000'0000'0000, 0x0000'0000, 0x0000, kVectorComparisonSource); } +TEST_F(Riscv64InterpreterTest, TestVmfne) { + TestVectorMaskTargetInstruction(0x710c1457, // Vmfne.vv v8, v16, v24, v0.t + 0xffff'fff8, + 0xfffe, + kVectorComparisonSource); + TestVectorMaskTargetInstruction(0x7100d457, // Vmfne.vf v8, v16, f1, v0.t + 0xffff'ffbf, + 0xffdf, + kVectorComparisonSource); +} + TEST_F(Riscv64InterpreterTest, TestVmsne) { TestVectorMaskTargetInstruction( - 0x650c0457, // Vmsne.vv v8, v16, v24 + 0x650c0457, // Vmsne.vv v8, v16, v24, v0.t {0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, 0xffff'ffff'ffff'ff00, 0xffff'fff0, 0xfffc, kVectorComparisonSource); TestVectorMaskTargetInstruction( - 0x6500c457, // Vmsne.vx v8, v16, x1 - {255, 255, 0, 0, 255, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255}, - 0xffff'ffff'f00f'00ff, - 0xffff'c30f, - 0xff93, + 0x6500c457, // Vmsne.vx v8, v16, x1, v0.t + {255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255}, + 0xffff'ffff'f0ff'ffff, + 0xffff'cfff, + 0xffbf, kVectorComparisonSource); TestVectorMaskTargetInstruction( - 0x650ab457, // Vmsne.vi v8, v16, -0xb - {170, 170, 255, 255, 255, 255, 255, 255, 255, 255, 255, 170, 255, 255, 255, 255}, - 0xffff'0fff'ffff'ff00, + 0x650ab457, // Vmsne.vi v8, v16, -0xb, v0.t + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 170, 255, 255, 255, 255}, + 0xffff'0fff'ffff'ffff, 0xffff'ffff, 0xffff, kVectorComparisonSource); } +TEST_F(Riscv64InterpreterTest, TestVmflt) { + TestVectorMaskTargetInstruction(0x6d0c1457, // Vmflt.vv v8, v16, v24, v0.t + 0x0000'f000, + 0x00c0, + kVectorComparisonSource); + TestVectorMaskTargetInstruction(0x6d00d457, // Vmflt.vf v8, v16, f1, v0.t + 0xff00'ff07, + 0xf0d1, + kVectorComparisonSource); +} + TEST_F(Riscv64InterpreterTest, TestVmsltu) { - TestVectorMaskTargetInstruction(0x690c0457, // Vmsltu.vv v8, v16, v24 - {0, 0, 0, 0, 255, 255, 0, 255, 0, 0, 0, 0, 255, 255, 255, 255}, - 0xffff'0000'f0ff'0000, + TestVectorMaskTargetInstruction(0x690c0457, // Vmsltu.vv v8, v16, v24, v0.t + {0, 0, 0, 3, 255, 255, 0, 255, 0, 0, 0, 0, 255, 255, 255, 255}, + 0xffff'0000'f0ff'1000, 0xff00'cf00, 0xf0b0, kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x6900c457, // Vmsltu.vx v8, v16, x1 - {0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 136, 136, 255, 255}, - 0xffaa'0000'f000'0000, - 0xff00'c000, - 0xf080, - kVectorComparisonSource); + TestVectorMaskTargetInstruction( + 0x6900c457, // Vmsltu.vx v8, v16, x1, v0.t + {85, 15, 10, 11, 255, 255, 0, 255, 0, 0, 0, 0, 136, 136, 255, 255}, + 0xffaa'0000'f0ff'3330, + 0xff00'cf54, + 0xf0b0, + kVectorComparisonSource); } TEST_F(Riscv64InterpreterTest, TestVmslt) { - TestVectorMaskTargetInstruction( - 0x6d0c0457, // Vmslt.vv v8, v16, v24 - {0, 0, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255}, - 0xffff'0000'ffff'ff00, - 0xff00'fff0, - 0xf0fc, - kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x6d00c457, // Vmslt.vx v8, v16, x1 - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 136, 255, 255}, - 0xffaa'0000'0000'0000, + TestVectorMaskTargetInstruction(0x6d0c0457, // Vmslt.vv v8, v16, v24, v0.t + {0, 0, 245, 247, 0, 32, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255}, + 0xffff'0000'ff40'dc00, + 0xff00'f0a0, + 0xf0cc, + kVectorComparisonSource); + TestVectorMaskTargetInstruction(0x6d00c457, // Vmslt.vx v8, v16, x1, v0.t + {0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 136, 136, 255, 255}, + 0xffaa'0000'0040'0000, 0xff00'0000, 0xf000, kVectorComparisonSource); } +TEST_F(Riscv64InterpreterTest, TestVmfle) { + TestVectorMaskTargetInstruction(0x650c1457, // Vmfle.vv v8, v16, v24, v0.t + 0x0000'f007, + 0x00c1, + kVectorComparisonSource); + TestVectorMaskTargetInstruction(0x6500d457, // Vmfle.vf v8, v16, f1, v0.t + 0xff00'ff47, + 0xf0f1, + kVectorComparisonSource); +} + TEST_F(Riscv64InterpreterTest, TestVmsleu) { TestVectorMaskTargetInstruction( - 0x710c0457, // Vmsleu.vv v8, v16, v24 - {255, 255, 0, 0, 255, 255, 0, 255, 0, 0, 0, 0, 255, 255, 255, 255}, - 0xffff'0000'f0ff'00ff, + 0x710c0457, // Vmsleu.vv v8, v16, v24, v0.t + {255, 255, 0, 3, 255, 255, 0, 255, 0, 0, 0, 0, 255, 255, 255, 255}, + 0xffff'0000'f0ff'10ff, 0xff00'cf0f, 0xf0b3, kVectorComparisonSource); TestVectorMaskTargetInstruction( - 0x7100c457, // Vmsleu.vx v8, v16, x1 - {0, 0, 255, 255, 0, 255, 255, 255, 0, 0, 0, 0, 136, 136, 255, 255}, - 0xffaa'0000'fff0'ff00, - 0xff00'fcf0, - 0xf0ec, + 0x7100c457, // Vmsleu.vx v8, v16, x1, v0.t + {85, 15, 10, 11, 255, 255, 255, 255, 0, 0, 0, 0, 136, 136, 255, 255}, + 0xffaa'0000'ffff'3330, + 0xff00'ff54, + 0xf0f0, kVectorComparisonSource); TestVectorMaskTargetInstruction( - 0x710ab457, // Vmsleu.vi v8, v16, -0xb - {85, 85, 255, 255, 255, 255, 255, 255, 85, 0, 5, 85, 255, 255, 255, 255}, - 0xffff'f30f'ffff'ffff, - 0xffff'ffff, + 0x710ab457, // Vmsleu.vi v8, v16, -0xb, v0.t + {255, 15, 15, 15, 255, 255, 255, 255, 85, 0, 5, 85, 255, 255, 255, 255}, + 0xffff'f30f'ffff'333f, + 0xffff'ff57, 0xffff, kVectorComparisonSource); } TEST_F(Riscv64InterpreterTest, TestVmsle) { TestVectorMaskTargetInstruction( - 0x750c0457, // Vmsle.vv v8, v16, v24 - {255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255}, - 0xffff'0000'ffff'ffff, - 0xff00'ffff, - 0xf0ff, + 0x750c0457, // Vmsle.vv v8, v16, v24, v0.t + {255, 255, 245, 247, 0, 32, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255}, + 0xffff'0000'ff40'dcff, + 0xff00'f0af, + 0xf0cf, kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x7500c457, // Vmsle.vx v8, v16, x1 - {0, 0, 255, 255, 0, 255, 255, 0, 0, 0, 0, 0, 136, 136, 255, 255}, - 0xffaa'0000'0ff0'ff00, - 0xff00'3cf0, - 0xf06c, + TestVectorMaskTargetInstruction(0x7500c457, // Vmsle.vx v8, v16, x1, v0.t + {0, 0, 0, 0, 0, 32, 255, 0, 0, 0, 0, 0, 136, 136, 255, 255}, + 0xffaa'0000'0f40'0000, + 0xff00'3000, + 0xf040, + kVectorComparisonSource); + TestVectorMaskTargetInstruction(0x750ab457, // Vmsle.vi v8, v16, -0xb + {170, 0, 5, 4, 0, 32, 255, 0, 85, 0, 5, 85, 255, 255, 255, 255}, + 0xffff'f30f'0f40'000f, + 0xffff'3003, + 0xff4f, + kVectorComparisonSource); +} + +TEST_F(Riscv64InterpreterTest, TestVmfgt) { + TestVectorMaskTargetInstruction(0x7500d457, // Vmfgt.vf v8, v16, f1, v0.t + 0x0000'0010, + 0x0000, kVectorComparisonSource); - TestVectorMaskTargetInstruction( - 0x750ab457, // Vmsle.vi v8, v16, -0xb - {85, 85, 255, 255, 255, 255, 255, 0, 85, 0, 5, 85, 255, 255, 255, 255}, - 0xffff'f30f'0fff'ffff, - 0xffff'3fff, - 0xff7f, - kVectorComparisonSource); } TEST_F(Riscv64InterpreterTest, TestVmsgtu) { TestVectorMaskTargetInstruction( - 0x7900c457, // Vmsgtu.vx v8, v16, x1 - {255, 255, 0, 0, 255, 0, 0, 0, 255, 255, 255, 255, 119, 119, 0, 0}, - 0x0055'ffff'000f'00ff, - 0x00ff'030f, - 0x0f13, + 0x7900c457, // Vmsgtu.vx v8, v16, x1, v0.t + {170, 240, 245, 244, 0, 0, 0, 0, 255, 255, 255, 255, 119, 119, 0, 0}, + 0x0055'ffff'0000'cccf, + 0x00ff'00ab, + 0x0f0f, kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x790ab457, // Vmsgtu.vi v8, v16, -0xb - {170, 170, 0, 0, 0, 0, 0, 0, 170, 255, 250, 170, 0, 0, 0, 0}, - 0x0000'0cf0'0000'0000, - 0x0000'0000, + TestVectorMaskTargetInstruction(0x790ab457, // Vmsgtu.vi v8, v16, -0xb, v0.t + {0, 240, 240, 240, 0, 0, 0, 0, 170, 255, 250, 170, 0, 0, 0, 0}, + 0x0000'0cf0'0000'ccc0, + 0x0000'00a8, 0x0000, kVectorComparisonSource); } TEST_F(Riscv64InterpreterTest, TestVmsgt) { TestVectorMaskTargetInstruction( - 0x7d00c457, // Vmsgt.vx v8, v16, x1 - {255, 255, 0, 0, 255, 0, 0, 255, 255, 255, 255, 255, 119, 119, 0, 0}, - 0x0055'ffff'f00f'00ff, - 0x00ff'c30f, - 0x0f93, + 0x7d00c457, // Vmsgt.vx v8, v16, x1, v0.t + {255, 255, 255, 255, 255, 223, 0, 255, 255, 255, 255, 255, 119, 119, 0, 0}, + 0x0055'ffff'f0bf'ffff, + 0x00ff'cfff, + 0x0fbf, kVectorComparisonSource); - TestVectorMaskTargetInstruction(0x7d0ab457, // Vmsgt.vi v8, v16, -0xb - {170, 170, 0, 0, 0, 0, 0, 255, 170, 255, 250, 170, 0, 0, 0, 0}, - 0x0000'0cf0'f000'0000, - 0x0000'c000, - 0x0080, + TestVectorMaskTargetInstruction( + 0x7d0ab457, // Vmsgt.vi v8, v16, -0xb, v0.t + {85, 255, 250, 251, 255, 223, 0, 255, 170, 255, 250, 170, 0, 0, 0, 0}, + 0x0000'0cf0'f0bf'fff0, + 0x0000'cffc, + 0x00b0, + kVectorComparisonSource); +} + +TEST_F(Riscv64InterpreterTest, TestVmfge) { + TestVectorMaskTargetInstruction(0x7d00d457, // Vmfge.vf v8, v16, f1, v0.t + 0x0000'0050, + 0x0020, kVectorComparisonSource); } @@ -7188,17 +7273,17 @@ TEST_F(Riscv64InterpreterTest, TestVmin) { {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}}, kVectorCalculationsSourceLegacy); TestVectorFloatInstruction(0x1100d457, // vfmin.vf v8, v16, f1, v0.t - {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0xaaaa'aaaa, 0xaaaa'aaaa, 0xaaaa'aaaa, 0xaaaa'aaaa}, - {0xbbbb'bbbb, 0xbbbb'bbbb, 0xaaaa'aaaa, 0xaaaa'aaaa}, + {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x7fc0'0000}, + {0x40b4'0000, 0x7fc0'0000, 0x40b4'0000, 0x7fc0'0000}, + {0x4016'4016, 0x4016'4016, 0x0000'0000, 0x4016'8000}, {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb}, {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}}, - {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}, - {0xbbbb'bbbb'bbbb'bbbb, 0xaaaa'aaaa'aaaa'aaaa}, + {{0xf005'f005'f005'f005, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x40164'016'4016'4016, 0x4016'8000'0000'0000}, {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, @@ -7206,16 +7291,16 @@ TEST_F(Riscv64InterpreterTest, TestVmin) { {0xa9a9'a9a9'a9a9'a9a9, 0xa9a9'a9a9'a9a9'a9a9}}, kVectorComparisonSource); TestVectorFloatInstruction(0x110c1457, // vfmin.vv v8,v16,v24,v0.t - {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0xaaaa'aaaa, 0xaaaa'aaaa, 0xaaaa'aaaa, 0xaaaa'aaaa}, + {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x7fc0'0000}, + {0x1111'1111, 0x7fc0'0000, 0x1111'1111, 0x7fc0'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0xaaaa'aaaa, 0xaaaa'aaaa, 0x1111'1111, 0x1111'1111}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0xc6c4'c2c0, 0xcecc'cac9, 0xd6d4'd2d1, 0xdedc'dad8}, {0xe6e4'e2e0, 0xeeec'eae9, 0xf6f4'f2f1, 0xfefc'faf8}}, - {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0xaaaa'aaaa'aaaa'aaaa, 0xaaaa'aaaa'aaaa'aaaa}, + {{0xf005'f005'f005'f005, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0xaaaa'aaaa'aaaa'aaaa, 0x1111'1111'1111'1111}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, @@ -7370,16 +7455,16 @@ TEST_F(Riscv64InterpreterTest, TestVmax) { {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978}}, kVectorCalculationsSourceLegacy); TestVectorFloatInstruction(0x1900d457, // vfmax.vf v8, v16, f1, v0.t - {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, + {{0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x7fc0'0000}, + {0x40b4'40b4, 0x7fc0'0000, 0x40b4'0000, 0x7fc0'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}, {0x40b4'0000, 0x40b4'0000, 0x40b4'0000, 0x40b4'0000}}, - {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, + {{0x4016'8000'0000'0000, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, @@ -7388,16 +7473,16 @@ TEST_F(Riscv64InterpreterTest, TestVmax) { {0x4016'8000'0000'0000, 0x4016'8000'0000'0000}}, kVectorComparisonSource); TestVectorFloatInstruction(0x190c1457, // vfmax.vv v8,v16,v24,v0.t - {{0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, - {0x1111'1111, 0x1111'1111, 0x1111'1111, 0x1111'1111}, + {{0xf005'f005, 0xf005'f005, 0x4040'4040, 0x7fc0'0000}, + {0x40b4'40b4, 0x7fc0'0000, 0x40b4'0000, 0x7fc0'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x6664'6260, 0x6e6c'6a69, 0x7674'7271, 0x7e7c'7a78}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000, 0x7fc0'0000}, {0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb, 0xa9bb'bbbb}, {0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9, 0xa9a9'a9a9}}, - {{0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, - {0x1111'1111'1111'1111, 0x1111'1111'1111'1111}, + {{0xf005'f005'f005'f005, 0x7ff8'0000'0000'0000}, + {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, {0x6e6c'6a69'6664'6260, 0x7e7c'7a78'7674'7271}, {0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000}, diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h index 60577242..b7dc8de1 100644 --- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h +++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h @@ -587,7 +587,35 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(xor, (args ^ ...)) // SIMD mask either includes results with all bits set to 0 or all bits set to 1. // This way it may be used with VAnd and VAndN operations to perform masking. // Such comparison is effectively one instruction of x86-64 (via SSE or AVX) but -// to achieve it we need to multiply bool result on (~ElementType{0}). +// to achieve it we need to multiply bool result by (~IntType{0}) or (~ElementType{0}). +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(feq, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Feq(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(feq, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Feq(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(fne, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(!std::get<0>(Feq(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fne, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(!std::get<0>(Feq(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(flt, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Flt(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(flt, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Flt(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(fle, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Fle(args...)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fle, using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Fle(args...)))) +// Note: for floating point numbers Flt(b, a) and !Fle(a, b) produce different and incompatible +// results. IEEE754-2008 defined NOT (!=) predicate as negation of EQ (==) predicate while GT (>) +// and GE (>=) are not negations of LE (<) or GT (<=) predicated but instead use swap of arguments. +// Note that scalar form includes only three predicates (Feq, Fle, Fgt) while vector form includes +// Vmfgt.vf and Vmfge.vf instructions only for vector+scalar case (vector+vector case is supposed +// to be handled by swapping arguments). More here: https://github.com/riscv/riscv-v-spec/issues/300 +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fgt, auto [arg1, arg2] = std::tuple{args...}; + using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Flt(arg2, arg1)))) +DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(fge, auto [arg1, arg2] = std::tuple{args...}; + using IntType = typename TypeTraits<ElementType>::Int; + (~IntType{0}) * IntType(std::get<0>(Fle(arg2, arg1)))) DEFINE_2OP_ARITHMETIC_INTRINSIC_VV( seq, (~ElementType{0}) * ElementType{static_cast<typename ElementType::BaseType>((args == ...))}) |