aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAhmed Mohamed Mohamed <ahmed200615200@gmail.com>2024-03-03 13:31:33 +0000
committerVictor Khimenko <khim@google.com>2024-03-04 14:53:31 +0000
commitae7726ba95d2daa79febdabd45115c27e9b6f47e (patch)
tree0e18775beddca79b0ef3b1eb9afed0ba241da033
parente22f587ea25b76f2290c34bd770a0c00c8f03f3d (diff)
downloadbinary_translation-ae7726ba95d2daa79febdabd45115c27e9b6f47e.tar.gz
Implement narrowing floating point conversions.
Note: the vfncvt.rod.f.f.w instruction is not implemented. It's rarely used (we haven't observed it in the wild yet) and, more importantly, it uses round-to-odd rounding, which is not supported by the x86-64 architecture, and our intrinsics don't support it either. Test: berberis_all Change-Id: Ia9caa1fbb33db22a71d9f103b212da5892efee6f
-rw-r--r--interpreter/riscv64/interpreter.h110
-rw-r--r--interpreter/riscv64/interpreter_test.cc91
2 files changed, 195 insertions, 6 deletions
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index 66ab030c..28245f54 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -1243,6 +1243,38 @@ class Interpreter {
vlmul,
vta,
vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtxufw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<UnsignedType, WideElementType>(FPFlags::DYN, frm, src);
+ },
+ UnsignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtxfw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<SignedType, WideElementType>(FPFlags::DYN, frm, src);
+ },
+ SignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtrtzxufw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<UnsignedType, WideElementType>(FPFlags::RTZ, frm, src);
+ },
+ UnsignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtrtzxfw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<SignedType, WideElementType>(FPFlags::RTZ, frm, src);
+ },
+ SignedType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
default:
break; // Make compiler happy.
}
@@ -1306,6 +1338,30 @@ class Interpreter {
vlmul,
vta,
vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtfxuw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, WideUnsignedType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtffw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, WideElementType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
+ case Decoder::VFUnary0Opcode::kVfncvtfxw:
+ return OpVectorNarrowwr<[](int8_t frm, SIMD128Register src) {
+ return intrinsics::Vfcvtv<ElementType, WideSignedType>(FPFlags::DYN, frm, src);
+ },
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1);
default:
break; // Make compiler happy.
}
@@ -2712,6 +2768,60 @@ class Interpreter {
}
}
+ template <auto Intrinsic,  // Conversion callable; the overload below invokes it as Intrinsic(frm, register).
+ typename TargetElementType,  // Element type of the (narrow) destination.
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ auto vma>
+ void OpVectorNarrowwr(uint8_t dst, uint8_t src) {  // vlmul-based entry point: narrow destination, wide source.
+ return OpVectorNarrowwr<Intrinsic,  // Turns vlmul into explicit register counts and forwards to the overload below.
+ TargetElementType,
+ NumberOfRegistersInvolved(vlmul),  // Becomes kDestRegistersInvolved.
+ NumRegistersInvolvedForWideOperand(vlmul),  // Becomes kSrcRegistersInvolved.
+ vta,
+ vma>(dst, src);
+ }
+
+ template <auto Intrinsic,  // Conversion callable, invoked below as Intrinsic(frm, source_register).
+ typename TargetElementType,  // Element type of the (narrow) destination.
+ size_t kDestRegistersInvolved,  // Number of destination registers written.
+ size_t kSrcRegistersInvolved,  // Number of registers in the source group.
+ TailProcessing vta,
+ auto vma>
+ void OpVectorNarrowwr(uint8_t dst, uint8_t src) {  // Converts the source group at src into the destination group at dst.
+ if constexpr (kDestRegistersInvolved == kSrcRegistersInvolved) {  // Same group size: one combined alignment check.
+ if (!IsAligned<kDestRegistersInvolved>(dst | src)) {
+ return Unimplemented();  // Misaligned register number; bails out before any CSR is touched.
+ }
+ } else if (!IsAligned<kDestRegistersInvolved>(dst) || !IsAligned<kSrcRegistersInvolved>(src)) {  // Different group sizes: check each register number against its own size.
+ return Unimplemented();
+ }
+ size_t vstart = GetCsr<CsrName::kVstart>();
+ size_t vl = GetCsr<CsrName::kVl>();
+ SetCsr<CsrName::kVstart>(0);  // Reset happens before the early return below, so vstart is cleared on that path too.
+ // When vstart >= vl, there are no body elements, and no elements are updated in any destination
+ // vector register group, including that no tail elements are updated with agnostic values.
+ if (vstart >= vl) [[unlikely]] {
+ return;
+ }
+ int8_t frm = GetCsr<CsrName::kFrm>();  // Passed to every Intrinsic call as its first argument.
+ auto mask = GetMaskForVectorOperations<vma>();
+ for (size_t index = 0; index < kDestRegistersInvolved; index++) {  // One iteration per destination register.
+ SIMD128Register orig_result(state_->cpu.v[dst + index]);  // Previous destination contents, handed to VectorMasking.
+ SIMD128Register arg_low(state_->cpu.v[src + 2 * index]);  // Destination register `index` is paired with source registers 2*index and 2*index+1.
+ SIMD128Register intrinsic_result = std::get<0>(Intrinsic(frm, arg_low));
+ if constexpr (kSrcRegistersInvolved > 1) {  // With a single source register only arg_low is converted.
+ SIMD128Register arg_high(state_->cpu.v[src + 2 * index + 1]);
+ SIMD128Register result_high = std::get<0>(Intrinsic(frm, arg_high));
+ intrinsic_result = std::get<0>(  // NOTE(review): presumably packs the bottom half of result_high into the top half of intrinsic_result - confirm against VMergeBottomHalfToTop.
+ intrinsics::VMergeBottomHalfToTop<TargetElementType>(intrinsic_result, result_high));
+ }
+ auto result = VectorMasking<TargetElementType, vta, vma>(  // NOTE(review): presumably applies mask/tail policy relative to vstart and vl - confirm against VectorMasking.
+ orig_result, intrinsic_result, vstart, vl, index, mask);
+ state_->cpu.v[dst + index] = result.template Get<__uint128_t>();  // Write the finished register back.
+ }
+ }
+
// SEW = 2*SEW op SEW
template <auto Intrinsic,
typename ElementType,
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index ec753c17..ce991806 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -977,10 +977,25 @@ class Riscv64InterpreterTest : public ::testing::Test {
expected_result_int64);
}
+ void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes,  // Overload taking only 32-bit expected results.
+ const uint32_t (&expected_result_int32)[4][4],  // Four rows of four 32-bit expected values.
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>(  // Forwards with the kFloat kind and kNarrowing mode.
+ insn_bytes, source, expected_result_int32);
+ }
+
+ void TestNarrowingVectorFloatInstruction(uint32_t insn_bytes,  // Overload taking both 16-bit and 32-bit expected results.
+ const uint16_t (&expected_result_int16)[4][8],  // Four rows of eight 16-bit expected values.
+ const uint32_t (&expected_result_int32)[4][4],  // Four rows of four 32-bit expected values.
+ const __v2du (&source)[16]) {
+ TestVectorInstruction<TestVectorInstructionKind::kFloat, TestVectorInstructionMode::kNarrowing>(  // Forwards with the kFloat kind and kNarrowing mode.
+ insn_bytes, source, expected_result_int16, expected_result_int32);
+ }
+
void TestNarrowingVectorInstruction(uint32_t insn_bytes,
- const uint8_t (&expected_result_int8)[8][16],
- const uint16_t (&expected_result_int16)[8][8],
- const uint32_t (&expected_result_int32)[8][4],
+ const uint8_t (&expected_result_int8)[4][16],
+ const uint16_t (&expected_result_int16)[4][8],
+ const uint32_t (&expected_result_int32)[4][4],
const __v2du (&source)[16]) {
TestVectorInstruction<TestVectorInstructionKind::kInteger,
TestVectorInstructionMode::kNarrowing>(
@@ -1018,10 +1033,12 @@ class Riscv64InterpreterTest : public ::testing::Test {
template <TestVectorInstructionKind kTestVectorInstructionKind,
TestVectorInstructionMode kTestVectorInstructionMode,
typename... ElementType,
+ size_t... kResultsCount,
size_t... kElementCount>
- void TestVectorInstruction(uint32_t insn_bytes,
- const __v2du (&source)[16],
- const ElementType (&... expected_result)[8][kElementCount]) {
+ void TestVectorInstruction(
+ uint32_t insn_bytes,
+ const __v2du (&source)[16],
+ const ElementType (&... expected_result)[kResultsCount][kElementCount]) {
auto Verify = [this, &source](uint32_t insn_bytes,
uint8_t vsew,
uint8_t vlmul_max,
@@ -2115,6 +2132,68 @@ TEST_F(Riscv64InterpreterTest, TestVfcvtxfv) {
{0x8000'0000'0000'0000, 0x8000'0000'0000'0000},
{0x8000'0000'0000'0000, 0x8000'0000'0000'0000}},
kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x49881457, // Vfncvt.xu.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x49889457, // Vfncvt.x.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x8000, 0x8000, 0xcacf, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(0x498a1457, // Vfncvt.f.f.w v8, v24, v0.t
+ {{0x8000'0000, 0x8000'0000, 0xb165'd14e, 0x8000'0000},
+ {0xff80'0000, 0xff80'0000, 0xff80'0000, 0xff80'0000},
+ {0x0000'0000, 0x0000'0000, 0x3561'd54a, 0x0000'0000},
+ {0x7f80'0000, 0x7f80'0000, 0x7f80'0000, 0x7f80'0000}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(0x49891457, // Vfncvt.f.xu.w v8, v24, v0.t
+ {{0x5f1e'0c9a, 0x5f0e'1c8a, 0x5f3e'2cba, 0x5f2e'3caa},
+ {0x5f5e'4cda, 0x5f4e'5cca, 0x5f7e'6cfa, 0x5f6e'7cea},
+ {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab},
+ {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(0x49899457, // Vfncvt.f.x.w v8, v24, v0.t
+ {{0xdec3'e6cc, 0xdee3'c6ec, 0xde83'a68c, 0xdea3'86ac},
+ {0xde06'cc97, 0xde46'8cd7, 0xdbc9'82cb, 0xdd8c'18ac},
+ {0x5df4'60d4, 0x5d69'c0aa, 0x5e7a'b0eb, 0x5e3a'f0ab},
+ {0x5ebd'98b6, 0x5e9d'b896, 0x5efd'd8f6, 0x5edd'f8d6}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x498b1457, // Vfncvt.rtz.xu.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0xffff, 0xffff, 0x6a21, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff}},
+ kVectorCalculationsSource);
+ TestNarrowingVectorFloatInstruction(
+ 0x498b9457, // Vfncvt.rtz.x.f.w v8, v24, v0.t
+ {{0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x8000, 0x8000, 0xcad0, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
+ {0x7fff, 0x7fff, 0x6a21, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff}},
+ {{0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0x0000'0000, 0x0000'0000, 0x0000'0000, 0x0000'0000},
+ {0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff, 0x7fff'ffff}},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVfmvfs) {