author     Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-05-07 23:15:48 +0000
committer  Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-05-07 23:15:48 +0000
commit     c7caefd0df41378801e48aaa3cda1270c312762a (patch)
tree       306c083dbb84bd7ee8529aaa3d2aad2c5b6c9dc2
parent     910c1c3005863b02f71278fb61028acd8f399e51 (diff)
parent     59addd2f7e65b35643dbe541cbd7a20d0b5e90df (diff)
download   binary_translation-c7caefd0df41378801e48aaa3cda1270c312762a.tar.gz
Snap for 11812660 from 59addd2f7e65b35643dbe541cbd7a20d0b5e90df to sdk-release
Change-Id: I66aac65e8d03033faad94ce229a165b5195108ff
-rw-r--r--  assembler/include/berberis/assembler/common_x86.h                           |    6
-rw-r--r--  assembler/include/berberis/assembler/x86_32.h                                |    2
-rw-r--r--  assembler/include/berberis/assembler/x86_64.h                                |   13
-rw-r--r--  interpreter/riscv64/interpreter.h                                            |  112
-rw-r--r--  interpreter/riscv64/interpreter_test.cc                                      | 1107
-rw-r--r--  intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h  |    9
-rw-r--r--  kernel_api/riscv64/open_emulation.cc                                         |   37
-rw-r--r--  tests/inline_asm_tests/Android.bp                                            |   15
-rw-r--r--  tests/inline_asm_tests/main_riscv64.cc                                       |  290
-rw-r--r--  tests/run_host_tests.mk                                                      |   23
10 files changed, 1347 insertions(+), 267 deletions(-)
diff --git a/assembler/include/berberis/assembler/common_x86.h b/assembler/include/berberis/assembler/common_x86.h
index 86453678..c67ce55a 100644
--- a/assembler/include/berberis/assembler/common_x86.h
+++ b/assembler/include/berberis/assembler/common_x86.h
@@ -785,13 +785,9 @@ inline void AssemblerX86<Assembler>::Xchgl(Register dest, Register src) {
Register other = Assembler::IsAccumulator(src) ? dest : src;
EmitInstruction<Opcodes<0x90>>(Register32Bit(other));
} else {
- // Clang 8 (after r330298) swaps these two arguments. We are comparing output
+ // Clang 8 (after r330298) puts dest before src. We are comparing output
// to clang in exhaustive test thus we want to match clang behavior exactly.
-#if __clang_major__ >= 8
EmitInstruction<Opcodes<0x87>>(Register32Bit(dest), Register32Bit(src));
-#else
- EmitInstruction<Opcodes<0x87>>(Register32Bit(src), Register32Bit(dest));
-#endif
}
}
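
For reference, a minimal standalone sketch of the encoding choices this hunk settles on (names are illustrative, not the assembler's real API): the accumulator/accumulator pair uses the one-byte 0x90 form, a single accumulator operand uses the 0x90+reg short form, and the general case emits 0x87 with dest passed first so the byte output matches clang >= 8.

  #include <cstdint>
  #include <vector>

  enum class Reg : uint8_t { kEax = 0, kEcx, kEdx, kEbx, kEsp, kEbp, kEsi, kEdi };
  inline bool IsAccumulator(Reg r) { return r == Reg::kEax; }

  // Sketch only: which operand lands in the ModRM reg field is exactly the
  // clang-compatibility detail discussed in the comment above.
  inline void EmitXchg32(std::vector<uint8_t>& out, Reg dest, Reg src) {
    if (IsAccumulator(dest) && IsAccumulator(src)) {
      out.push_back(0x90);  // xchg eax, eax is a plain nop.
    } else if (IsAccumulator(dest) || IsAccumulator(src)) {
      Reg other = IsAccumulator(src) ? dest : src;
      out.push_back(0x90 + static_cast<uint8_t>(other));  // short xchg eax, r32
    } else {
      out.push_back(0x87);  // xchg r32, r/m32 with mod=11, dest first
      out.push_back(0xC0 | (static_cast<uint8_t>(dest) << 3) |
                    static_cast<uint8_t>(src));
    }
  }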
diff --git a/assembler/include/berberis/assembler/x86_32.h b/assembler/include/berberis/assembler/x86_32.h
index 40e87a2f..cde5c682 100644
--- a/assembler/include/berberis/assembler/x86_32.h
+++ b/assembler/include/berberis/assembler/x86_32.h
@@ -183,7 +183,7 @@ class Assembler : public AssemblerX86<Assembler> {
// Make sure only type void* can be passed to function below, not Label* or any other type.
template <typename T>
- auto Jmp(Condition cc, T* target) -> void = delete;
+ auto Jmp(T* target) -> void = delete;
void Jmp(const void* target) {
Emit8(0xe9);
diff --git a/assembler/include/berberis/assembler/x86_64.h b/assembler/include/berberis/assembler/x86_64.h
index ba343f86..c66cc1c7 100644
--- a/assembler/include/berberis/assembler/x86_64.h
+++ b/assembler/include/berberis/assembler/x86_64.h
@@ -179,7 +179,7 @@ class Assembler : public AssemblerX86<Assembler> {
// Make sure only type void* can be passed to function below, not Label* or any other type.
template <typename T>
- auto Jmp(Condition cc, T* target) -> void = delete;
+ auto Jmp(T* target) -> void = delete;
void Jmp(const void* target) {
// There are no jump instruction with properties we need thus we emulate it.
@@ -533,22 +533,15 @@ inline void Assembler::Xchgq(Register dest, Register src) {
// We compare output to that from clang and thus want to produce the same code.
// 0x48 0x90 is suboptimal encoding for that operation (pure 0x90 does the same
// and this is what gcc + gas are producing), but this is what clang <= 8 does.
-#if __clang_major__ >= 8
if (IsAccumulator(src) && IsAccumulator(dest)) {
Emit8(0x90);
- } else
-#endif
- if (IsAccumulator(src) || IsAccumulator(dest)) {
+ } else if (IsAccumulator(src) || IsAccumulator(dest)) {
Register other = IsAccumulator(src) ? dest : src;
EmitInstruction<Opcodes<0x90>>(Register64Bit(other));
} else {
- // Clang 8 (after r330298) swaps these two arguments. We are comparing output
+ // Clang 8 (after r330298) puts dest before src. We are comparing output
// to clang in exhaustive test thus we want to match clang behavior exactly.
-#if __clang_major__ >= 8
EmitInstruction<Opcodes<0x87>>(Register64Bit(dest), Register64Bit(src));
-#else
- EmitInstruction<Opcodes<0x87>>(Register64Bit(src), Register64Bit(dest));
-#endif
}
}
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index 0c7bd140..661e4d92 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -469,7 +469,7 @@ class Interpreter {
template <typename ElementType, VectorRegisterGroupMultiplier vlmul>
static constexpr size_t GetVlmax() {
- constexpr int kElementsCount = static_cast<int>(sizeof(SIMD128Register) / sizeof(ElementType));
+ constexpr size_t kElementsCount = sizeof(SIMD128Register) / sizeof(ElementType);
switch (vlmul) {
case VectorRegisterGroupMultiplier::k1register:
return kElementsCount;
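
The computation above is just VLMAX = (VLEN / SEW) * LMUL with VLEN fixed at 128 bits (sizeof(SIMD128Register)). A self-contained sketch of the same arithmetic, with fractional multipliers written as a numerator/denominator pair (illustrative, not the interpreter's enum):

  #include <cstddef>
  #include <cstdint>

  constexpr size_t kVectorRegisterBytes = 16;  // sizeof(SIMD128Register)

  template <typename ElementType>
  constexpr size_t Vlmax(size_t lmul_num, size_t lmul_den = 1) {
    return kVectorRegisterBytes / sizeof(ElementType) * lmul_num / lmul_den;
  }

  static_assert(Vlmax<uint8_t>(1) == 16);     // SEW=8,  LMUL=1
  static_assert(Vlmax<uint32_t>(8) == 32);    // SEW=32, LMUL=8
  static_assert(Vlmax<uint64_t>(1, 2) == 1);  // SEW=64, LMUL=1/2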
@@ -923,8 +923,7 @@ class Interpreter {
if (!IsAligned<kIndexRegistersInvolved>(args.idx)) {
return Undefined();
}
- constexpr size_t kElementsCount =
- static_cast<int>(sizeof(SIMD128Register) / sizeof(IndexElementType));
+ constexpr size_t kElementsCount = sizeof(SIMD128Register) / sizeof(IndexElementType);
alignas(alignof(SIMD128Register))
IndexElementType indexes[kElementsCount * kIndexRegistersInvolved];
memcpy(indexes, state_->cpu.v + args.idx, sizeof(SIMD128Register) * kIndexRegistersInvolved);
@@ -1040,10 +1039,10 @@ class Interpreter {
if (!IsAligned<kNumRegistersInGroup>(dst)) {
return Undefined();
}
- if (dst + kNumRegistersInGroup * kSegmentSize >= 32) {
+ if (dst + kNumRegistersInGroup * kSegmentSize > 32) {
return Undefined();
}
- constexpr size_t kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr size_t kElementsCount = 16 / sizeof(ElementType);
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
if constexpr (opcode == Decoder::VLUmOpOpcode::kVlm) {
@@ -1211,7 +1210,7 @@ class Interpreter {
auto vma,
typename GetElementIndexLambdaType>
void OpVectorGather(uint8_t dst, uint8_t src1, GetElementIndexLambdaType GetElementIndex) {
- constexpr int kRegistersInvolved = NumberOfRegistersInvolved(vlmul);
+ constexpr size_t kRegistersInvolved = NumberOfRegistersInvolved(vlmul);
if (!IsAligned<kRegistersInvolved>(dst | src1)) {
return Undefined();
}
@@ -1219,7 +1218,7 @@ class Interpreter {
if (dst < (src1 + kRegistersInvolved) && src1 < (dst + kRegistersInvolved)) {
return Undefined();
}
- constexpr int kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr size_t kElementsCount = 16 / sizeof(ElementType);
constexpr size_t vlmax = GetVlmax<ElementType, vlmul>();
size_t vstart = GetCsr<CsrName::kVstart>();
@@ -1323,6 +1322,10 @@ class Interpreter {
case Decoder::VOpFVfOpcode::kVfsgnjxvf:
return OpVectorvx<intrinsics::Vfsgnjxvx<ElementType>, ElementType, vlmul, vta, vma>(
args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfslide1upvf:
+ return OpVectorslide1up<ElementType, vlmul, vta, vma>(args.dst, args.src1, arg2);
+ case Decoder::VOpFVfOpcode::kVfslide1downvf:
+ return OpVectorslide1down<ElementType, vlmul, vta, vma>(args.dst, args.src1, arg2);
case Decoder::VOpFVfOpcode::kVfmvsf:
if constexpr (!std::is_same_v<decltype(vma), intrinsics::NoInactiveProcessing>) {
return Undefined();
@@ -1640,14 +1643,14 @@ class Interpreter {
vlmul,
vta,
vma,
- kFrm>(args.dst, args.src1, Vec<kNegativeZero>{args.src2});
+ kFrm>(args.dst, Vec<kNegativeZero>{args.src1}, args.src2);
} else {
return OpVectorvs<intrinsics::Vfredusumvs<ElementType>,
ElementType,
vlmul,
vta,
vma,
- kFrm>(args.dst, args.src1, Vec<kPositiveZero>{args.src2});
+ kFrm>(args.dst, Vec<kPositiveZero>{args.src1}, args.src2);
}
case Decoder::VOpFVvOpcode::kVfredosumvs:
// 14.3. Vector Single-Width Floating-Point Reduction Instructions:
@@ -1658,14 +1661,14 @@ class Interpreter {
vlmul,
vta,
vma,
- kFrm>(args.dst, args.src1, Vec<kNegativeZero>{args.src2});
+ kFrm>(args.dst, Vec<kNegativeZero>{args.src1}, args.src2);
} else {
return OpVectorvs<intrinsics::Vfredosumvs<ElementType>,
ElementType,
vlmul,
vta,
vma,
- kFrm>(args.dst, args.src1, Vec<kPositiveZero>{args.src2});
+ kFrm>(args.dst, Vec<kPositiveZero>{args.src1}, args.src2);
}
case Decoder::VOpFVvOpcode::kVfminvv:
return OpVectorvv<intrinsics::Vfminvv<ElementType>, ElementType, vlmul, vta, vma>(
@@ -1674,10 +1677,10 @@ class Interpreter {
// For Vfredmin the identity element is +inf.
return OpVectorvs<intrinsics::Vfredminvs<ElementType>, ElementType, vlmul, vta, vma>(
args.dst,
- args.src1,
Vec<UnsignedType{(sizeof(ElementType) == sizeof(Float32)) ? 0x7f80'0000
: 0x7ff0'0000'0000'0000}>{
- args.src2});
+ args.src1},
+ args.src2);
case Decoder::VOpFVvOpcode::kVfmaxvv:
return OpVectorvv<intrinsics::Vfmaxvv<ElementType>, ElementType, vlmul, vta, vma>(
args.dst, args.src1, args.src2);
@@ -1685,10 +1688,10 @@ class Interpreter {
// For Vfredmax the identity element is -inf.
return OpVectorvs<intrinsics::Vfredmaxvs<ElementType>, ElementType, vlmul, vta, vma>(
args.dst,
- args.src1,
Vec<UnsignedType{(sizeof(ElementType) == sizeof(Float32)) ? 0xff80'0000
: 0xfff0'0000'0000'0000}>{
- args.src2});
+ args.src1},
+ args.src2);
case Decoder::VOpFVvOpcode::kVfsgnjvv:
return OpVectorvv<intrinsics::Vfsgnjvv<ElementType>, ElementType, vlmul, vta, vma>(
args.dst, args.src1, args.src2);
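
The Vfredmin/Vfredmax cases above seed the reduction with +inf and -inf expressed as raw bit patterns. A quick standalone check (C++20, std::bit_cast) that those constants are indeed the IEEE-754 single- and double-precision infinities:

  #include <bit>
  #include <cstdint>
  #include <limits>

  static_assert(std::bit_cast<float>(uint32_t{0x7f80'0000}) ==
                std::numeric_limits<float>::infinity());
  static_assert(std::bit_cast<double>(uint64_t{0x7ff0'0000'0000'0000}) ==
                std::numeric_limits<double>::infinity());
  static_assert(std::bit_cast<float>(uint32_t{0xff80'0000}) ==
                -std::numeric_limits<float>::infinity());
  static_assert(std::bit_cast<double>(uint64_t{0xfff0'0000'0000'0000}) ==
                -std::numeric_limits<double>::infinity());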
@@ -2091,6 +2094,20 @@ class Interpreter {
case Decoder::VOpIVvOpcode::kVnsrlwv:
return OpVectorNarrowwv<intrinsics::Vnsrwv<UnsignedType>, UnsignedType, vlmul, vta, vma>(
args.dst, args.src1, args.src2);
+ case Decoder::VOpIVvOpcode::kVnclipuwv:
+ return OpVectorNarrowwv<intrinsics::Vnclipwv<SaturatingUnsignedType>,
+ SaturatingUnsignedType,
+ vlmul,
+ vta,
+ vma,
+ kVxrm>(args.dst, args.src1, args.src2);
+ case Decoder::VOpIVvOpcode::kVnclipwv:
+ return OpVectorNarrowwv<intrinsics::Vnclipwv<SaturatingSignedType>,
+ SaturatingSignedType,
+ vlmul,
+ vta,
+ vma,
+ kVxrm>(args.dst, args.src1, args.src2);
default:
Undefined();
}
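
The new kVnclipuwv/kVnclipwv cases hook up the RVV narrowing fixed-point clip instructions: each 2*SEW-wide source element is shifted right by the low log2(2*SEW) bits of the shift operand, rounded according to vxrm, and saturated into SEW bits (which is what would also set vxsat). A scalar sketch of one signed 32-to-16 element, with the vxrm-specific rounding increment left to a caller-supplied helper (illustrative, not the Vnclipwv intrinsic itself):

  #include <algorithm>
  #include <cstdint>

  // One vnclip.w element: SEW=16 destination, 2*SEW=32 source.
  int16_t NclipElement(int32_t wide, uint8_t shift_operand,
                       int64_t (*RoundingIncrement)(int64_t value, uint8_t shift)) {
    uint8_t shift = shift_operand & 0x1f;  // low log2(2*SEW) = 5 bits
    int64_t shifted = (int64_t{wide} >> shift) + RoundingIncrement(wide, shift);
    // Saturate into the narrow signed range.
    return static_cast<int16_t>(std::clamp<int64_t>(shifted, INT16_MIN, INT16_MAX));
  }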
@@ -2224,6 +2241,20 @@ class Interpreter {
case Decoder::VOpIVxOpcode::kVslidedownvx:
return OpVectorslidedown<ElementType, vlmul, vta, vma>(
args.dst, args.src1, MaybeTruncateTo<UnsignedType>(arg2));
+ case Decoder::VOpIVxOpcode::kVnclipuwx:
+ return OpVectorNarrowwx<intrinsics::Vnclipwx<SaturatingUnsignedType>,
+ SaturatingUnsignedType,
+ vlmul,
+ vta,
+ vma,
+ kVxrm>(args.dst, args.src1, MaybeTruncateTo<UnsignedType>(arg2));
+ case Decoder::VOpIVxOpcode::kVnclipwx:
+ return OpVectorNarrowwx<intrinsics::Vnclipwx<SaturatingSignedType>,
+ SaturatingSignedType,
+ vlmul,
+ vta,
+ vma,
+ kVxrm>(args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2));
default:
Undefined();
}
@@ -2267,35 +2298,35 @@ class Interpreter {
switch (args.opcode) {
case Decoder::VOpMVvOpcode::kVredsumvs:
return OpVectorvs<intrinsics::Vredsumvs<ElementType>, ElementType, vlmul, vta, vma>(
- args.dst, args.src1, Vec<ElementType{}>{args.src2});
+ args.dst, Vec<ElementType{}>{args.src1}, args.src2);
case Decoder::VOpMVvOpcode::kVredandvs:
return OpVectorvs<intrinsics::Vredandvs<ElementType>, ElementType, vlmul, vta, vma>(
- args.dst, args.src1, Vec<~ElementType{}>{args.src2});
+ args.dst, Vec<~ElementType{}>{args.src1}, args.src2);
case Decoder::VOpMVvOpcode::kVredorvs:
return OpVectorvs<intrinsics::Vredorvs<ElementType>, ElementType, vlmul, vta, vma>(
- args.dst, args.src1, Vec<ElementType{}>{args.src2});
+ args.dst, Vec<ElementType{}>{args.src1}, args.src2);
case Decoder::VOpMVvOpcode::kVredxorvs:
return OpVectorvs<intrinsics::Vredxorvs<ElementType>, ElementType, vlmul, vta, vma>(
- args.dst, args.src1, Vec<ElementType{}>{args.src2});
+ args.dst, Vec<ElementType{}>{args.src1}, args.src2);
case Decoder::VOpMVvOpcode::kVredminuvs:
return OpVectorvs<intrinsics::Vredminvs<UnsignedType>, UnsignedType, vlmul, vta, vma>(
args.dst,
- args.src1,
- Vec<UnsignedType{std::numeric_limits<typename UnsignedType::BaseType>::max()}>(
- args.src2));
+ Vec<UnsignedType{std::numeric_limits<typename UnsignedType::BaseType>::max()}>{
+ args.src1},
+ args.src2);
case Decoder::VOpMVvOpcode::kVredminvs:
return OpVectorvs<intrinsics::Vredminvs<SignedType>, SignedType, vlmul, vta, vma>(
args.dst,
- args.src1,
- Vec<SignedType{std::numeric_limits<typename SignedType::BaseType>::max()}>{args.src2});
+ Vec<SignedType{std::numeric_limits<typename SignedType::BaseType>::max()}>{args.src1},
+ args.src2);
case Decoder::VOpMVvOpcode::kVredmaxuvs:
return OpVectorvs<intrinsics::Vredmaxvs<UnsignedType>, UnsignedType, vlmul, vta, vma>(
- args.dst, args.src1, Vec<UnsignedType{}>{args.src2});
+ args.dst, Vec<UnsignedType{}>{args.src1}, args.src2);
case Decoder::VOpMVvOpcode::kVredmaxvs:
return OpVectorvs<intrinsics::Vredmaxvs<SignedType>, SignedType, vlmul, vta, vma>(
args.dst,
- args.src1,
- Vec<SignedType{std::numeric_limits<typename SignedType::BaseType>::min()}>{args.src2});
+ Vec<SignedType{std::numeric_limits<typename SignedType::BaseType>::min()}>{args.src1},
+ args.src2);
case Decoder::VOpMVvOpcode::kVaadduvv:
return OpVectorvv<intrinsics::Vaaddvv<UnsignedType>, UnsignedType, vlmul, vta, vma, kVxrm>(
args.dst, args.src1, args.src2);
@@ -2635,8 +2666,7 @@ class Interpreter {
if (!IsAligned<kIndexRegistersInvolved>(args.idx)) {
return Undefined();
}
- constexpr size_t kElementsCount =
- static_cast<int>(sizeof(SIMD128Register) / sizeof(IndexElementType));
+ constexpr size_t kElementsCount = sizeof(SIMD128Register) / sizeof(IndexElementType);
alignas(alignof(SIMD128Register))
IndexElementType indexes[kElementsCount * kIndexRegistersInvolved];
memcpy(indexes, state_->cpu.v + args.idx, sizeof(SIMD128Register) * kIndexRegistersInvolved);
@@ -2704,7 +2734,7 @@ class Interpreter {
if (data + kNumRegistersInGroup * kSegmentSize > 32) {
return Undefined();
}
- constexpr size_t kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr size_t kElementsCount = 16 / sizeof(ElementType);
size_t vstart = GetCsr<CsrName::kVstart>();
size_t vl = GetCsr<CsrName::kVl>();
if constexpr (opcode == Decoder::VSUmOpOpcode::kVsm) {
@@ -2959,7 +2989,7 @@ class Interpreter {
if (!IsAligned<kRegistersInvolved>(dst | src)) {
return Undefined();
}
- constexpr size_t kElementsCount = static_cast<int>(16 / sizeof(ElementType));
+ constexpr size_t kElementsCount = 16 / sizeof(ElementType);
size_t vstart = GetCsr<CsrName::kVstart>();
SetCsr<CsrName::kVstart>(0);
// The usual property that no elements are written if vstart >= vl does not apply to these
@@ -3124,7 +3154,7 @@ class Interpreter {
auto vma,
CsrName... kExtraCsrs,
auto kDefaultElement>
- void OpVectorvs(uint8_t dst, uint8_t src1, Vec<kDefaultElement> src2) {
+ void OpVectorvs(uint8_t dst, Vec<kDefaultElement> src1, uint8_t src2) {
return OpVectorvs<Intrinsic,
ElementType,
NumberOfRegistersInvolved(vlmul),
@@ -3140,8 +3170,8 @@ class Interpreter {
auto vma,
CsrName... kExtraCsrs,
auto kDefaultElement>
- void OpVectorvs(uint8_t dst, uint8_t src1, Vec<kDefaultElement> src2) {
- if (!IsAligned<kRegistersInvolved>(dst | src2.start_no)) {
+ void OpVectorvs(uint8_t dst, Vec<kDefaultElement> src1, uint8_t src2) {
+ if (!IsAligned<kRegistersInvolved>(dst | src1.start_no)) {
return Undefined();
}
size_t vstart = GetCsr<CsrName::kVstart>();
@@ -3155,15 +3185,15 @@ class Interpreter {
return;
}
auto mask = GetMaskForVectorOperations<vma>();
- ElementType arg1 = SIMD128Register{state_->cpu.v[src1]}.Get<ElementType>(0);
+ ElementType init = SIMD128Register{state_->cpu.v[src2]}.Get<ElementType>(0);
for (size_t index = 0; index < kRegistersInvolved; ++index) {
- arg1 = std::get<0>(
+ init = std::get<0>(
Intrinsic(GetCsr<kExtraCsrs>()...,
- arg1,
- GetVectorArgument<ElementType, vta, vma>(src2, vstart, vl, index, mask)));
+ init,
+ GetVectorArgument<ElementType, vta, vma>(src1, vstart, vl, index, mask)));
}
SIMD128Register result{state_->cpu.v[dst]};
- result.Set(arg1, 0);
+ result.Set(init, 0);
result = std::get<0>(intrinsics::VectorMasking<ElementType, vta>(result, result, 0, 1));
state_->cpu.v[dst] = result.Get<__uint128_t>();
}
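
After this reordering the Vec<> wrapper (which carries the identity element used for inactive and tail elements) travels with the vector operand, while the plain register index names the operand whose element 0 seeds the accumulator; this mirrors the RVV reduction form vd[0] = op(vs1[0], vs2[*]). A plain-array sketch of that shape (illustrative only):

  #include <cstddef>
  #include <functional>

  // init comes from element 0 of the scalar operand; every active element of
  // the vector operand is folded into it; only element 0 of vd is written back.
  template <typename ElementType, typename Op>
  ElementType ReduceVs(const ElementType* vec, size_t vl, ElementType init, Op op) {
    ElementType acc = init;
    for (size_t index = 0; index < vl; ++index) {
      acc = op(acc, vec[index]);
    }
    return acc;
  }

  // E.g. ReduceVs(values, vl, ElementType{0}, std::plus<>{}) mirrors vredsum.vs,
  // while the +inf/-inf constants above are the init values for vfredmin/vfredmax.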
@@ -3593,8 +3623,8 @@ class Interpreter {
if (!IsAligned<kDestRegistersInvolved>(dst) || !IsAligned<kSourceRegistersInvolved>(src)) {
return Undefined();
}
- int vstart = GetCsr<CsrName::kVstart>();
- int vl = GetCsr<CsrName::kVl>();
+ size_t vstart = GetCsr<CsrName::kVstart>();
+ size_t vl = GetCsr<CsrName::kVl>();
// When vstart >= vl, there are no body elements, and no elements are updated in any destination
// vector register group, including that no tail elements are updated with agnostic values.
if (vstart >= vl) [[unlikely]] {
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 2feb230d..1cbdfcb2 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -1224,7 +1224,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
// instructions that work with double width floats.
// These instructions never use float registers though and thus we don't need to store
// anything into f1 register, if they are used.
- // For Float32/Float64 case we load 1.0 of the appropriate type into f1.
+ // For Float32/Float64 case we load 5.625 of the appropriate type into f1.
ASSERT_LE(vsew, 3);
if (vsew == 2) {
SetFReg<1>(state_.cpu, 0xffff'ffff'40b4'0000); // float 5.625
@@ -1341,19 +1341,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
(kTestVectorInstructionMode == TestVectorInstructionMode::kWidening),
8,
expected_result,
- [] {
- if constexpr (sizeof(ElementType) == sizeof(Int8)) {
- return kMaskInt8;
- } else if constexpr (sizeof(ElementType) == sizeof(Int16)) {
- return kMaskInt16;
- } else if constexpr (sizeof(ElementType) == sizeof(Int32)) {
- return kMaskInt32;
- } else if constexpr (sizeof(ElementType) == sizeof(Int64)) {
- return kMaskInt64;
- } else {
- static_assert(kDependentTypeFalse<ElementType>);
- }
- }()),
+ MaskForElem<ElementType>()),
Verify((insn_bytes &
~(0x01f00000 * (kTestVectorInstructionMode == TestVectorInstructionMode::kVMerge))) |
(1 << 25),
@@ -1528,11 +1516,9 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
}
- template <bool kIsMasked, typename ElementType>
+ template <typename ElementType>
auto MaskForElem() {
- if constexpr (!kIsMasked) {
- return kNoMask;
- } else if constexpr (std::is_same_v<ElementType, uint8_t>) {
+ if constexpr (std::is_same_v<ElementType, uint8_t>) {
return kMaskInt8;
} else if constexpr (std::is_same_v<ElementType, uint16_t>) {
return kMaskInt16;
@@ -1545,6 +1531,15 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
}
+ template <bool kIsMasked, typename ElementType>
+ auto MaskForElemIfMasked() {
+ if constexpr (!kIsMasked) {
+ return kNoMask;
+ } else {
+ return MaskForElem<ElementType>();
+ }
+ }
+
template <bool kIsMasked>
void TestVectorIota(uint32_t insn_bytes,
const uint8_t (&expected_result_int8)[8][16],
@@ -1662,7 +1657,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
(Verify(insn_bytes,
BitUtilLog2(sizeof(ElementType)),
expected_result,
- MaskForElem<kIsMasked, ElementType>()),
+ MaskForElemIfMasked<kIsMasked, ElementType>()),
...);
}
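
The Verify calls are now expanded with a C++17 comma fold over the ElementType parameter pack, so each element width is checked once with its matching mask constant. A toy version of the same dispatch pattern (names are illustrative, not the test fixture's):

  #include <cstdint>
  #include <cstdio>

  template <typename T>
  constexpr int Log2Size() {
    return sizeof(T) == 1 ? 0 : sizeof(T) == 2 ? 1 : sizeof(T) == 4 ? 2 : 3;
  }

  inline void Verify(int vsew) { std::printf("verify vsew=%d\n", vsew); }

  template <typename... ElementType>
  void VerifyAll() {
    // Comma fold: one Verify call per pack element, evaluated left to right.
    (Verify(Log2Size<ElementType>()), ...);
  }

  // VerifyAll<uint8_t, uint16_t, uint32_t, uint64_t>() prints vsew 0 through 3.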
@@ -1931,6 +1926,49 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
}
+ void TestVectorFloatPermutationInstruction(uint32_t insn_bytes,
+ const uint32_t (&expected_result_int32)[8][4],
+ const uint64_t (&expected_result_int64)[8][2],
+ const __v2du (&source)[16],
+ uint8_t vlmul,
+ uint64_t skip = 0,
+ bool ignore_vma_for_last = false,
+ bool last_elem_is_f1 = false) {
+ TestVectorPermutationInstruction<TestVectorInstructionKind::kFloat>(insn_bytes,
+ source,
+ vlmul,
+ skip,
+ ignore_vma_for_last,
+ last_elem_is_f1,
+ /* regx1 */ 0x0,
+ expected_result_int32,
+ expected_result_int64);
+ }
+
+ void TestVectorPermutationInstruction(uint32_t insn_bytes,
+ const uint8_t (&expected_result_int8)[8][16],
+ const uint16_t (&expected_result_int16)[8][8],
+ const uint32_t (&expected_result_int32)[8][4],
+ const uint64_t (&expected_result_int64)[8][2],
+ const __v2du (&source)[16],
+ uint8_t vlmul,
+ uint64_t regx1 = 0x0,
+ uint64_t skip = 0,
+ bool ignore_vma_for_last = false,
+ bool last_elem_is_x1 = false) {
+ TestVectorPermutationInstruction<TestVectorInstructionKind::kInteger>(insn_bytes,
+ source,
+ vlmul,
+ skip,
+ ignore_vma_for_last,
+ last_elem_is_x1,
+ regx1,
+ expected_result_int8,
+ expected_result_int16,
+ expected_result_int32,
+ expected_result_int64);
+ }
+
// Unlike regular arithmetic instructions, the result of a permutation
// instruction depends also on vlmul. Also, the vslideup specs mention that
// the destination vector remains unchanged the first |offset| elements (in
@@ -1940,21 +1978,23 @@ class Riscv64InterpreterTest : public ::testing::Test {
//
// If |ignore_vma_for_last| is true, an inactive element at vl-1 will be
// treated as if vma=0 (Undisturbed).
- // If |last_elem_is_x1| is true, the last element of the vector in
+ // If |last_elem_is_reg1| is true, the last element of the vector in
// expected_result (that is, at vl-1) will be expected to be the same as
// |regx1| when VL < VMAX and said element is active.
- void TestVectorPermutationInstruction(uint32_t insn_bytes,
- const __v16qu (&expected_result_int8)[8],
- const __v8hu (&expected_result_int16)[8],
- const __v4su (&expected_result_int32)[8],
- const __v2du (&expected_result_int64)[8],
- const __v2du (&source)[16],
- uint8_t vlmul,
- uint64_t regx1 = 0x0,
- uint64_t skip = 0,
- bool ignore_vma_for_last = false,
- bool last_elem_is_x1 = false) {
- auto Verify = [this, &source, vlmul, regx1, skip, ignore_vma_for_last, last_elem_is_x1](
+ template <TestVectorInstructionKind kTestVectorInstructionKind,
+ typename... ElementType,
+ size_t... kResultsCount,
+ size_t... kElementCount>
+ void TestVectorPermutationInstruction(
+ uint32_t insn_bytes,
+ const __v2du (&source)[16],
+ uint8_t vlmul,
+ uint64_t skip,
+ bool ignore_vma_for_last,
+ bool last_elem_is_reg1,
+ uint64_t regx1,
+ const ElementType (&... expected_result)[kResultsCount][kElementCount]) {
+ auto Verify = [this, &source, vlmul, regx1, skip, ignore_vma_for_last, last_elem_is_reg1](
uint32_t insn_bytes,
uint8_t vsew,
const auto& expected_result_raw,
@@ -1965,8 +2005,24 @@ class Riscv64InterpreterTest : public ::testing::Test {
for (size_t index = 0; index < std::size(source); ++index) {
state_.cpu.v[16 + index] = SIMD128Register{source[index]}.Get<__uint128_t>();
}
- // Set x1 for vx instructions.
- SetXReg<1>(state_.cpu, regx1);
+
+ if constexpr (kTestVectorInstructionKind == TestVectorInstructionKind::kFloat) {
+ UNUSED(regx1);
+ // We only support Float32/Float64 for float instructions, but there are conversion
+ // instructions that work with double width floats.
+ // These instructions never use float registers though and thus we don't need to store
+ // anything into f1 register, if they are used.
+ // For Float32/Float64 case we load 5.625 of the appropriate type into f1.
+ ASSERT_LE(vsew, 3);
+ if (vsew == 2) {
+ SetFReg<1>(state_.cpu, 0xffff'ffff'40b4'0000); // float 5.625
+ } else if (vsew == 3) {
+ SetFReg<1>(state_.cpu, 0x4016'8000'0000'0000); // double 5.625
+ }
+ } else {
+ // Set x1 for vx instructions.
+ SetXReg<1>(state_.cpu, regx1);
+ }
const size_t kElementSize = 1 << vsew;
size_t num_regs = 1 << vlmul;
@@ -2037,7 +2093,7 @@ class Riscv64InterpreterTest : public ::testing::Test {
expected_result[index] = SIMD128Register{expected_result_raw[index]};
}
- if (vlmul == 2 && last_elem_is_x1) {
+ if (vlmul == 2 && last_elem_is_reg1) {
switch (kElementSize) {
case 1:
expected_result[last_reg].template Set<uint8_t>(
@@ -2048,12 +2104,22 @@ class Riscv64InterpreterTest : public ::testing::Test {
static_cast<uint16_t>(GetXReg<1>(state_.cpu)), last_elem);
break;
case 4:
- expected_result[last_reg].template Set<uint32_t>(
- static_cast<uint32_t>(GetXReg<1>(state_.cpu)), last_elem);
+ if constexpr (kTestVectorInstructionKind == TestVectorInstructionKind::kFloat) {
+ expected_result[last_reg].template Set<uint32_t>(
+ static_cast<uint32_t>(GetFReg<1>(state_.cpu)), last_elem);
+ } else {
+ expected_result[last_reg].template Set<uint32_t>(
+ static_cast<uint32_t>(GetXReg<1>(state_.cpu)), last_elem);
+ }
break;
case 8:
- expected_result[last_reg].template Set<uint64_t>(
- static_cast<uint64_t>(GetXReg<1>(state_.cpu)), last_elem);
+ if constexpr (kTestVectorInstructionKind == TestVectorInstructionKind::kFloat) {
+ expected_result[last_reg].template Set<uint64_t>(
+ static_cast<uint64_t>(GetFReg<1>(state_.cpu)), last_elem);
+ } else {
+ expected_result[last_reg].template Set<uint64_t>(
+ static_cast<uint64_t>(GetXReg<1>(state_.cpu)), last_elem);
+ }
break;
default:
FAIL() << "Element size is " << kElementSize;
@@ -2114,16 +2180,12 @@ class Riscv64InterpreterTest : public ::testing::Test {
}
};
- // Some instructions don't support use of mask register, but in these instructions bit
- // #25 is set. Test it and skip masking tests if so.
- Verify(insn_bytes, 0, expected_result_int8, kMaskInt8);
- Verify(insn_bytes, 1, expected_result_int16, kMaskInt16);
- Verify(insn_bytes, 2, expected_result_int32, kMaskInt32);
- Verify(insn_bytes, 3, expected_result_int64, kMaskInt64);
- Verify(insn_bytes | (1 << 25), 0, expected_result_int8, kNoMask);
- Verify(insn_bytes | (1 << 25), 1, expected_result_int16, kNoMask);
- Verify(insn_bytes | (1 << 25), 2, expected_result_int32, kNoMask);
- Verify(insn_bytes | (1 << 25), 3, expected_result_int64, kNoMask);
+ // Test with and without masking enabled.
+ (Verify(
+ insn_bytes, BitUtilLog2(sizeof(ElementType)), expected_result, MaskForElem<ElementType>()),
+ ...);
+ (Verify(insn_bytes | (1 << 25), BitUtilLog2(sizeof(ElementType)), expected_result, kNoMask),
+ ...);
}
protected:
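
The table of eight hand-written Verify calls is replaced by two folds: one with the instruction as encoded (vm=0, masked by v0.t) and one with bit 25 set, which in the vector instruction format is the vm field meaning "unmasked". A standalone check of that bit on one of the opcodes used in these tests:

  #include <cstdint>

  constexpr uint32_t kVmBit = 1u << 25;  // vm field of RVV instructions
  constexpr uint32_t SetUnmasked(uint32_t insn_bytes) { return insn_bytes | kVmBit; }
  constexpr bool IsMasked(uint32_t insn_bytes) { return (insn_bytes & kVmBit) == 0; }

  static_assert(IsMasked(0xb900c457));                // Vnclipu.wx ..., v0.t
  static_assert(!IsMasked(SetUnmasked(0xb900c457)));  // same opcode, unmasked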
@@ -2662,6 +2724,69 @@ TEST_F(Riscv64InterpreterTest, TestRNU) {
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}},
kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(0xb900c457, // Vnclipu.wx v8, v16, x1, v0.t
+ {{32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40},
+ {40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 48},
+ {48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56},
+ {56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0021'c1a1, 0x0023'c3a3, 0x0025'c5a5, 0x0027'c7a7},
+ {0x0029'c9a9, 0x002b'cbab, 0x002d'cdad, 0x002f'cfaf},
+ {0x0031'd1b1, 0x0033'd3b3, 0x0035'd5b5, 0x0037'd7b7},
+ {0x0039'd9b9, 0x003b'dbbb, 0x003d'ddbd, 0x003f'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd00c457, // Vnclip.wx v8, v16, x1, v0.t
+ {{224, 225, 225, 226, 226, 227, 227, 228, 228, 229, 229, 230, 230, 231, 231, 232},
+ {232, 233, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 239, 239, 240},
+ {240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248},
+ {248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 0}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0xdfbf}},
+ {{0xffe1'c1a1, 0xffe3'c3a3, 0xffe5'c5a5, 0xffe7'c7a7},
+ {0xffe9'c9a9, 0xffeb'cbab, 0xffed'cdad, 0xffef'cfaf},
+ {0xfff1'd1b1, 0xfff3'd3b3, 0xfff5'd5b5, 0xfff7'd7b7},
+ {0xfff9'd9b9, 0xfffb'dbbb, 0xfffd'ddbd, 0xffff'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xb90c0457, // Vnclipu.wv v8, v16, v24, v0.t
+ {{255, 255, 255, 255, 69, 35, 9, 2, 255, 255, 255, 255, 153, 39, 10, 2},
+ {255, 255, 255, 255, 85, 43, 11, 3, 255, 255, 255, 255, 185, 47, 12, 3},
+ {255, 255, 255, 255, 101, 51, 13, 3, 255, 255, 255, 255, 217, 55, 14, 3},
+ {255, 255, 255, 255, 117, 59, 15, 4, 255, 255, 255, 255, 249, 63, 16, 4}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0x4989, 0x0971, 0x009b, 0x000a},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x5999, 0x0b73, 0x00bb, 0x000c},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x69a9, 0x0d75, 0x00db, 0x000e},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x79b9, 0x0f77, 0x00fb, 0x0010}},
+ {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xa726'a525, 0x0057'9757, 0x0000'5b9b, 0x0000'00bf},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xe766'e565, 0x0077'b777, 0x0000'7bbb, 0x0000'00ff}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd0c0457, // Vnclip.wv v8, v16, v24, v0.t
+ {{128, 128, 128, 128, 197, 227, 249, 254, 128, 128, 128, 128, 153, 231, 250, 254},
+ {128, 128, 128, 128, 213, 235, 251, 255, 128, 128, 128, 128, 185, 239, 252, 255},
+ {128, 128, 128, 128, 229, 243, 253, 255, 128, 128, 128, 128, 217, 247, 254, 255},
+ {128, 128, 128, 158, 245, 251, 255, 0, 128, 128, 128, 222, 249, 255, 0, 0}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0xc989, 0xf971, 0xff9b, 0xfffa},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xd999, 0xfb73, 0xffbb, 0xfffc},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xe9a9, 0xfd75, 0xffdb, 0xfffe},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xf9b9, 0xff77, 0xfffb, 0x0000}},
+ {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xa726'a525, 0xffd7'9757, 0xffff'db9b, 0xffff'ffbf},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xe766'e565, 0xfff7'b777, 0xffff'fbbb, 0xffff'ffff}},
+ kVectorCalculationsSource);
}
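
TestRNU above and TestRNE/TestRDN/TestROD below exercise the four vxrm fixed-point rounding modes; the expected narrowing-clip values differ only in the rounding increment r added after dropping d bits, roundoff(v, d) = (v >> d) + r. A scalar sketch of those increments as described in the RVV spec (illustrative; bit names follow the spec's v[d-1], v[d-2:0], v[d]):

  #include <cstdint>

  enum class Vxrm { kRnu = 0, kRne = 1, kRdn = 2, kRod = 3 };

  // Assumes 0 < d < 64; d == 0 means no rounding at all.
  inline uint64_t RoundingIncrement(uint64_t v, unsigned d, Vxrm mode) {
    if (d == 0) return 0;
    uint64_t msb = (v >> (d - 1)) & 1;                                   // v[d-1]
    uint64_t rest = (d > 1) ? (v & ((uint64_t{1} << (d - 1)) - 1)) : 0;  // v[d-2:0]
    uint64_t low = (v >> d) & 1;                                         // v[d]
    switch (mode) {
      case Vxrm::kRnu: return msb;                               // round-to-nearest-up
      case Vxrm::kRne: return msb & ((rest != 0) | low);         // round-to-nearest-even
      case Vxrm::kRdn: return 0;                                 // round-down (truncate)
      case Vxrm::kRod: return (~low & 1) & ((msb | rest) != 0);  // round-to-odd
    }
    return 0;
  }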
TEST_F(Riscv64InterpreterTest, TestRNE) {
@@ -2974,6 +3099,69 @@ TEST_F(Riscv64InterpreterTest, TestRNE) {
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}},
kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(0xb900c457, // Vnclipu.wx v8, v16, x1, v0.t
+ {{32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40},
+ {40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 48},
+ {48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56},
+ {56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0021'c1a1, 0x0023'c3a3, 0x0025'c5a5, 0x0027'c7a7},
+ {0x0029'c9a9, 0x002b'cbab, 0x002d'cdad, 0x002f'cfaf},
+ {0x0031'd1b1, 0x0033'd3b3, 0x0035'd5b5, 0x0037'd7b7},
+ {0x0039'd9b9, 0x003b'dbbb, 0x003d'ddbd, 0x003f'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd00c457, // Vnclip.wx v8, v16, x1, v0.t
+ {{224, 225, 225, 226, 226, 227, 227, 228, 228, 229, 229, 230, 230, 231, 231, 232},
+ {232, 233, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 239, 239, 240},
+ {240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248},
+ {248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 0}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0xdfbf}},
+ {{0xffe1'c1a1, 0xffe3'c3a3, 0xffe5'c5a5, 0xffe7'c7a7},
+ {0xffe9'c9a9, 0xffeb'cbab, 0xffed'cdad, 0xffef'cfaf},
+ {0xfff1'd1b1, 0xfff3'd3b3, 0xfff5'd5b5, 0xfff7'd7b7},
+ {0xfff9'd9b9, 0xfffb'dbbb, 0xfffd'ddbd, 0xffff'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xb90c0457, // Vnclipu.wv v8, v16, v24, v0.t
+ {{255, 255, 255, 255, 69, 35, 9, 2, 255, 255, 255, 255, 153, 39, 10, 2},
+ {255, 255, 255, 255, 85, 43, 11, 3, 255, 255, 255, 255, 185, 47, 12, 3},
+ {255, 255, 255, 255, 101, 51, 13, 3, 255, 255, 255, 255, 217, 55, 14, 3},
+ {255, 255, 255, 255, 117, 59, 15, 4, 255, 255, 255, 255, 249, 63, 16, 4}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0x4989, 0x0971, 0x009b, 0x000a},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x5999, 0x0b73, 0x00bb, 0x000c},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x69a9, 0x0d75, 0x00db, 0x000e},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x79b9, 0x0f77, 0x00fb, 0x0010}},
+ {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xa726'a525, 0x0057'9757, 0x0000'5b9b, 0x0000'00bf},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xe766'e565, 0x0077'b777, 0x0000'7bbb, 0x0000'00ff}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd0c0457, // Vnclip.wv v8, v16, v24, v0.t
+ {{128, 128, 128, 128, 197, 227, 249, 254, 128, 128, 128, 128, 153, 231, 250, 254},
+ {128, 128, 128, 128, 213, 235, 251, 255, 128, 128, 128, 128, 185, 239, 252, 255},
+ {128, 128, 128, 128, 229, 243, 253, 255, 128, 128, 128, 128, 217, 247, 254, 255},
+ {128, 128, 128, 158, 245, 251, 255, 0, 128, 128, 128, 222, 249, 255, 0, 0}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0xc989, 0xf971, 0xff9b, 0xfffa},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xd999, 0xfb73, 0xffbb, 0xfffc},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xe9a9, 0xfd75, 0xffdb, 0xfffe},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xf9b9, 0xff77, 0xfffb, 0x0000}},
+ {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xa726'a525, 0xffd7'9757, 0xffff'db9b, 0xffff'ffbf},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xe766'e565, 0xfff7'b777, 0xffff'fbbb, 0xffff'ffff}},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestRDN) {
@@ -3286,6 +3474,69 @@ TEST_F(Riscv64InterpreterTest, TestRDN) {
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}},
kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(0xb900c457, // Vnclipu.wx v8, v16, x1, v0.t
+ {{32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39},
+ {40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47},
+ {48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55},
+ {56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0021'c1a1, 0x0023'c3a3, 0x0025'c5a5, 0x0027'c7a7},
+ {0x0029'c9a9, 0x002b'cbab, 0x002d'cdad, 0x002f'cfaf},
+ {0x0031'd1b1, 0x0033'd3b3, 0x0035'd5b5, 0x0037'd7b7},
+ {0x0039'd9b9, 0x003b'dbbb, 0x003d'ddbd, 0x003f'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd00c457, // Vnclip.wx v8, v16, x1, v0.t
+ {{224, 224, 225, 225, 226, 226, 227, 227, 228, 228, 229, 229, 230, 230, 231, 231},
+ {232, 232, 233, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 239, 239},
+ {240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247},
+ {248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0xdfbf}},
+ {{0xffe1'c1a1, 0xffe3'c3a3, 0xffe5'c5a5, 0xffe7'c7a7},
+ {0xffe9'c9a9, 0xffeb'cbab, 0xffed'cdad, 0xffef'cfaf},
+ {0xfff1'd1b1, 0xfff3'd3b3, 0xfff5'd5b5, 0xfff7'd7b7},
+ {0xfff9'd9b9, 0xfffb'dbbb, 0xfffd'ddbd, 0xffff'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xb90c0457, // Vnclipu.wv v8, v16, v24, v0.t
+ {{255, 255, 255, 255, 68, 34, 8, 2, 255, 255, 255, 255, 153, 38, 9, 2},
+ {255, 255, 255, 255, 84, 42, 10, 2, 255, 255, 255, 255, 185, 46, 11, 2},
+ {255, 255, 255, 255, 100, 50, 12, 3, 255, 255, 255, 255, 217, 54, 13, 3},
+ {255, 255, 255, 255, 116, 58, 14, 3, 255, 255, 255, 255, 249, 62, 15, 3}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0x4989, 0x0971, 0x009b, 0x0009},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x5999, 0x0b73, 0x00bb, 0x000b},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x69a9, 0x0d75, 0x00db, 0x000d},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x79b9, 0x0f77, 0x00fb, 0x000f}},
+ {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xa726'a524, 0x0057'9756, 0x0000'5b9b, 0x0000'00bf},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xe766'e564, 0x0077'b776, 0x0000'7bbb, 0x0000'00ff}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd0c0457, // Vnclip.wv v8, v16, v24, v0.t
+ {{128, 128, 128, 128, 196, 226, 248, 254, 128, 128, 128, 128, 153, 230, 249, 254},
+ {128, 128, 128, 128, 212, 234, 250, 254, 128, 128, 128, 128, 185, 238, 251, 254},
+ {128, 128, 128, 128, 228, 242, 252, 255, 128, 128, 128, 128, 217, 246, 253, 255},
+ {128, 128, 128, 157, 244, 250, 254, 255, 128, 128, 128, 221, 249, 254, 255, 255}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0xc989, 0xf971, 0xff9b, 0xfff9},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xd999, 0xfb73, 0xffbb, 0xfffb},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xe9a9, 0xfd75, 0xffdb, 0xfffd},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xf9b9, 0xff77, 0xfffb, 0xffff}},
+ {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xa726'a524, 0xffd7'9756, 0xffff'db9b, 0xffff'ffbf},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xe766'e564, 0xfff7'b776, 0xffff'fbbb, 0xffff'ffff}},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestROD) {
@@ -3598,6 +3849,69 @@ TEST_F(Riscv64InterpreterTest, TestROD) {
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000}},
kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(0xb900c457, // Vnclipu.wx v8, v16, x1, v0.t
+ {{33, 33, 33, 33, 35, 35, 35, 35, 37, 37, 37, 37, 39, 39, 39, 39},
+ {41, 41, 41, 41, 43, 43, 43, 43, 45, 45, 45, 45, 47, 47, 47, 47},
+ {49, 49, 49, 49, 51, 51, 51, 51, 53, 53, 53, 53, 55, 55, 55, 55},
+ {57, 57, 57, 57, 59, 59, 59, 59, 61, 61, 61, 61, 63, 63, 63, 63}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}},
+ {{0x0021'c1a1, 0x0023'c3a3, 0x0025'c5a5, 0x0027'c7a7},
+ {0x0029'c9a9, 0x002b'cbab, 0x002d'cdad, 0x002f'cfaf},
+ {0x0031'd1b1, 0x0033'd3b3, 0x0035'd5b5, 0x0037'd7b7},
+ {0x0039'd9b9, 0x003b'dbbb, 0x003d'ddbd, 0x003f'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd00c457, // Vnclip.wx v8, v16, x1, v0.t
+ {{225, 225, 225, 225, 227, 227, 227, 227, 229, 229, 229, 229, 231, 231, 231, 231},
+ {233, 233, 233, 233, 235, 235, 235, 235, 237, 237, 237, 237, 239, 239, 239, 239},
+ {241, 241, 241, 241, 243, 243, 243, 243, 245, 245, 245, 245, 247, 247, 247, 247},
+ {249, 249, 249, 249, 251, 251, 251, 251, 253, 253, 253, 253, 255, 255, 255, 255}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0xdfbf}},
+ {{0xffe1'c1a1, 0xffe3'c3a3, 0xffe5'c5a5, 0xffe7'c7a7},
+ {0xffe9'c9a9, 0xffeb'cbab, 0xffed'cdad, 0xffef'cfaf},
+ {0xfff1'd1b1, 0xfff3'd3b3, 0xfff5'd5b5, 0xfff7'd7b7},
+ {0xfff9'd9b9, 0xfffb'dbbb, 0xfffd'ddbd, 0xffff'dfbf}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xb90c0457, // Vnclipu.wv v8, v16, v24, v0.t
+ {{255, 255, 255, 255, 69, 35, 9, 3, 255, 255, 255, 255, 153, 39, 9, 3},
+ {255, 255, 255, 255, 85, 43, 11, 3, 255, 255, 255, 255, 185, 47, 11, 3},
+ {255, 255, 255, 255, 101, 51, 13, 3, 255, 255, 255, 255, 217, 55, 13, 3},
+ {255, 255, 255, 255, 117, 59, 15, 3, 255, 255, 255, 255, 249, 63, 15, 3}},
+ {{0xffff, 0xffff, 0xffff, 0xffff, 0x4989, 0x0971, 0x009b, 0x0009},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x5999, 0x0b73, 0x00bb, 0x000b},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x69a9, 0x0d75, 0x00db, 0x000d},
+ {0xffff, 0xffff, 0xffff, 0xffff, 0x79b9, 0x0f77, 0x00fb, 0x000f}},
+ {{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xa726'a525, 0x0057'9757, 0x0000'5b9b, 0x0000'00bf},
+ {0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
+ {0xe766'e565, 0x0077'b777, 0x0000'7bbb, 0x0000'00ff}},
+ kVectorCalculationsSource);
+
+ TestNarrowingVectorInstruction(
+ 0xbd0c0457, // Vnclip.wv v8, v16, v24, v0.t
+ {{128, 128, 128, 128, 197, 227, 249, 255, 128, 128, 128, 128, 153, 231, 249, 255},
+ {128, 128, 128, 128, 213, 235, 251, 255, 128, 128, 128, 128, 185, 239, 251, 255},
+ {128, 128, 128, 128, 229, 243, 253, 255, 128, 128, 128, 128, 217, 247, 253, 255},
+ {128, 128, 128, 157, 245, 251, 255, 255, 128, 128, 128, 221, 249, 255, 255, 255}},
+ {{0x8000, 0x8000, 0x8000, 0x8000, 0xc989, 0xf971, 0xff9b, 0xfff9},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xd999, 0xfb73, 0xffbb, 0xfffb},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xe9a9, 0xfd75, 0xffdb, 0xfffd},
+ {0x8000, 0x8000, 0x8000, 0x8000, 0xf9b9, 0xff77, 0xfffb, 0xffff}},
+ {{0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xa726'a525, 0xffd7'9757, 0xffff'db9b, 0xffff'ffbf},
+ {0x8000'0000, 0x8000'0000, 0x8000'0000, 0x8000'0000},
+ {0xe766'e565, 0xfff7'b777, 0xffff'fbbb, 0xffff'ffff}},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVlXreXX) {
@@ -10061,284 +10375,568 @@ TEST_F(Riscv64InterpreterTest, TestVfsgnj) {
TEST_F(Riscv64InterpreterTest, TestVredsum) {
TestVectorReductionInstruction(
- 0x10c2457, // vredsum.vs v8,v16,v24,v0.t
+ 0x1882457, // vredsum.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{242, 228, 200, 144, /* unused */ 0, 146, 44, 121},
// expected_result_vd0_int16
{0x0172, 0x82e4, 0x88c8, 0xa090, /* unused */ 0, 0x1300, 0xa904, 0xe119},
// expected_result_vd0_int32
- {0xcb44'b932, 0x9407'71e4, 0xa70e'64c8, 0xd312'5090, /* unused */ 0, /* unused */ 0,
- 0x1907'1300, 0xb713'ad09},
+ {0xcb44'b932,
+ 0x9407'71e4,
+ 0xa70e'64c8,
+ 0xd312'5090,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1907'1300,
+ 0xb713'ad09},
// expected_result_vd0_int64
- {0xb32f'a926'9f1b'9511, 0x1f99'0d88'fb74'e962, 0xb92c'970e'74e8'52c4, 0xef4e'ad14'6aca'2888,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x2513'1f0e'1907'1300},
+ {0xb32f'a926'9f1b'9511,
+ 0x1f99'0d88'fb74'e962,
+ 0xb92c'970e'74e8'52c4,
+ 0xef4e'ad14'6aca'2888,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x2513'1f0e'1907'1300},
// expected_result_vd0_with_mask_int8
{39, 248, 142, 27, /* unused */ 0, 0, 154, 210},
// expected_result_vd0_with_mask_int16
{0x5f45, 0xc22f, 0x99d0, 0x98bf, /* unused */ 0, 0x1300, 0x1300, 0x4b15},
// expected_result_vd0_with_mask_int32
- {0x2d38'1f29, 0x99a1'838a, 0x1989'ef5c, 0x9cf4'4aa1, /* unused */ 0, /* unused */ 0,
- 0x1907'1300, 0x1907'1300},
+ {0x2d38'1f29,
+ 0x99a1'838a,
+ 0x1989'ef5c,
+ 0x9cf4'4aa1,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1907'1300,
+ 0x1907'1300},
// expected_result_vd0_with_mask_int64
- {0x2513'1f0e'1907'1300, 0x917c'8370'7560'6751, 0x4e56'3842'222a'0c13, 0xc833'9e0e'73df'49b5,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x2513'1f0e'1907'1300},
+ {0x2513'1f0e'1907'1300,
+ 0x917c'8370'7560'6751,
+ 0x4e56'3842'222a'0c13,
+ 0xc833'9e0e'73df'49b5,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x2513'1f0e'1907'1300},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVfredosum) {
- TestVectorReductionInstruction(
- 0xd0c1457, // vfredosum.vs v8, v16, v24, v0.t
- // expected_result_vd0_int32
- {0x9e0c'9a8e, 0xbe2c'bace, 0xfe6c'fb4e, 0x7e6b'fc4d, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9e0c'9a8e},
- // expected_result_vd0_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xfe6c'fa69'f664'f260, 0x7eec'5def'0cee'0dee,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
- // expected_result_vd0_with_mask_int32
- {0x9604'929d, 0xbe2c'ba29, 0xfe6c'fb4e, 0x7e6b'fa84, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9604'9200},
- // expected_result_vd0_with_mask_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xee7c'ea78'e674'e271, 0x6efc'4e0d'ee0d'ee0f,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
- kVectorCalculationsSource);
+ TestVectorReductionInstruction(0xd881457, // vfredosum.vs v8, v24, v16, v0.t
+ // expected_result_vd0_int32
+ {0x9e0c'9a8e,
+ 0xbe2c'bace,
+ 0xfe6c'fb4e,
+ 0x7e6b'fc4d,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9e0c'9a8e},
+ // expected_result_vd0_int64
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xfe6c'fa69'f664'f260,
+ 0x7eec'5def'0cee'0dee,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
+ // expected_result_vd0_with_mask_int32
+ {0x9604'929d,
+ 0xbe2c'ba29,
+ 0xfe6c'fb4e,
+ 0x7e6b'fa84,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9604'9200},
+ // expected_result_vd0_with_mask_int64
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xee7c'ea78'e674'e271,
+ 0x6efc'4e0d'ee0d'ee0f,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
+ kVectorCalculationsSource);
}
// Currently Vfredusum is implemented as Vfredosum (as explicitly permitted by RVV 1.0).
// If we would implement some speedups which would change results then we may need to alter tests.
TEST_F(Riscv64InterpreterTest, TestVfredusum) {
- TestVectorReductionInstruction(
- 0x50c1457, // vfredusum.vs v8, v16, v24, v0.t
- // expected_result_vd0_int32
- {0x9e0c'9a8e, 0xbe2c'bace, 0xfe6c'fb4e, 0x7e6b'fc4d, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9e0c'9a8e},
- // expected_result_vd0_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xfe6c'fa69'f664'f260, 0x7eec'5def'0cee'0dee,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
- // expected_result_vd0_with_mask_int32
- {0x9604'929d, 0xbe2c'ba29, 0xfe6c'fb4e, 0x7e6b'fa84, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9604'9200},
- // expected_result_vd0_with_mask_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xee7c'ea78'e674'e271, 0x6efc'4e0d'ee0d'ee0f,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
- kVectorCalculationsSource);
+ TestVectorReductionInstruction(0x5881457, // vfredusum.vs v8, v24, v16, v0.t
+ // expected_result_vd0_int32
+ {0x9e0c'9a8e,
+ 0xbe2c'bace,
+ 0xfe6c'fb4e,
+ 0x7e6b'fc4d,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9e0c'9a8e},
+ // expected_result_vd0_int64
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xfe6c'fa69'f664'f260,
+ 0x7eec'5def'0cee'0dee,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
+ // expected_result_vd0_with_mask_int32
+ {0x9604'929d,
+ 0xbe2c'ba29,
+ 0xfe6c'fb4e,
+ 0x7e6b'fa84,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9604'9200},
+ // expected_result_vd0_with_mask_int64
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xee7c'ea78'e674'e271,
+ 0x6efc'4e0d'ee0d'ee0f,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredand) {
TestVectorReductionInstruction(
- 0x50c2457, // vredand.vs v8,v16,v24,v0.t
+ 0x5882457, // vredand.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{0, 0, 0, 0, /* unused */ 0, 0, 0, 0},
// expected_result_vd0_int16
{0x8000, 0x8000, 0x8000, 0x0000, /* unused */ 0, 0x8000, 0x8000, 0x8000},
// expected_result_vd0_int32
- {0x8200'8000, 0x8200'8000, 0x8200'8000, 0x0200'0000, /* unused */ 0, /* unused */ 0,
- 0x8200'8000, 0x8200'8000},
+ {0x8200'8000,
+ 0x8200'8000,
+ 0x8200'8000,
+ 0x0200'0000,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8200'8000,
+ 0x8200'8000},
// expected_result_vd0_int64
- {0x8604'8000'8200'8000, 0x8604'8000'8200'8000, 0x8604'8000'8200'8000, 0x0604'0000'0200'0000,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8604'8000'8200'8000},
+ {0x8604'8000'8200'8000,
+ 0x8604'8000'8200'8000,
+ 0x8604'8000'8200'8000,
+ 0x0604'0000'0200'0000,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8604'8000'8200'8000},
// expected_result_vd0_with_mask_int8
{0, 0, 0, 0, /* unused */ 0, 0, 0, 0},
// expected_result_vd0_with_mask_int16
{0x8000, 0x8000, 0x8000, 0x0000, /* unused */ 0, 0x8000, 0x8000, 0x8000},
// expected_result_vd0_with_mask_int32
- {0x8200'8000, 0x8200'8000, 0x8200'8000, 0x0200'0000, /* unused */ 0, /* unused */ 0,
- 0x8200'8000, 0x8200'8000},
+ {0x8200'8000,
+ 0x8200'8000,
+ 0x8200'8000,
+ 0x0200'0000,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8200'8000,
+ 0x8200'8000},
// expected_result_vd0_with_mask_int64
- {0x8604'8000'8200'8000, 0x8604'8000'8200'8000, 0x8604'8000'8200'8000, 0x0604'0000'0200'0000,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8604'8000'8200'8000},
+ {0x8604'8000'8200'8000,
+ 0x8604'8000'8200'8000,
+ 0x8604'8000'8200'8000,
+ 0x0604'0000'0200'0000,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8604'8000'8200'8000},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredor) {
TestVectorReductionInstruction(
- 0x90c2457, // vredor.vs v8,v16,v24,v0.t
+ 0x9882457, // vredor.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{159, 191, 255, 255, /* unused */ 0, 146, 150, 159},
// expected_result_vd0_int16
{0x9f1d, 0xbf3d, 0xff7d, 0xfffd, /* unused */ 0, 0x9300, 0x9704, 0x9f0d},
// expected_result_vd0_int32
- {0x9f1e'9b19, 0xbf3e'bb39, 0xff7e'fb79, 0xfffe'fbf9, /* unused */ 0, /* unused */ 0,
- 0x9706'9300, 0x9f0e'9b09},
+ {0x9f1e'9b19,
+ 0xbf3e'bb39,
+ 0xff7e'fb79,
+ 0xfffe'fbf9,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9706'9300,
+ 0x9f0e'9b09},
// expected_result_vd0_int64
- {0x9f1e'9f1d'9716'9311, 0xbf3e'bf3d'b736'b331, 0xff7e'ff7d'f776'f371, 0xfffe'fffd'f7f6'f3f1,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9f0e'9f0d'9706'9300},
+ {0x9f1e'9f1d'9716'9311,
+ 0xbf3e'bf3d'b736'b331,
+ 0xff7e'ff7d'f776'f371,
+ 0xfffe'fffd'f7f6'f3f1,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9f0e'9f0d'9706'9300},
// expected_result_vd0_with_mask_int8
{159, 191, 255, 255, /* unused */ 0, 0, 150, 158},
// expected_result_vd0_with_mask_int16
{0x9f1d, 0xbf3d, 0xff7d, 0xfffd, /* unused */ 0, 0x9300, 0x9300, 0x9f0d},
// expected_result_vd0_with_mask_int32
- {0x9f1e'9b19, 0xbf3e'bb39, 0xff7e'fb79, 0xfffe'fbf9, /* unused */ 0, /* unused */ 0,
- 0x9706'9300, 0x9706'9300},
+ {0x9f1e'9b19,
+ 0xbf3e'bb39,
+ 0xff7e'fb79,
+ 0xfffe'fbf9,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9706'9300,
+ 0x9706'9300},
// expected_result_vd0_with_mask_int64
- {0x9f0e'9f0d'9706'9300, 0xbf3e'bf3d'b736'b331, 0xff7e'ff7d'f776'f371, 0xfffe'fffd'f7f6'f3f1,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9f0e'9f0d'9706'9300},
+ {0x9f0e'9f0d'9706'9300,
+ 0xbf3e'bf3d'b736'b331,
+ 0xff7e'ff7d'f776'f371,
+ 0xfffe'fffd'f7f6'f3f1,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9f0e'9f0d'9706'9300},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredxor) {
TestVectorReductionInstruction(
- 0xd0c2457, // vredxor.vs v8,v16,v24,v0.t
+ 0xd882457, // vredxor.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{0, 0, 0, 0, /* unused */ 0, 146, 0, 1},
// expected_result_vd0_int16
{0x8100, 0x8100, 0x8100, 0x8100, /* unused */ 0, 0x1300, 0x8504, 0x8101},
// expected_result_vd0_int32
- {0x8302'8100, 0x8302'8100, 0x8302'8100, 0x8302'8100, /* unused */ 0, /* unused */ 0,
- 0x1506'1300, 0x8b0a'8909},
+ {0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1506'1300,
+ 0x8b0a'8909},
// expected_result_vd0_int64
- {0x9716'9515'9312'9111, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x190a'1f0d'1506'1300},
+ {0x9716'9515'9312'9111,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x190a'1f0d'1506'1300},
// expected_result_vd0_with_mask_int8
{143, 154, 150, 43, /* unused */ 0, 0, 146, 150},
// expected_result_vd0_with_mask_int16
{0x1f0d, 0xbd3d, 0x9514, 0x8d0d, /* unused */ 0, 0x1300, 0x1300, 0x1705},
// expected_result_vd0_with_mask_int32
- {0x1d0e'1b09, 0x0d1e'0b18, 0xfb7a'f978, 0xab2a'a929, /* unused */ 0, /* unused */ 0,
- 0x1506'1300, 0x1506'1300},
+ {0x1d0e'1b09,
+ 0x0d1e'0b18,
+ 0xfb7a'f978,
+ 0xab2a'a929,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1506'1300,
+ 0x1506'1300},
// expected_result_vd0_with_mask_int64
- {0x190a'1f0d'1506'1300, 0x091a'0f1c'0516'0311, 0x293a'2f3c'2536'2331, 0x77f6'75f5'73f2'71f1,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x190a'1f0d'1506'1300},
+ {0x190a'1f0d'1506'1300,
+ 0x091a'0f1c'0516'0311,
+ 0x293a'2f3c'2536'2331,
+ 0x77f6'75f5'73f2'71f1,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x190a'1f0d'1506'1300},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredminu) {
TestVectorReductionInstruction(
- 0x110c2457, // vredminu.vs v8,v16,v24,v0.t
+ 0x11882457, // vredminu.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{0, 0, 0, 0, /* unused */ 0, 0, 0, 0},
// expected_result_vd0_int16
{0x8100, 0x8100, 0x8100, 0x0291, /* unused */ 0, 0x8100, 0x8100, 0x8100},
// expected_result_vd0_int32
- {0x83028100, 0x83028100, 0x83028100, 0x06940291, /* unused */ 0, /* unused */ 0, 0x83028100,
+ {0x83028100,
+ 0x83028100,
+ 0x83028100,
+ 0x06940291,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x83028100,
0x83028100},
// expected_result_vd0_int64
- {0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x0e9c'0a98'0694'0291,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8706'8504'8302'8100},
+ {0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x0e9c'0a98'0694'0291,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8706'8504'8302'8100},
// expected_result_vd0_with_mask_int8
{0, 0, 0, 0, /* unused */ 0, 0, 0, 0},
// expected_result_vd0_with_mask_int16
{0x8100, 0x8100, 0x8100, 0x0291, /* unused */ 0, 0x8100, 0x8100, 0x8100},
// expected_result_vd0_with_mask_int32
- {0x8302'8100, 0x8302'8100, 0x8302'8100, 0x0e9c'0a98, /* unused */ 0, /* unused */ 0,
- 0x8302'8100, 0x8302'8100},
+ {0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ 0x0e9c'0a98,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8302'8100,
+ 0x8302'8100},
// expected_result_vd0_with_mask_int64
- {0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x1e8c'1a89'1684'1280,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8706'8504'8302'8100},
+ {0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x1e8c'1a89'1684'1280,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8706'8504'8302'8100},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredmin) {
TestVectorReductionInstruction(
- 0x150c2457, // vredmin.vs v8,v16,v24,v0.t
+ 0x15882457, // vredmin.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{130, 130, 130, 128, /* unused */ 0, 146, 146, 146},
// expected_result_vd0_int16
{0x8100, 0x8100, 0x8100, 0x8100, /* unused */ 0, 0x8100, 0x8100, 0x8100},
// expected_result_vd0_int32
- {0x8302'8100, 0x8302'8100, 0x8302'8100, 0x8302'8100, /* unused */ 0, /* unused */ 0,
- 0x8302'8100, 0x8302'8100},
+ {0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8302'8100,
+ 0x8302'8100},
// expected_result_vd0_int64
- {0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8706'8504'8302'8100},
+ {0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8706'8504'8302'8100},
// expected_result_vd0_with_mask_int8
{138, 138, 138, 128, /* unused */ 0, 0, 150, 150},
// expected_result_vd0_with_mask_int16
{0x8100, 0x8100, 0x8100, 0x8100, /* unused */ 0, 0x8100, 0x8100, 0x8100},
// expected_result_vd0_with_mask_int32
- {0x8302'8100, 0x8302'8100, 0x8302'8100, 0x8302'8100, /* unused */ 0, /* unused */ 0,
- 0x8302'8100, 0x8302'8100},
+ {0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8302'8100,
+ 0x8302'8100},
// expected_result_vd0_with_mask_int64
- {0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8706'8504'8302'8100},
+ {0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8706'8504'8302'8100},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVfredmin) {
- TestVectorReductionInstruction(
- 0x150c1457, // vfredmin.vs v8, v16, v24, v0.t
- // expected_result_vd0_int32
- {0x9e0c'9a09, 0xbe2c'ba29, 0xfe6c'fa69, 0xfe6c'fa69, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9e0c'9a09},
- // expected_result_vd0_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xfe6c'fa69'f664'f260, 0xfe6c'fa69'f664'f260,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
- // expected_result_vd0_with_mask_int32
- {0x9604'9200, 0xbe2c'ba29, 0xfe6c'fa69, 0xfe6c'fa69, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9604'9200},
- // expected_result_vd0_with_mask_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xee7c'ea78'e674'e271, 0xee7c'ea78'e674'e271,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
- kVectorCalculationsSource);
+ TestVectorReductionInstruction(0x15881457, // vfredmin.vs v8, v24, v16, v0.t
+ // expected_result_vd0_int32
+ {0x9e0c'9a09,
+ 0xbe2c'ba29,
+ 0xfe6c'fa69,
+ 0xfe6c'fa69,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9e0c'9a09},
+ // expected_result_vd0_int64
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xfe6c'fa69'f664'f260,
+ 0xfe6c'fa69'f664'f260,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
+ // expected_result_vd0_with_mask_int32
+ {0x9604'9200,
+ 0xbe2c'ba29,
+ 0xfe6c'fa69,
+ 0xfe6c'fa69,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9604'9200},
+ // expected_result_vd0_with_mask_int64
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xee7c'ea78'e674'e271,
+ 0xee7c'ea78'e674'e271,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
+ kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredmaxu) {
TestVectorReductionInstruction(
- 0x190c2457, // vredmaxu.vs v8,v16,v24,v0.t
+ 0x19882457, // vredmaxu.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{158, 190, 254, 254, /* unused */ 0, 146, 150, 158},
// expected_result_vd0_int16
{0x9e0c, 0xbe2c, 0xfe6c, 0xfe6c, /* unused */ 0, 0x9200, 0x9604, 0x9e0c},
// expected_result_vd0_int32
- {0x9e0c'9a09, 0xbe2c'ba29, 0xfe6c'fa69, 0xfe6c'fa69, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9e0c'9a09},
+ {0x9e0c'9a09,
+ 0xbe2c'ba29,
+ 0xfe6c'fa69,
+ 0xfe6c'fa69,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9e0c'9a09},
// expected_result_vd0_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xfe6c'fa69'f664'f260, 0xfe6c'fa69'f664'f260,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xfe6c'fa69'f664'f260,
+ 0xfe6c'fa69'f664'f260,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
// expected_result_vd0_with_mask_int8
{158, 186, 254, 254, /* unused */ 0, 0, 150, 158},
// expected_result_vd0_with_mask_int16
{0x9e0c, 0xba29, 0xfe6c, 0xfe6c, /* unused */ 0, 0x9200, 0x9200, 0x9e0c},
// expected_result_vd0_with_mask_int32
- {0x9604'9200, 0xbe2c'ba29, 0xfe6c'fa69, 0xfe6c'fa69, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9604'9200},
+ {0x9604'9200,
+ 0xbe2c'ba29,
+ 0xfe6c'fa69,
+ 0xfe6c'fa69,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9604'9200},
// expected_result_vd0_with_mask_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xee7c'ea78'e674'e271, 0xee7c'ea78'e674'e271,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xee7c'ea78'e674'e271,
+ 0xee7c'ea78'e674'e271,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVredmax) {
TestVectorReductionInstruction(
- 0x1d0c2457, // vredmax.vs v8,v16,v24,v0.t
+ 0x1d882457, // vredmax.vs v8,v24,v16,v0.t
// expected_result_vd0_int8
{28, 60, 124, 126, /* unused */ 0, 0, 4, 12},
// expected_result_vd0_int16
{0x9e0c, 0xbe2c, 0xfe6c, 0x7eec, /* unused */ 0, 0x9200, 0x9604, 0x9e0c},
// expected_result_vd0_int32
- {0x9e0c'9a09, 0xbe2c'ba29, 0xfe6c'fa69, 0x7eec'7ae9, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9e0c'9a09},
+ {0x9e0c'9a09,
+ 0xbe2c'ba29,
+ 0xfe6c'fa69,
+ 0x7eec'7ae9,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9e0c'9a09},
// expected_result_vd0_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xfe6c'fa69'f664'f260, 0x7eec'7ae9'76e4'72e0,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xfe6c'fa69'f664'f260,
+ 0x7eec'7ae9'76e4'72e0,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
// expected_result_vd0_with_mask_int8
{24, 52, 124, 126, /* unused */ 0, 0, 4, 4},
// expected_result_vd0_with_mask_int16
{0x9e0c, 0xba29, 0xfe6c, 0x7ae9, /* unused */ 0, 0x9200, 0x9200, 0x9e0c},
// expected_result_vd0_with_mask_int32
- {0x9604'9200, 0xbe2c'ba29, 0xfe6c'fa69, 0x7eec'7ae9, /* unused */ 0, /* unused */ 0,
- 0x9604'9200, 0x9604'9200},
+ {0x9604'9200,
+ 0xbe2c'ba29,
+ 0xfe6c'fa69,
+ 0x7eec'7ae9,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9604'9200,
+ 0x9604'9200},
// expected_result_vd0_with_mask_int64
- {0x9e0c'9a09'9604'9200, 0xbe2c'ba29'b624'b220, 0xee7c'ea78'e674'e271, 0x6efc'6af8'66f4'62f1,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x9e0c'9a09'9604'9200},
+ {0x9e0c'9a09'9604'9200,
+ 0xbe2c'ba29'b624'b220,
+ 0xee7c'ea78'e674'e271,
+ 0x6efc'6af8'66f4'62f1,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x9e0c'9a09'9604'9200},
kVectorCalculationsSource);
}
TEST_F(Riscv64InterpreterTest, TestVfredmax) {
- TestVectorReductionInstruction(
- 0x1d0c1457, // vfredmax.vs v8, v16, v24, v0.t
- // expected_result_vd0_int32
- {0x8302'8100, 0x8302'8100, 0x8302'8100, 0x7eec'7ae9, /* unused */ 0, /* unused */ 0,
- 0x8302'8100, 0x8302'8100},
- // expected_result_vd0_int64
- {0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x7eec'7ae9'76e4'72e0,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8706'8504'8302'8100},
- // expected_result_vd0_with_mask_int32
- {0x8302'8100, 0x8302'8100, 0x8302'8100, 0x7eec'7ae9, /* unused */ 0, /* unused */ 0,
- 0x8302'8100, 0x8302'8100},
- // expected_result_vd0_with_mask_int64
- {0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x8706'8504'8302'8100, 0x6efc'6af8'66f4'62f1,
- /* unused */ 0, /* unused */ 0, /* unused */ 0, 0x8706'8504'8302'8100},
- kVectorCalculationsSource);
+ TestVectorReductionInstruction(0x1d881457, // vfredmax.vs v8, v24, v16, v0.t
+ // expected_result_vd0_int32
+ {0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ 0x7eec'7ae9,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8302'8100,
+ 0x8302'8100},
+ // expected_result_vd0_int64
+ {0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x7eec'7ae9'76e4'72e0,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8706'8504'8302'8100},
+ // expected_result_vd0_with_mask_int32
+ {0x8302'8100,
+ 0x8302'8100,
+ 0x8302'8100,
+ 0x7eec'7ae9,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8302'8100,
+ 0x8302'8100},
+ // expected_result_vd0_with_mask_int64
+ {0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x8706'8504'8302'8100,
+ 0x6efc'6af8'66f4'62f1,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x8706'8504'8302'8100},
+ kVectorCalculationsSource);
}
// Note that the expected test outputs for v[f]merge.vXm are identical to those for v[f]mv.v.X.
@@ -12168,6 +12766,155 @@ TEST_F(Riscv64InterpreterTest, TestVslide1down) {
/*last_elem_is_x1=*/true);
}
+TEST_F(Riscv64InterpreterTest, TestVfslide1up) {
+ TestVectorFloatInstruction(0x3980d457, // vfslide1up.vf v8, v24, f1, v0.t
+ {{0x40b4'0000, 0x9604'9200, 0x9e0c'9a09, 0x8614'8211},
+ {0x8e1c'8a18, 0xb624'b220, 0xbe2c'ba29, 0xa634'a231},
+ {0xae3c'aa38, 0xd644'd240, 0xde4c'da49, 0xc654'c251},
+ {0xce5c'ca58, 0xf664'f260, 0xfe6c'fa69, 0xe674'e271},
+ {0xee7c'ea78, 0x1684'1280, 0x1e8c'1a89, 0x0694'0291},
+ {0x0e9c'0a98, 0x36a4'32a0, 0x3eac'3aa9, 0x26b4'22b1},
+ {0x2ebc'2ab8, 0x56c4'52c0, 0x5ecc'5ac9, 0x46d4'42d1},
+ {0x4edc'4ad8, 0x76e4'72e0, 0x7eec'7ae9, 0x66f4'62f1}},
+ {{0x4016'8000'0000'0000, 0x9e0c'9a09'9604'9200},
+ {0x8e1c'8a18'8614'8211, 0xbe2c'ba29'b624'b220},
+ {0xae3c'aa38'a634'a231, 0xde4c'da49'd644'd240},
+ {0xce5c'ca58'c654'c251, 0xfe6c'fa69'f664'f260},
+ {0xee7c'ea78'e674'e271, 0x1e8c'1a89'1684'1280},
+ {0x0e9c'0a98'0694'0291, 0x3eac'3aa9'36a4'32a0},
+ {0x2ebc'2ab8'26b4'22b1, 0x5ecc'5ac9'56c4'52c0},
+ {0x4edc'4ad8'46d4'42d1, 0x7eec'7ae9'76e4'72e0}},
+ kVectorCalculationsSource);
+}
+
+TEST_F(Riscv64InterpreterTest, TestVfslide1down) {
+ // Where the element at the top gets inserted depends on VLMUL, so we use
+ // TestVectorFloatPermutationInstruction instead of TestVectorFloatInstruction.
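+ // For example, with VLMUL=0 (LMUL=1) the value from f1 lands in the last element of v8, while
+ // with VLMUL=1 (LMUL=2) it lands in the last element of v9, as the expected values below show.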
+
+ // VLMUL = 0
+ TestVectorFloatPermutationInstruction(
+ 0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{0x9e0c'9a09, 0x8614'8211, 0x8e1c'8a18, 0x40b4'0000}, {}, {}, {}, {}, {}, {}, {}},
+ {{0x8e1c'8a18'8614'8211, 0x4016'8000'0000'0000}, {}, {}, {}, {}, {}, {}, {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/0,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 1
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{0x9e0c'9a09, 0x8614'8211, 0x8e1c'8a18, 0xb624'b220},
+ {0xbe2c'ba29, 0xa634'a231, 0xae3c'aa38, 0x40b4'0000},
+ {},
+ {},
+ {},
+ {},
+ {},
+ {}},
+ {{0x8e1c'8a18'8614'8211, 0xbe2c'ba29'b624'b220},
+ {0xae3c'aa38'a634'a231, 0x4016'8000'0000'0000},
+ {},
+ {},
+ {},
+ {},
+ {},
+ {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/1,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 2
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{0x9e0c'9a09, 0x8614'8211, 0x8e1c'8a18, 0xb624'b220},
+ {0xbe2c'ba29, 0xa634'a231, 0xae3c'aa38, 0xd644'd240},
+ {0xde4c'da49, 0xc654'c251, 0xce5c'ca58, 0xf664'f260},
+ {0xfe6c'fa69, 0xe674'e271, 0xee7c'ea78, 0x40b4'0000},
+ {},
+ {},
+ {},
+ {}},
+ {{0x8e1c'8a18'8614'8211, 0xbe2c'ba29'b624'b220},
+ {0xae3c'aa38'a634'a231, 0xde4c'da49'd644'd240},
+ {0xce5c'ca58'c654'c251, 0xfe6c'fa69'f664'f260},
+ {0xee7c'ea78'e674'e271, 0x4016'8000'0000'0000},
+ {},
+ {},
+ {},
+ {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/2,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 3
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{0x9e0c'9a09, 0x8614'8211, 0x8e1c'8a18, 0xb624'b220},
+ {0xbe2c'ba29, 0xa634'a231, 0xae3c'aa38, 0xd644'd240},
+ {0xde4c'da49, 0xc654'c251, 0xce5c'ca58, 0xf664'f260},
+ {0xfe6c'fa69, 0xe674'e271, 0xee7c'ea78, 0x1684'1280},
+ {0x1e8c'1a89, 0x0694'0291, 0x0e9c'0a98, 0x36a4'32a0},
+ {0x3eac'3aa9, 0x26b4'22b1, 0x2ebc'2ab8, 0x56c4'52c0},
+ {0x5ecc'5ac9, 0x46d4'42d1, 0x4edc'4ad8, 0x76e4'72e0},
+ {0x7eec'7ae9, 0x66f4'62f1, 0x6efc'6af8, 0x40b4'0000}},
+ {{0x8e1c'8a18'8614'8211, 0xbe2c'ba29'b624'b220},
+ {0xae3c'aa38'a634'a231, 0xde4c'da49'd644'd240},
+ {0xce5c'ca58'c654'c251, 0xfe6c'fa69'f664'f260},
+ {0xee7c'ea78'e674'e271, 0x1e8c'1a89'1684'1280},
+ {0x0e9c'0a98'0694'0291, 0x3eac'3aa9'36a4'32a0},
+ {0x2ebc'2ab8'26b4'22b1, 0x5ecc'5ac9'56c4'52c0},
+ {0x4edc'4ad8'46d4'42d1, 0x7eec'7ae9'76e4'72e0},
+ {0x6efc'6af8'66f4'62f1, 0x4016'8000'0000'0000}},
+ kVectorCalculationsSource,
+ /*vlmul=*/3,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 4
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{}, {}, {}, {}, {}, {}, {}, {}},
+ {{}, {}, {}, {}, {}, {}, {}, {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/4,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 5
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{}, {}, {}, {}, {}, {}, {}, {}},
+ {{}, {}, {}, {}, {}, {}, {}, {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/5,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 6
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{0x40b4'0000}, {}, {}, {}, {}, {}, {}, {}},
+ {{}, {}, {}, {}, {}, {}, {}, {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/6,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+
+ // VLMUL = 7
+ TestVectorFloatPermutationInstruction(0x3d80d457, // vfslide1down.vf v8, v24, f1, v0.t
+ {{0x9e0c'9a09, 0x40b4'0000}, {}, {}, {}, {}, {}, {}, {}},
+ {{0x4016'8000'0000'0000}, {}, {}, {}, {}, {}, {}, {}},
+ kVectorCalculationsSource,
+ /*vlmul=*/7,
+ /*skip=*/0,
+ /*ignore_vma_for_last=*/true,
+ /*last_elem_is_f1=*/true);
+}
+
TEST_F(Riscv64InterpreterTest, TestVwadd) {
TestWideningVectorInstruction(0xc50c2457, // vwadd.vv v8,v16,v24,v0.t
{{0x0000, 0xff13, 0x0006, 0xff19, 0x000d, 0xff1f, 0x0012, 0xff25},
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index 27353bf3..2019aa6b 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -873,6 +873,11 @@ std::tuple<ElementType> WideMultiplySignedUnsigned(ElementType arg1, ElementType
DEFINE_W_ARITHMETIC_INTRINSIC(Vn##name##wx, Narrowwv, return ({ __VA_ARGS__; }); \
, (SIMD128Register src1, ElementType src2), (), (src1, src2))
+#define DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WV(name, ...) \
+ DEFINE_W_ARITHMETIC_INTRINSIC( \
+ Vn##name##wv, Narrowwv, return ({ __VA_ARGS__; }); \
+ , (int8_t csr, SIMD128Register src1, SIMD128Register src2), (csr), (src1, src2))
+
#define DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WX(name, ...) \
DEFINE_W_ARITHMETIC_INTRINSIC( \
Vn##name##wx, Narrowwv, return ({ __VA_ARGS__; }); \
@@ -1103,6 +1108,10 @@ DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WV(sr, auto [arg1, arg2] = std::tuple{arg
(arg1 >> arg2))
DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WX(sr, auto [arg1, arg2] = std::tuple{args...};
(arg1 >> arg2))
+DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WV(
+ clip,
+ WideType<ElementType>{(std::get<0>(
+ Roundoff(csr, static_cast<typename WideType<ElementType>::BaseType>(args)...)))})
DEFINE_2OP_1CSR_NARROW_ARITHMETIC_INTRINSIC_WX(
clip,
WideType<ElementType>{(std::get<0>(
diff --git a/kernel_api/riscv64/open_emulation.cc b/kernel_api/riscv64/open_emulation.cc
index dacf77ad..e2257df1 100644
--- a/kernel_api/riscv64/open_emulation.cc
+++ b/kernel_api/riscv64/open_emulation.cc
@@ -25,10 +25,7 @@
#include "berberis/kernel_api/tracing.h"
-#define GUEST_O_DIRECTORY 00040000
-#define GUEST_O_NOFOLLOW 00100000
-#define GUEST_O_DIRECT 00200000
-#define GUEST_O_LARGEFILE 00400000
+#define GUEST_O_LARGEFILE 00100000
namespace berberis {
@@ -55,7 +52,7 @@ namespace berberis {
static_assert((O_ACCMODE & ~O_SEARCH) == 00000003);
-// These flags should have the same value on all architectures.
+// These flags should have the same value on guest and host architectures.
static_assert(O_CREAT == 00000100);
static_assert(O_EXCL == 00000200);
static_assert(O_NOCTTY == 00000400);
@@ -65,7 +62,10 @@ static_assert(O_NONBLOCK == 00004000);
static_assert(O_DSYNC == 00010000);
static_assert(FASYNC == 00020000);
static_assert(O_NOATIME == 01000000);
+static_assert(O_DIRECTORY == 0200000);
+static_assert(O_NOFOLLOW == 00400000);
static_assert(O_CLOEXEC == 02000000);
+static_assert(O_DIRECT == 040000);
static_assert(__O_SYNC == 04000000);
static_assert(O_SYNC == (O_DSYNC | __O_SYNC));
static_assert(O_PATH == 010000000);
@@ -73,14 +73,13 @@ static_assert(O_PATH == 010000000);
namespace {
const int kCompatibleOpenFlags = O_ACCMODE | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | O_APPEND |
- O_NONBLOCK | O_DSYNC | FASYNC | O_NOATIME | O_CLOEXEC | __O_SYNC |
- O_PATH;
+ O_NONBLOCK | O_DSYNC | FASYNC | O_NOATIME | O_DIRECTORY |
+ O_NOFOLLOW | O_CLOEXEC | O_DIRECT | __O_SYNC | O_PATH;
} // namespace
int ToHostOpenFlags(int guest_flags) {
- const int kIncompatibleGuestOpenFlags =
- GUEST_O_DIRECTORY | GUEST_O_NOFOLLOW | GUEST_O_DIRECT | GUEST_O_LARGEFILE;
+ const int kIncompatibleGuestOpenFlags = GUEST_O_LARGEFILE;
int unknown_guest_flags = guest_flags & ~(kCompatibleOpenFlags | kIncompatibleGuestOpenFlags);
if (unknown_guest_flags) {
@@ -91,15 +90,6 @@ int ToHostOpenFlags(int guest_flags) {
int host_flags = guest_flags & ~kIncompatibleGuestOpenFlags;
- if (guest_flags & GUEST_O_DIRECTORY) {
- host_flags |= O_DIRECTORY;
- }
- if (guest_flags & GUEST_O_NOFOLLOW) {
- host_flags |= O_NOFOLLOW;
- }
- if (guest_flags & GUEST_O_DIRECT) {
- host_flags |= O_DIRECT;
- }
if (guest_flags & GUEST_O_LARGEFILE) {
host_flags |= O_LARGEFILE;
}
@@ -108,7 +98,7 @@ int ToHostOpenFlags(int guest_flags) {
}
int ToGuestOpenFlags(int host_flags) {
- const int kIncompatibleHostOpenFlags = O_DIRECTORY | O_NOFOLLOW | O_DIRECT | O_LARGEFILE;
+ const int kIncompatibleHostOpenFlags = O_LARGEFILE;
int unknown_host_flags = host_flags & ~(kCompatibleOpenFlags | kIncompatibleHostOpenFlags);
if (unknown_host_flags) {
@@ -119,15 +109,6 @@ int ToGuestOpenFlags(int host_flags) {
int guest_flags = host_flags & ~kIncompatibleHostOpenFlags;
- if (host_flags & O_DIRECTORY) {
- guest_flags |= GUEST_O_DIRECTORY;
- }
- if (host_flags & O_NOFOLLOW) {
- guest_flags |= GUEST_O_NOFOLLOW;
- }
- if (host_flags & O_DIRECT) {
- guest_flags |= GUEST_O_DIRECT;
- }
if (host_flags & O_LARGEFILE) {
guest_flags |= GUEST_O_LARGEFILE;
}
diff --git a/tests/inline_asm_tests/Android.bp b/tests/inline_asm_tests/Android.bp
index bba729f6..90e082e5 100644
--- a/tests/inline_asm_tests/Android.bp
+++ b/tests/inline_asm_tests/Android.bp
@@ -48,3 +48,18 @@ cc_test {
},
static_executable: true,
}
+
+cc_test {
+ name: "inline_asm_tests_riscv64",
+ native_bridge_supported: true,
+ enabled: false,
+ arch: {
+ riscv64: {
+ enabled: true,
+ srcs: [
+ "main_riscv64.cc",
+ ],
+ },
+ },
+ static_executable: true,
+}
diff --git a/tests/inline_asm_tests/main_riscv64.cc b/tests/inline_asm_tests/main_riscv64.cc
new file mode 100644
index 00000000..694909a4
--- /dev/null
+++ b/tests/inline_asm_tests/main_riscv64.cc
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include <cstdint>
+#include <cstring>
+#include <tuple>
+
+namespace {
+
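+// BitUtilLog2 returns log2 of its (power-of-two) argument, e.g. BitUtilLog2(4) == 2; it is used
+// below to turn sizeof(ExpectedResultType) into the vsew encoding (0 for SEW=8, ..., 3 for SEW=64).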
+template <typename T>
+constexpr T BitUtilLog2(T x) {
+ return __builtin_ctz(x);
+}
+
+// TODO(b/301577077): Maybe use __uint128_t instead.
+// Or provide a more versatile wrapper that one can easily init, copy, and compare.
+using __v2du = uint64_t[2];
+
+constexpr __v2du kVectorCalculationsSource[16] = {
+ {0x8706'8504'8302'8100, 0x8f0e'8d0c'8b0a'8908},
+ {0x9716'9514'9312'9110, 0x9f1e'9d1c'9b1a'9918},
+ {0xa726'a524'a322'a120, 0xaf2e'ad2c'ab2a'a928},
+ {0xb736'b534'b332'b130, 0xbf3e'bd3c'bb3a'b938},
+ {0xc746'c544'c342'c140, 0xcf4e'cd4c'cb4a'c948},
+ {0xd756'd554'd352'd150, 0xdf5e'dd5c'db5a'd958},
+ {0xe766'e564'e362'e160, 0xef6e'ed6c'eb6a'e968},
+ {0xf776'f574'f372'f170, 0xff7e'fd7c'fb7a'f978},
+
+ {0x9e0c'9a09'9604'9200, 0x8e1c'8a18'8614'8211},
+ {0xbe2c'ba29'b624'b220, 0xae3c'aa38'a634'a231},
+ {0xde4c'da49'd644'd240, 0xce5c'ca58'c654'c251},
+ {0xfe6c'fa69'f664'f260, 0xee7c'ea78'e674'e271},
+ {0x1e8c'1a89'1684'1280, 0x0e9c'0a98'0694'0291},
+ {0x3eac'3aa9'36a4'32a0, 0x2ebc'2ab8'26b4'22b1},
+ {0x5ecc'5ac9'56c4'52c0, 0x4edc'4ad8'46d4'42d1},
+ {0x7eec'7ae9'76e4'72e0, 0x6efc'6af8'66f4'62f1},
+};
+
+// Easily recognizable bit pattern for target register.
+constexpr __v2du kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
+constexpr __v2du kAgnosticResult = {~uint64_t{0U}, ~uint64_t{0U}};
+
+// Mask in a form suitable for storing in v0 and for use in v0.t form.
+static constexpr __v2du kMask = {0xd5ad'd6b5'ad6b'b5ad, 0x6af7'57bb'deed'7bb5};
+
+using ExecInsnFunc = void (*)();
+
+void RunTwoVectorArgsOneRes(ExecInsnFunc exec_insn,
+ const __v2du* src,
+ __v2du* res,
+ uint64_t vtype,
+ uint64_t vlmax) {
+ uint64_t vstart, vl;
+ // The mask register is, unconditionally, v0, and register groups must start at v8, v16, or v24
+ // to handle full 8-register inputs, so we use v8..v15 for the destination and place the sources
+ // into v16..v23 and v24..v31.
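+ // The tested instruction itself is provided as a pointer to a [[gnu::naked]] helper that ends in
+ // "ret", so it is invoked with "jalr" below; that is why "ra" appears in the clobber list.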
+ asm( // Load arguments and undisturbed result.
+ "vsetvli t0, zero, e64, m8, ta, ma\n\t"
+ "vle64.v v8, (%[res])\n\t"
+ "vle64.v v16, (%[src])\n\t"
+ "addi t0, %[src], 128\n\t"
+ "vle64.v v24, (t0)\n\t"
+ // Load mask.
+ "vsetvli t0, zero, e64, m1, ta, ma\n\t"
+ "vle64.v v0, (%[mask])\n\t"
+ // Execute tested instruction.
+ "vsetvl t0, zero, %[vtype]\n\t"
+ "jalr %[exec_insn]\n\t"
+ // Save vstart and vl just after insn execution for checks.
+ "csrr %[vstart], vstart\n\t"
+ "csrr %[vl], vl\n\t"
+ // Store the result.
+ "vsetvli t0, zero, e64, m8, ta, ma\n\t"
+ "vse64.v v8, (%[res])\n\t"
+ : [vstart] "=&r"(vstart), [vl] "=&r"(vl)
+ : [exec_insn] "r"(exec_insn),
+ [src] "r"(src),
+ [res] "r"(res),
+ [vtype] "r"(vtype),
+ [mask] "r"(&kMask)
+ : "t0",
+ "ra",
+ "v0",
+ "v8",
+ "v9",
+ "v10",
+ "v11",
+ "v12",
+ "v13",
+ "v14",
+ "v15",
+ "v16",
+ "v17",
+ "v18",
+ "v19",
+ "v20",
+ "v21",
+ "v22",
+ "v23",
+ "v24",
+ "v25",
+ "v26",
+ "v27",
+ "v28",
+ "v29",
+ "v30",
+ "v31",
+ "memory");
+ // Every vector instruction must set vstart to 0, but shouldn't touch vl.
+ EXPECT_EQ(vstart, 0);
+ EXPECT_EQ(vl, vlmax);
+}
+
+template <typename... ExpectedResultType>
+void TestVectorReductionInstruction(
+ ExecInsnFunc exec_insn,
+ ExecInsnFunc exec_masked_insn,
+ const __v2du (&source)[16],
+ std::tuple<const ExpectedResultType (&)[8],
+ const ExpectedResultType (&)[8]>... expected_result) {
+ // Each expected_result input to this function is an array of the vd[0] values of the reduction,
+ // one for each possible vlmul, i.e. expected_result_vd0_int8[n] = vd[0] for int8, no mask,
+ // vlmul=n.
+ //
+ // As vlmul=4 is reserved, expected_result_vd0_*[4] is ignored.
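+ //
+ // For example, expected_result_vd0_int32[7] is the value expected in vd[0] for the unmasked
+ // reduction with SEW=32 and vlmul=7 (LMUL=1/2).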
+ auto Verify = [&source](ExecInsnFunc exec_insn,
+ uint8_t vsew,
+ uint8_t vlmul,
+ const auto& expected_result) {
+ for (uint8_t vta = 0; vta < 2; ++vta) {
+ for (uint8_t vma = 0; vma < 2; ++vma) {
+ uint64_t vtype = (vma << 7) | (vta << 6) | (vsew << 3) | vlmul;
+ uint64_t vlmax = 0;
+ asm("vsetvl %0, zero, %1" : "=r"(vlmax) : "r"(vtype));
+ if (vlmax == 0) {
+ continue;
+ }
+
+ __v2du result[8];
+ // Set undisturbed result vector registers.
+ for (size_t index = 0; index < 8; ++index) {
+ memcpy(&result[index], &kUndisturbedResult, sizeof(result[index]));
+ }
+
+ RunTwoVectorArgsOneRes(exec_insn, &kVectorCalculationsSource[0], &result[0], vtype, vlmax);
+
+ // Reduction instructions are unique in that they produce a scalar
+ // result in a single vector register as opposed to a register group.
+ // This allows us to take some shortcuts when validating:
+ //
+ // - The mask setting is only useful during computation, as the body
+ // of the destination is always only element 0, which will always be
+ // written to, regardless of mask setting.
+ // - The tail is guaranteed to be 1..VLEN/SEW, so the vlmul setting
+ // does not affect the elements that the tail policy applies to in the
+ // destination register.
+
+ // Verify that the destination register holds the reduction in the
+ // first element and the tail policy applies to the remaining.
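+ // For example, with SEW=32, vta=0 (tail undisturbed) and an expected reduction value R, the low
+ // 128 bits of the destination must equal kUndisturbedResult with its lowest 32 bits replaced
+ // by R.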
+ __uint128_t expected_result_register;
+ if (vta) {
+ memcpy(&expected_result_register, &kAgnosticResult, sizeof(expected_result_register));
+ } else {
+ memcpy(&expected_result_register, &kUndisturbedResult, sizeof(expected_result_register));
+ }
+ size_t vsew_bits = 8 << vsew;
+ expected_result_register = (expected_result_register >> vsew_bits) << vsew_bits;
+ expected_result_register |= expected_result;
+ EXPECT_TRUE(memcmp(&result[0], &expected_result_register, sizeof(result[0])) == 0);
+
+ // Verify all non-destination registers are undisturbed.
+ for (size_t index = 1; index < 8; ++index) {
+ EXPECT_TRUE(memcmp(&result[index], &kUndisturbedResult, sizeof(result[index])) == 0);
+ }
+ }
+ }
+ };
+
+ for (int vlmul = 0; vlmul < 8; vlmul++) {
+ ((Verify(exec_insn,
+ BitUtilLog2(sizeof(ExpectedResultType)),
+ vlmul,
+ std::get<0>(expected_result)[vlmul]),
+ Verify(exec_masked_insn,
+ BitUtilLog2(sizeof(ExpectedResultType)),
+ vlmul,
+ std::get<1>(expected_result)[vlmul])),
+ ...);
+ }
+}
+
+void TestVectorReductionInstruction(ExecInsnFunc exec_insn,
+ ExecInsnFunc exec_masked_insn,
+ const uint8_t (&expected_result_vd0_int8)[8],
+ const uint16_t (&expected_result_vd0_int16)[8],
+ const uint32_t (&expected_result_vd0_int32)[8],
+ const uint64_t (&expected_result_vd0_int64)[8],
+ const uint8_t (&expected_result_vd0_with_mask_int8)[8],
+ const uint16_t (&expected_result_vd0_with_mask_int16)[8],
+ const uint32_t (&expected_result_vd0_with_mask_int32)[8],
+ const uint64_t (&expected_result_vd0_with_mask_int64)[8],
+ const __v2du (&source)[16]) {
+ TestVectorReductionInstruction(
+ exec_insn,
+ exec_masked_insn,
+ source,
+ std::tuple<const uint8_t(&)[8], const uint8_t(&)[8]>{expected_result_vd0_int8,
+ expected_result_vd0_with_mask_int8},
+ std::tuple<const uint16_t(&)[8], const uint16_t(&)[8]>{expected_result_vd0_int16,
+ expected_result_vd0_with_mask_int16},
+ std::tuple<const uint32_t(&)[8], const uint32_t(&)[8]>{expected_result_vd0_int32,
+ expected_result_vd0_with_mask_int32},
+ std::tuple<const uint64_t(&)[8], const uint64_t(&)[8]>{expected_result_vd0_int64,
+ expected_result_vd0_with_mask_int64});
+}
+
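+// Each tested instruction lives in a [[gnu::naked]] helper so that its body is exactly the vector
+// instruction followed by "ret", with no compiler-generated prologue or epilogue in between.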
+[[gnu::naked]] void ExecVredsum() {
+ asm("vredsum.vs v8,v16,v24\n\t"
+ "ret\n\t");
+}
+
+[[gnu::naked]] void ExecMaskedVredsum() {
+ asm("vredsum.vs v8,v16,v24,v0.t\n\t"
+ "ret\n\t");
+}
+
+TEST(InlineAsmTestRiscv64, TestVredsum) {
+ TestVectorReductionInstruction(
+ ExecVredsum,
+ ExecMaskedVredsum,
+ // expected_result_vd0_int8
+ {242, 228, 200, 144, /* unused */ 0, 146, 44, 121},
+ // expected_result_vd0_int16
+ {0x0172, 0x82e4, 0x88c8, 0xa090, /* unused */ 0, 0x1300, 0xa904, 0xe119},
+ // expected_result_vd0_int32
+ {0xcb44'b932,
+ 0x9407'71e4,
+ 0xa70e'64c8,
+ 0xd312'5090,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1907'1300,
+ 0xb713'ad09},
+ // expected_result_vd0_int64
+ {0xb32f'a926'9f1b'9511,
+ 0x1f99'0d88'fb74'e962,
+ 0xb92c'970e'74e8'52c4,
+ 0xef4e'ad14'6aca'2888,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x2513'1f0e'1907'1300},
+ // expected_result_vd0_with_mask_int8
+ {39, 248, 142, 27, /* unused */ 0, 0, 154, 210},
+ // expected_result_vd0_with_mask_int16
+ {0x5f45, 0xc22f, 0x99d0, 0x98bf, /* unused */ 0, 0x1300, 0x1300, 0x4b15},
+ // expected_result_vd0_with_mask_int32
+ {0x2d38'1f29,
+ 0x99a1'838a,
+ 0x1989'ef5c,
+ 0x9cf4'4aa1,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x1907'1300,
+ 0x1907'1300},
+ // expected_result_vd0_with_mask_int64
+ {0x2513'1f0e'1907'1300,
+ 0x917c'8370'7560'6751,
+ 0x4e56'3842'222a'0c13,
+ 0xc833'9e0e'73df'49b5,
+ /* unused */ 0,
+ /* unused */ 0,
+ /* unused */ 0,
+ 0x2513'1f0e'1907'1300},
+ kVectorCalculationsSource);
+}
+
+} // namespace
diff --git a/tests/run_host_tests.mk b/tests/run_host_tests.mk
index 29906c6d..ab6bca76 100644
--- a/tests/run_host_tests.mk
+++ b/tests/run_host_tests.mk
@@ -115,10 +115,29 @@ endef
ifeq ($(BUILD_BERBERIS_RISCV64_TO_X86_64),true)
-$(eval $(call add_test,berberis_ndk_program_tests,\
+# berberis_ndk_program_tests
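+# The same static test binary is run under several execution modes, selected via BERBERIS_MODE.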
+
+$(eval $(call add_test,berberis_ndk_program_tests_interpret_only,\
+ run_test_x86_64_riscv64,\
+ $(TARGET_OUT_TESTCASES)/berberis_ndk_program_tests_static.native_bridge/x86_64/berberis_ndk_program_tests_static,\
+ BERBERIS_MODE=interpret-only))
+
+$(eval $(call add_test,berberis_ndk_program_tests_lite_translate_or_interpret,\
run_test_x86_64_riscv64,\
$(TARGET_OUT_TESTCASES)/berberis_ndk_program_tests_static.native_bridge/x86_64/berberis_ndk_program_tests_static,\
- ))
+ BERBERIS_MODE=lite-translate-or-interpret))
+
+$(eval $(call add_test,berberis_ndk_program_tests_heavy_optimize_or_interpret,\
+ run_test_x86_64_riscv64,\
+ $(TARGET_OUT_TESTCASES)/berberis_ndk_program_tests_static.native_bridge/x86_64/berberis_ndk_program_tests_static,\
+ BERBERIS_MODE=heavy-optimize-or-interpret))
+
+$(eval $(call add_test,berberis_ndk_program_tests_two_gear,\
+ run_test_x86_64_riscv64,\
+ $(TARGET_OUT_TESTCASES)/berberis_ndk_program_tests_static.native_bridge/x86_64/berberis_ndk_program_tests_static,\
+ BERBERIS_MODE=two-gear))
+
+# berberis_host_tests
$(eval $(call add_test,berberis_host_tests,\
run_test,\