aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-12-15 00:37:31 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-12-15 00:37:31 +0000
commitf940c1f5262a9c153421c7e34384a6dd3f686d95 (patch)
treea91a3b3bcfd79f16aaad77c86a21d799c005d1d0
parent2e3405345ffa06e045abd38aede28b36ad572647 (diff)
parent87ada30fbde026edeb4a9300daad285f188e6b69 (diff)
downloadbinary_translation-android14-qpr2-release.tar.gz
Change-Id: Ida0c9e19cfea25dcca6d4e4b1db91ba8740b0926
-rw-r--r--base/include/berberis/base/bit_util.h5
-rw-r--r--decoder/include/berberis/decoder/riscv64/decoder.h343
-rw-r--r--interpreter/riscv64/interpreter.cc62
-rw-r--r--interpreter/riscv64/interpreter_test.cc208
-rw-r--r--intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h16
5 files changed, 456 insertions, 178 deletions
diff --git a/base/include/berberis/base/bit_util.h b/base/include/berberis/base/bit_util.h
index e3610dde..4fb08470 100644
--- a/base/include/berberis/base/bit_util.h
+++ b/base/include/berberis/base/bit_util.h
@@ -70,8 +70,9 @@ constexpr bool IsAligned(T* p, size_t align) {
template <typename T>
constexpr T BitUtilLog2(T x) {
static_assert(std::is_integral_v<T>, "Log2: T must be integral");
- DCHECK(IsPowerOf2(x));
- return x == 1 ? 0 : BitUtilLog2(x >> 1) + 1;
+ CHECK(IsPowerOf2(x));
+ // TODO(b/260725458): Use std::countr_zero after C++20 becomes available
+ return __builtin_ctz(x);
}
// Verify that argument value fits into a target.
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index 7ad556c1..159d591c 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -341,7 +341,10 @@ class Decoder {
};
enum class VOpMVvOpcode : uint8_t {
+ kVmaddvv = 0b101001,
+ kVnmsubvv = 0b101011,
kVmaccvv = 0b101101,
+ kVnmsacvv = 0b101111,
kMaxValue = 0b111111
};
@@ -390,7 +393,10 @@ class Decoder {
};
enum class VOpMVxOpcode : uint8_t {
+ kVmaddvx = 0b101001,
+ kVnmsubvx = 0b101011,
kVmaccvx = 0b101101,
+ kVnmsacvx = 0b101111,
kMaxValue = 0b111111
};
@@ -760,7 +766,7 @@ class Decoder {
}
uint8_t DecodeCompressedInstruction() {
- CompressedOpcode opcode_bits{(GetBits<uint8_t, 13, 3>() << 2) | GetBits<uint8_t, 0, 2>()};
+ CompressedOpcode opcode_bits{(GetBits<13, 3>() << 2) | GetBits<0, 2>()};
switch (opcode_bits) {
case CompressedOpcode::kAddi4spn:
@@ -837,9 +843,9 @@ class Decoder {
}
void DecodeCompressedLi() {
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
- uint8_t rd = GetBits<uint8_t, 7, 5>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
+ uint8_t rd = GetBits<7, 5>();
int8_t imm = SignExtend<6>((high_imm << 5) + low_imm);
const OpImmArgs args = {
.opcode = OpImmOpcode::kAddi,
@@ -851,11 +857,11 @@ class Decoder {
}
void DecodeCompressedMiscAlu() {
- uint8_t r = GetBits<uint8_t, 7, 3>() + 8;
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+ uint8_t r = GetBits<7, 3>() + 8;
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
uint8_t imm = (high_imm << 5) + low_imm;
- switch (GetBits<uint8_t, 10, 2>()) {
+ switch (GetBits<10, 2>()) {
case 0b00: {
const ShiftImmArgs args = {
.opcode = ShiftImmOpcode::kSrli,
@@ -884,10 +890,10 @@ class Decoder {
return insn_consumer_->OpImm(args);
}
}
- uint8_t rs2 = GetBits<uint8_t, 2, 3>() + 8;
- if (GetBits<uint8_t, 12, 1>() == 0) {
+ uint8_t rs2 = GetBits<2, 3>() + 8;
+ if (GetBits<12, 1>() == 0) {
OpOpcode opcode;
- switch (GetBits<uint8_t, 5, 2>()) {
+ switch (GetBits<5, 2>()) {
case 0b00:
opcode = OpOpcode::kSub;
break;
@@ -910,7 +916,7 @@ class Decoder {
return insn_consumer_->Op(args);
} else {
Op32Opcode opcode;
- switch (GetBits<uint8_t, 5, 2>()) {
+ switch (GetBits<5, 2>()) {
case 0b00:
opcode = Op32Opcode::kSubw;
break;
@@ -932,8 +938,8 @@ class Decoder {
template <auto kOperandType>
void DecodeCompressedStoresp() {
- uint8_t raw_imm = GetBits<uint8_t, 7, 6>();
- uint8_t rs2 = GetBits<uint8_t, 2, 5>();
+ uint8_t raw_imm = GetBits<7, 6>();
+ uint8_t rs2 = GetBits<2, 5>();
constexpr uint8_t k32bit[64] = {
0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31, 0x02, 0x12, 0x22, 0x32, 0x03,
0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34, 0x05, 0x15, 0x25, 0x35, 0x06, 0x16,
@@ -957,9 +963,9 @@ class Decoder {
}
void DecodeCompressedLuiAddi16sp() {
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
- uint8_t rd = GetBits<uint8_t, 7, 5>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
+ uint8_t rd = GetBits<7, 5>();
if (rd != 2) {
int32_t imm = SignExtend<18>((high_imm << 17) + (low_imm << 12));
const UpperImmArgs args = {
@@ -986,8 +992,8 @@ class Decoder {
template <enum LoadStore kLoadStore, auto kOperandType>
void DecodeCompressedLoadStore() {
- uint8_t low_imm = GetBits<uint8_t, 5, 2>();
- uint8_t high_imm = GetBits<uint8_t, 10, 3>();
+ uint8_t low_imm = GetBits<5, 2>();
+ uint8_t high_imm = GetBits<10, 3>();
uint8_t imm;
if constexpr ((uint8_t(kOperandType) & 1) == 0) {
constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
@@ -995,8 +1001,8 @@ class Decoder {
} else {
imm = (low_imm << 6 | high_imm << 3);
}
- uint8_t rd = GetBits<uint8_t, 2, 3>();
- uint8_t rs = GetBits<uint8_t, 7, 3>();
+ uint8_t rd = GetBits<2, 3>();
+ uint8_t rs = GetBits<7, 3>();
if constexpr (kLoadStore == LoadStore::kStore) {
const StoreArgsTemplate<decltype(kOperandType)> args = {
.operand_type = kOperandType,
@@ -1018,9 +1024,9 @@ class Decoder {
template <auto kOperandType>
void DecodeCompressedLoadsp() {
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
- uint8_t rd = GetBits<uint8_t, 7, 5>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
+ uint8_t rd = GetBits<7, 5>();
constexpr uint8_t k32bitLow[32] = {0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31,
0x02, 0x12, 0x22, 0x32, 0x03, 0x13, 0x23, 0x33,
0x04, 0x14, 0x24, 0x34, 0x05, 0x15, 0x25, 0x35,
@@ -1041,10 +1047,10 @@ class Decoder {
}
void DecodeCompressedAddi() {
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
int8_t imm = SignExtend<6>(high_imm << 5 | low_imm);
- uint8_t r = GetBits<uint8_t, 7, 5>();
+ uint8_t r = GetBits<7, 5>();
if (r == 0 || imm == 0) {
insn_consumer_->Nop();
}
@@ -1058,10 +1064,10 @@ class Decoder {
}
void DecodeCompressedAddiw() {
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
int8_t imm = SignExtend<6>(high_imm << 5 | low_imm);
- uint8_t r = GetBits<uint8_t, 7, 5>();
+ uint8_t r = GetBits<7, 5>();
const OpImm32Args args = {
.opcode = OpImm32Opcode::kAddiw,
.dst = r,
@@ -1076,12 +1082,12 @@ class Decoder {
constexpr uint8_t kBLow[32] = {0x00, 0x20, 0x02, 0x22, 0x04, 0x24, 0x06, 0x26, 0x40, 0x60, 0x42,
0x62, 0x44, 0x64, 0x46, 0x66, 0x80, 0xa0, 0x82, 0xa2, 0x84, 0xa4,
0x86, 0xa6, 0xc0, 0xe0, 0xc2, 0xe2, 0xc4, 0xe4, 0xc6, 0xe6};
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 10, 3>();
- uint8_t rs = GetBits<uint8_t, 7, 3>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<10, 3>();
+ uint8_t rs = GetBits<7, 3>();
const BranchArgs args = {
- .opcode = BranchOpcode(GetBits<uint8_t, 13, 1>()),
+ .opcode = BranchOpcode(GetBits<13, 1>()),
.src1 = uint8_t(8 + rs),
.src2 = 0,
.offset = static_cast<int16_t>(SignExtend<9>(kBHigh[high_imm] + kBLow[low_imm])),
@@ -1104,8 +1110,7 @@ class Decoder {
};
const JumpAndLinkArgs args = {
.dst = 0,
- .offset =
- bit_cast<int16_t>(kJHigh[GetBits<uint16_t, 8, 5>()]) | kJLow[GetBits<uint16_t, 2, 6>()],
+ .offset = bit_cast<int16_t>(kJHigh[GetBits<8, 5>()]) | kJLow[GetBits<2, 6>()],
.insn_len = 2,
};
insn_consumer_->JumpAndLink(args);
@@ -1116,8 +1121,7 @@ class Decoder {
0x0, 0x40, 0x80, 0xc0, 0x4, 0x44, 0x84, 0xc4, 0x8, 0x48, 0x88, 0xc8, 0xc, 0x4c, 0x8c, 0xcc};
constexpr uint8_t kAddi4spnLow[16] = {
0x0, 0x2, 0x1, 0x3, 0x10, 0x12, 0x11, 0x13, 0x20, 0x22, 0x21, 0x23, 0x30, 0x32, 0x31, 0x33};
- int16_t imm = (kAddi4spnHigh[GetBits<uint8_t, 9, 4>()] | kAddi4spnLow[GetBits<uint8_t, 5, 4>()])
- << 2;
+ int16_t imm = (kAddi4spnHigh[GetBits<9, 4>()] | kAddi4spnLow[GetBits<5, 4>()]) << 2;
// If immediate is zero then this instruction is treated as unimplemented.
// This includes RISC-V dedicated 16bit “unimplemented instruction” 0x0000.
if (imm == 0) {
@@ -1125,7 +1129,7 @@ class Decoder {
}
const OpImmArgs args = {
.opcode = OpImmOpcode::kAddi,
- .dst = uint8_t(8 + GetBits<uint8_t, 2, 3>()),
+ .dst = uint8_t(8 + GetBits<2, 3>()),
.src = 2,
.imm = imm,
};
@@ -1133,9 +1137,9 @@ class Decoder {
}
void DecodeCompressedJr_Jalr_Mv_Add() {
- uint8_t r = GetBits<uint8_t, 7, 5>();
- uint8_t rs2 = GetBits<uint8_t, 2, 5>();
- if (GetBits<uint8_t, 12, 1>()) {
+ uint8_t r = GetBits<7, 5>();
+ uint8_t rs2 = GetBits<2, 5>();
+ if (GetBits<12, 1>()) {
if (r == 0 && rs2 == 0) {
const SystemArgs args = {
.opcode = SystemOpcode::kEbreak,
@@ -1180,9 +1184,9 @@ class Decoder {
}
void DecodeCompressedSlli() {
- uint8_t r = GetBits<uint8_t, 7, 5>();
- uint8_t low_imm = GetBits<uint8_t, 2, 5>();
- uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+ uint8_t r = GetBits<7, 5>();
+ uint8_t low_imm = GetBits<2, 5>();
+ uint8_t high_imm = GetBits<12, 1>();
uint8_t imm = (high_imm << 5) + low_imm;
const ShiftImmArgs args = {
.opcode = ShiftImmOpcode::kSlli,
@@ -1194,7 +1198,7 @@ class Decoder {
}
uint8_t DecodeBaseInstruction() {
- BaseOpcode opcode_bits{GetBits<uint8_t, 2, 5>()};
+ BaseOpcode opcode_bits{GetBits<2, 5>()};
switch (opcode_bits) {
case BaseOpcode::kLoad:
@@ -1264,11 +1268,13 @@ class Decoder {
}
private:
- template <typename ResultType, uint32_t start, uint32_t size>
- ResultType GetBits() {
- static_assert(std::is_unsigned_v<ResultType>, "Only unsigned types are supported");
- static_assert(sizeof(ResultType) * CHAR_BIT >= size, "Too small ResultType for size");
+ template <uint32_t start, uint32_t size>
+ auto GetBits() {
static_assert((start + size) <= 32 && size > 0, "Invalid start or size value");
+ using ResultType = std::conditional_t<
+ size == 1,
+ bool,
+ std::conditional_t<size <= 8, uint8_t, std::conditional_t<size <= 16, uint16_t, uint32_t>>>;
uint32_t shifted_val = code_ << (32 - start - size);
return static_cast<ResultType>(shifted_val >> (32 - size));
}
@@ -1294,32 +1300,32 @@ class Decoder {
}
void DecodeMiscMem() {
- uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
+ uint8_t low_opcode = GetBits<12, 3>();
switch (low_opcode) {
case 0b000: {
- uint8_t high_opcode = GetBits<uint8_t, 28, 4>();
+ uint8_t high_opcode = GetBits<28, 4>();
FenceOpcode opcode = FenceOpcode{high_opcode};
const FenceArgs args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
- .sw = bool(GetBits<uint8_t, 20, 1>()),
- .sr = bool(GetBits<uint8_t, 21, 1>()),
- .so = bool(GetBits<uint8_t, 22, 1>()),
- .si = bool(GetBits<uint8_t, 23, 1>()),
- .pw = bool(GetBits<uint8_t, 24, 1>()),
- .pr = bool(GetBits<uint8_t, 25, 1>()),
- .po = bool(GetBits<uint8_t, 26, 1>()),
- .pi = bool(GetBits<uint8_t, 27, 1>()),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
+ .sw = GetBits<20, 1>(),
+ .sr = GetBits<21, 1>(),
+ .so = GetBits<22, 1>(),
+ .si = GetBits<23, 1>(),
+ .pw = GetBits<24, 1>(),
+ .pr = GetBits<25, 1>(),
+ .po = GetBits<26, 1>(),
+ .pi = GetBits<27, 1>(),
};
insn_consumer_->Fence(args);
break;
}
case 0b001: {
- uint16_t imm = GetBits<uint16_t, 20, 12>();
+ uint16_t imm = GetBits<20, 12>();
const FenceIArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
.imm = SignExtend<12>(imm),
};
insn_consumer_->FenceI(args);
@@ -1332,8 +1338,8 @@ class Decoder {
template <typename OpcodeType>
void DecodeOp() {
- uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
- uint8_t high_opcode = GetBits<uint8_t, 25, 7>();
+ uint8_t low_opcode = GetBits<12, 3>();
+ uint8_t high_opcode = GetBits<25, 7>();
uint16_t opcode_bits = static_cast<int16_t>(low_opcode | (high_opcode << 3));
OpcodeType opcode{opcode_bits};
OpSingleInputOpcode single_input_opcode{opcode_bits};
@@ -1348,29 +1354,29 @@ class Decoder {
}
const OpArgsTemplate<OpcodeType> args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src1 = GetBits<uint8_t, 15, 5>(),
- .src2 = GetBits<uint8_t, 20, 5>(),
+ .dst = GetBits<7, 5>(),
+ .src1 = GetBits<15, 5>(),
+ .src2 = GetBits<20, 5>(),
};
insn_consumer_->Op(args);
}
void DecodeSingleInputOp(OpSingleInputOpcode opcode) {
- uint8_t src1 = GetBits<uint8_t, 15, 5>();
- uint8_t src2 = GetBits<uint8_t, 20, 5>();
+ uint8_t src1 = GetBits<15, 5>();
+ uint8_t src2 = GetBits<20, 5>();
if (src2 != 0) {
return Undefined();
}
- const OpSingleInputArgs args = {.opcode = opcode, .dst = GetBits<uint8_t, 7, 5>(), .src = src1};
+ const OpSingleInputArgs args = {.opcode = opcode, .dst = GetBits<7, 5>(), .src = src1};
insn_consumer_->OpSingleInput(args);
}
void DecodeAmo() {
- uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
- uint8_t high_opcode = GetBits<uint8_t, 27, 5>();
+ uint8_t low_opcode = GetBits<12, 3>();
+ uint8_t high_opcode = GetBits<27, 5>();
// lr instruction must have rs2 == 0
- if (high_opcode == 0b00010 && GetBits<uint8_t, 20, 5>() != 0) {
+ if (high_opcode == 0b00010 && GetBits<20, 5>() != 0) {
return Undefined();
}
AmoOpcode opcode = AmoOpcode{high_opcode};
@@ -1378,43 +1384,43 @@ class Decoder {
const AmoArgs args = {
.opcode = opcode,
.operand_type = operand_type,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src1 = GetBits<uint8_t, 15, 5>(),
- .src2 = GetBits<uint8_t, 20, 5>(),
- .rl = bool(GetBits<uint8_t, 25, 1>()),
- .aq = bool(GetBits<uint8_t, 26, 1>()),
+ .dst = GetBits<7, 5>(),
+ .src1 = GetBits<15, 5>(),
+ .src2 = GetBits<20, 5>(),
+ .rl = GetBits<25, 1>(),
+ .aq = GetBits<26, 1>(),
};
insn_consumer_->Amo(args);
}
void DecodeFma() {
- uint8_t operand_type = GetBits<uint8_t, 25, 2>();
- uint8_t opcode_bits = GetBits<uint8_t, 2, 2>();
+ uint8_t operand_type = GetBits<25, 2>();
+ uint8_t opcode_bits = GetBits<2, 2>();
const FmaArgs args = {
.opcode = FmaOpcode(opcode_bits),
.operand_type = FloatOperandType(operand_type),
- .dst = GetBits<uint8_t, 7, 5>(),
- .src1 = GetBits<uint8_t, 15, 5>(),
- .src2 = GetBits<uint8_t, 20, 5>(),
- .src3 = GetBits<uint8_t, 27, 5>(),
- .rm = GetBits<uint8_t, 12, 3>(),
+ .dst = GetBits<7, 5>(),
+ .src1 = GetBits<15, 5>(),
+ .src2 = GetBits<20, 5>(),
+ .src3 = GetBits<27, 5>(),
+ .rm = GetBits<12, 3>(),
};
insn_consumer_->Fma(args);
}
void DecodeLui() {
- int32_t imm = GetBits<uint32_t, 12, 20>();
+ int32_t imm = GetBits<12, 20>();
const UpperImmArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
+ .dst = GetBits<7, 5>(),
.imm = imm << 12,
};
insn_consumer_->Lui(args);
}
void DecodeAuipc() {
- int32_t imm = GetBits<uint32_t, 12, 20>();
+ int32_t imm = GetBits<12, 20>();
const UpperImmArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
+ .dst = GetBits<7, 5>(),
.imm = imm << 12,
};
insn_consumer_->Auipc(args);
@@ -1424,19 +1430,19 @@ class Decoder {
void DecodeLoad() {
OperandTypeEnum operand_type;
if constexpr (std::is_same_v<OperandTypeEnum, FloatOperandType>) {
- auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<uint8_t, 12, 3>()];
+ auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<12, 3>()];
if (!decoded_operand_type.has_value()) {
return Undefined();
}
operand_type = *decoded_operand_type;
} else {
- operand_type = OperandTypeEnum{GetBits<uint8_t, 12, 3>()};
+ operand_type = OperandTypeEnum{GetBits<12, 3>()};
}
const LoadArgsTemplate<OperandTypeEnum> args = {
.operand_type = operand_type,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
- .offset = SignExtend<12>(GetBits<uint16_t, 20, 12>()),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
+ .offset = SignExtend<12>(GetBits<20, 12>()),
};
insn_consumer_->Load(args);
}
@@ -1445,23 +1451,23 @@ class Decoder {
void DecodeStore() {
OperandTypeEnum operand_type;
if constexpr (std::is_same_v<OperandTypeEnum, FloatOperandType>) {
- auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<uint8_t, 12, 3>()];
+ auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<12, 3>()];
if (!decoded_operand_type.has_value()) {
return Undefined();
}
operand_type = *decoded_operand_type;
} else {
- operand_type = OperandTypeEnum{GetBits<uint8_t, 12, 3>()};
+ operand_type = OperandTypeEnum{GetBits<12, 3>()};
}
- uint16_t low_imm = GetBits<uint16_t, 7, 5>();
- uint16_t high_imm = GetBits<uint16_t, 25, 7>();
+ uint16_t low_imm = GetBits<7, 5>();
+ uint16_t high_imm = GetBits<25, 7>();
const StoreArgsTemplate<OperandTypeEnum> args = {
.operand_type = operand_type,
- .src = GetBits<uint8_t, 15, 5>(),
+ .src = GetBits<15, 5>(),
.offset = SignExtend<12>(static_cast<int16_t>(low_imm | (high_imm << 5))),
- .data = GetBits<uint8_t, 20, 5>(),
+ .data = GetBits<20, 5>(),
};
insn_consumer_->Store(args);
}
@@ -1471,37 +1477,36 @@ class Decoder {
typename BitmanipOpcodeType,
uint32_t kShiftFieldSize>
void DecodeOp() {
- uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
+ uint8_t low_opcode = GetBits<12, 3>();
if (low_opcode != 0b001 && low_opcode != 0b101) {
OpOpcodeType opcode{low_opcode};
- uint16_t imm = GetBits<uint16_t, 20, 12>();
+ uint16_t imm = GetBits<20, 12>();
const OpImmArgsTemplate<OpOpcodeType> args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
.imm = SignExtend<12>(imm),
};
insn_consumer_->OpImm(args);
- } else if ((GetBits<uint16_t, 31, 1>() +
- GetBits<uint16_t, 20 + kShiftFieldSize, 10 - kShiftFieldSize>()) ==
+ } else if ((GetBits<31, 1>() + GetBits<20 + kShiftFieldSize, 10 - kShiftFieldSize>()) ==
0) { // For Canonical Shift Instructions from RV64G the opcode contains all
// zeros except for the 30th (second highest) bit.
- uint16_t high_opcode = GetBits<uint16_t, 20 + kShiftFieldSize, 12 - kShiftFieldSize>();
+ uint16_t high_opcode = GetBits<20 + kShiftFieldSize, 12 - kShiftFieldSize>();
ShiftOpcodeType opcode{
static_cast<std::underlying_type_t<ShiftOpcodeType>>(low_opcode | (high_opcode << 3))};
const ShiftImmArgsTemplate<ShiftOpcodeType> args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
- .imm = GetBits<uint8_t, 20, kShiftFieldSize>(),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
+ .imm = GetBits<20, kShiftFieldSize>(),
};
insn_consumer_->OpImm(args);
} else {
- uint8_t shamt = GetBits<uint8_t, 20, kShiftFieldSize>();
- uint16_t high_opcode = GetBits<uint16_t, 20 + kShiftFieldSize, 12 - kShiftFieldSize>();
+ uint8_t shamt = GetBits<20, kShiftFieldSize>();
+ uint16_t high_opcode = GetBits<20 + kShiftFieldSize, 12 - kShiftFieldSize>();
BitmanipOpcodeType opcode{static_cast<uint16_t>(low_opcode | (high_opcode << 3))};
bool has_shamt = false;
@@ -1527,14 +1532,14 @@ class Decoder {
}
// TODO(b/291851792): Refactor instructions with shamt into ShiftImmArgs
if (!has_shamt) {
- high_opcode = GetBits<uint16_t, 20, 12>();
+ high_opcode = GetBits<20, 12>();
opcode = BitmanipOpcodeType{static_cast<uint16_t>(low_opcode | (high_opcode << 3))};
shamt = 0;
}
const BitmanipImmArgsTemplate<BitmanipOpcodeType> args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
.shamt = shamt,
};
insn_consumer_->OpImm(args);
@@ -1542,20 +1547,20 @@ class Decoder {
}
void DecodeBranch() {
- BranchOpcode opcode{GetBits<uint8_t, 12, 3>()};
+ BranchOpcode opcode{GetBits<12, 3>()};
// Decode the offset.
- auto low_imm = GetBits<uint16_t, 8, 4>();
- auto mid_imm = GetBits<uint16_t, 25, 6>();
- auto bit11_imm = GetBits<uint16_t, 7, 1>();
- auto bit12_imm = GetBits<uint16_t, 31, 1>();
+ auto low_imm = GetBits<8, 4>();
+ auto mid_imm = GetBits<25, 6>();
+ auto bit11_imm = GetBits<7, 1>();
+ auto bit12_imm = GetBits<31, 1>();
auto offset =
static_cast<int16_t>(low_imm | (mid_imm << 4) | (bit11_imm << 10) | (bit12_imm << 11));
const BranchArgs args = {
.opcode = opcode,
- .src1 = GetBits<uint8_t, 15, 5>(),
- .src2 = GetBits<uint8_t, 20, 5>(),
+ .src1 = GetBits<15, 5>(),
+ .src2 = GetBits<20, 5>(),
// The offset is encoded as 2-byte units, we need to multiply by 2.
.offset = SignExtend<13>(static_cast<int16_t>(offset * 2)),
};
@@ -1564,15 +1569,15 @@ class Decoder {
void DecodeJumpAndLink() {
// Decode the offset.
- auto low_imm = GetBits<uint32_t, 21, 10>();
- auto mid_imm = GetBits<uint32_t, 12, 8>();
- auto bit11_imm = GetBits<uint32_t, 20, 1>();
- auto bit20_imm = GetBits<uint32_t, 31, 1>();
+ auto low_imm = GetBits<21, 10>();
+ auto mid_imm = GetBits<12, 8>();
+ auto bit11_imm = GetBits<20, 1>();
+ auto bit20_imm = GetBits<31, 1>();
auto offset =
static_cast<int32_t>(low_imm | (bit11_imm << 10) | (mid_imm << 11) | (bit20_imm << 19));
const JumpAndLinkArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
+ .dst = GetBits<7, 5>(),
// The offset is encoded as 2-byte units, we need to multiply by 2.
.offset = SignExtend<21>(offset * 2),
.insn_len = 4,
@@ -1584,13 +1589,13 @@ class Decoder {
// Bit #29 = 1: means rm is an opcode extension and not operand.
// Bit #30 = 1: means rs2 is an opcode extension and not operand.
// Bit #31 = 1: selects general purpose register instead of floating point register as target.
- uint8_t operand_type = GetBits<uint8_t, 25, 2>();
- uint8_t opcode_bits = GetBits<uint8_t, 27, 2>();
- uint8_t rd = GetBits<uint8_t, 7, 5>();
- uint8_t rs1 = GetBits<uint8_t, 15, 5>();
- uint8_t rs2 = GetBits<uint8_t, 20, 5>();
- uint8_t rm = GetBits<uint8_t, 12, 3>();
- switch (GetBits<uint8_t, 29, 3>()) {
+ uint8_t operand_type = GetBits<25, 2>();
+ uint8_t opcode_bits = GetBits<27, 2>();
+ uint8_t rd = GetBits<7, 5>();
+ uint8_t rs1 = GetBits<15, 5>();
+ uint8_t rs2 = GetBits<20, 5>();
+ uint8_t rm = GetBits<12, 3>();
+ switch (GetBits<29, 3>()) {
case 0b000: {
const OpFpArgs args = {
.opcode = OpFpOpcode(opcode_bits),
@@ -1728,14 +1733,14 @@ class Decoder {
}
void DecodeOpV() {
- uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
- bool vm = GetBits<uint8_t, 25, 1>();
- uint8_t opcode = GetBits<uint8_t, 26, 6>();
- uint8_t dst = GetBits<uint8_t, 7, 5>();
- // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 2rd operand.
+ uint8_t low_opcode = GetBits<12, 3>();
+ bool vm = GetBits<25, 1>();
+ uint8_t opcode = GetBits<26, 6>();
+ uint8_t dst = GetBits<7, 5>();
+ // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 3rd operand.
// FMA instructions are exception, but there are not that many of these.
- uint8_t src1 = GetBits<uint8_t, 20, 5>();
- uint8_t src2 = GetBits<uint8_t, 15, 5>();
+ uint8_t src1 = GetBits<20, 5>();
+ uint8_t src2 = GetBits<15, 5>();
switch (low_opcode) {
case 0b000: {
const VOpIVvArgs args = {
@@ -1788,25 +1793,25 @@ class Decoder {
return insn_consumer_->OpVector(args);
}
case 0b111:
- if (GetBits<uint8_t, 31, 1>() == 0) {
+ if (GetBits<31, 1>() == 0) {
const VsetvliArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
- .vtype = GetBits<uint16_t, 20, 11>(),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
+ .vtype = GetBits<20, 11>(),
};
return insn_consumer_->Vsetvli(args);
- } else if (GetBits<uint8_t, 30, 1>() == 1) {
+ } else if (GetBits<30, 1>() == 1) {
const VsetivliArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
- .avl = GetBits<uint8_t, 15, 5>(),
- .vtype = GetBits<uint16_t, 20, 10>(),
+ .dst = GetBits<7, 5>(),
+ .avl = GetBits<15, 5>(),
+ .vtype = GetBits<20, 10>(),
};
return insn_consumer_->Vsetivli(args);
- } else if (GetBits<uint8_t, 25, 6>() == 0) {
+ } else if (GetBits<25, 6>() == 0) {
const VsetvlArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
- .src1 = GetBits<uint8_t, 15, 5>(),
- .src2 = GetBits<uint8_t, 20, 5>(),
+ .dst = GetBits<7, 5>(),
+ .src1 = GetBits<15, 5>(),
+ .src2 = GetBits<20, 5>(),
};
return insn_consumer_->Vsetvl(args);
}
@@ -1814,46 +1819,46 @@ class Decoder {
}
void DecodeSystem() {
- uint8_t low_opcode = GetBits<uint8_t, 12, 2>();
+ uint8_t low_opcode = GetBits<12, 2>();
if (low_opcode == 0b00) {
- int32_t opcode = GetBits<uint32_t, 7, 25>();
+ int32_t opcode = GetBits<7, 25>();
const SystemArgs args = {
.opcode = SystemOpcode(opcode),
};
return insn_consumer_->System(args);
}
- if (GetBits<uint8_t, 14, 1>()) {
+ if (GetBits<14, 1>()) {
CsrImmOpcode opcode = CsrImmOpcode(low_opcode);
const CsrImmArgs args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .imm = GetBits<uint8_t, 15, 5>(),
- .csr = GetBits<uint16_t, 20, 12>(),
+ .dst = GetBits<7, 5>(),
+ .imm = GetBits<15, 5>(),
+ .csr = GetBits<20, 12>(),
};
return insn_consumer_->Csr(args);
}
CsrOpcode opcode = CsrOpcode(low_opcode);
const CsrArgs args = {
.opcode = opcode,
- .dst = GetBits<uint8_t, 7, 5>(),
- .src = GetBits<uint8_t, 15, 5>(),
- .csr = GetBits<uint16_t, 20, 12>(),
+ .dst = GetBits<7, 5>(),
+ .src = GetBits<15, 5>(),
+ .csr = GetBits<20, 12>(),
};
return insn_consumer_->Csr(args);
}
void DecodeJumpAndLinkRegister() {
- if (GetBits<uint8_t, 12, 3>() != 0b000) {
+ if (GetBits<12, 3>() != 0b000) {
Undefined();
return;
}
// Decode sign-extend offset.
- int16_t offset = GetBits<uint16_t, 20, 12>();
+ int16_t offset = GetBits<20, 12>();
offset = static_cast<int16_t>(offset << 4) >> 4;
const JumpAndLinkRegisterArgs args = {
- .dst = GetBits<uint8_t, 7, 5>(),
- .base = GetBits<uint8_t, 15, 5>(),
+ .dst = GetBits<7, 5>(),
+ .base = GetBits<15, 5>(),
.offset = offset,
.insn_len = 4,
};
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index b48d9349..18b94dc3 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -654,9 +654,18 @@ class Interpreter {
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
void OpVector(const Decoder::VOpMVvArgs& args) {
switch (args.opcode) {
+ case Decoder::VOpMVvOpcode::kVmaddvv:
+ return OpVectorvv<intrinsics::Vmaddvv<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, args.src2);
+ case Decoder::VOpMVvOpcode::kVnmsubvv:
+ return OpVectorvv<intrinsics::Vnmsubvv<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, args.src2);
case Decoder::VOpMVvOpcode::kVmaccvv:
return OpVectorvv<intrinsics::Vmaccvv<ElementType, vta>, ElementType, vlmul, vta>(
args.dst, args.src1, args.src2);
+ case Decoder::VOpMVvOpcode::kVnmsacvv:
+ return OpVectorvv<intrinsics::Vnmsacvv<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, args.src2);
default:
Unimplemented();
}
@@ -730,9 +739,18 @@ class Interpreter {
template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) {
switch (args.opcode) {
+ case Decoder::VOpMVxOpcode::kVmaddvx:
+ return OpVectorvx<intrinsics::Vmaddvx<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, arg2);
+ case Decoder::VOpMVxOpcode::kVnmsubvx:
+ return OpVectorvx<intrinsics::Vnmsubvx<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, arg2);
case Decoder::VOpMVxOpcode::kVmaccvx:
return OpVectorvx<intrinsics::Vmaccvx<ElementType, vta>, ElementType, vlmul, vta>(
args.dst, args.src1, arg2);
+ case Decoder::VOpMVxOpcode::kVnmsacvx:
+ return OpVectorvx<intrinsics::Vnmsacvx<ElementType, vta>, ElementType, vlmul, vta>(
+ args.dst, args.src1, arg2);
default:
Unimplemented();
}
@@ -926,12 +944,30 @@ class Interpreter {
InactiveProcessing vma>
void OpVector(const Decoder::VOpMVvArgs& args) {
switch (args.opcode) {
+ case Decoder::VOpMVvOpcode::kVmaddvv:
+ return OpVectorvv<intrinsics::Vmaddvvm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, args.src2);
+ case Decoder::VOpMVvOpcode::kVnmsubvv:
+ return OpVectorvv<intrinsics::Vnmsubvvm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, args.src2);
case Decoder::VOpMVvOpcode::kVmaccvv:
return OpVectorvv<intrinsics::Vmaccvvm<ElementType, vta, vma>,
- ElementType,
- vlmul,
- vta,
- vma>(args.dst, args.src1, args.src2);
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, args.src2);
+ case Decoder::VOpMVvOpcode::kVnmsacvv:
+ return OpVectorvv<intrinsics::Vnmsacvvm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, args.src2);
default:
Unimplemented();
}
@@ -1023,12 +1059,30 @@ class Interpreter {
InactiveProcessing vma>
void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) {
switch (args.opcode) {
+ case Decoder::VOpMVxOpcode::kVmaddvx:
+ return OpVectorvx<intrinsics::Vmaddvxm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, arg2);
+ case Decoder::VOpMVxOpcode::kVnmsubvx:
+ return OpVectorvx<intrinsics::Vnmsubvxm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, arg2);
case Decoder::VOpMVxOpcode::kVmaccvx:
return OpVectorvx<intrinsics::Vmaccvxm<ElementType, vta, vma>,
ElementType,
vlmul,
vta,
vma>(args.dst, args.src1, arg2);
+ case Decoder::VOpMVxOpcode::kVnmsacvx:
+ return OpVectorvx<intrinsics::Vnmsacvxm<ElementType, vta, vma>,
+ ElementType,
+ vlmul,
+ vta,
+ vma>(args.dst, args.src1, arg2);
default:
Unimplemented();
}
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 724c4892..f4fe0a56 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -1853,6 +1853,212 @@ TEST_F(Riscv64InterpreterTest, TestVmacc) {
{0xbb11'11bd'1313'bf15, 0x6061'0c62'630e'6465},
{0x05b1'0707'b309'09b5, 0xab01'01ad'0303'af05}});
}
-} // namespace
+TEST_F(Riscv64InterpreterTest, TestVnmsac) {  // Exercises vnmsac.vv and vnmsac.vx. NOTE(review): the four brace-delimited tables per call look like expected results for 8/16/32/64-bit elements - confirm against TestVectorInstruction.
+ TestVectorInstruction(0xbd882457, // vnmsac.vv v8, v16, v24, v0.t
+ {{85, 83, 77, 67, 49, 35, 13, 243, 205, 179, 141, 99, 53, 3, 205, 147},  // table 1: 16 byte-sized values per row
+ {85, 19, 205, 131, 33, 227, 141, 51, 189, 115, 13, 163, 53, 195, 77, 211},
+ {85, 211, 77, 195, 17, 163, 13, 115, 173, 51, 141, 227, 53, 131, 205, 19},
+ {85, 147, 205, 3, 1, 99, 141, 179, 157, 243, 13, 35, 53, 67, 77, 83},
+ {85, 83, 77, 67, 241, 35, 13, 243, 141, 179, 141, 99, 53, 3, 205, 147},
+ {85, 19, 205, 131, 225, 227, 141, 51, 125, 115, 13, 163, 53, 195, 77, 211},
+ {85, 211, 77, 195, 209, 163, 13, 115, 109, 51, 141, 227, 53, 131, 205, 19},
+ {85, 147, 205, 3, 193, 99, 141, 179, 93, 243, 13, 35, 53, 67, 77, 83}},
+ {{0x5555, 0x3d4d, 0x0031, 0xad0d, 0x2bcd, 0x9c8d, 0xe435, 0x0bcd},  // table 2: 8 16-bit values per row
+ {0x1355, 0xfacd, 0xad21, 0x698d, 0xd7bd, 0x580d, 0x9f35, 0xc64d},
+ {0xcd55, 0xb44d, 0x5611, 0x220d, 0x7fad, 0x0f8d, 0x5635, 0x7ccd},
+ {0x8355, 0x69cd, 0xfb01, 0xd68d, 0x239d, 0xc30d, 0x0935, 0x2f4d},
+ {0x3555, 0x1b4d, 0x9bf1, 0x870d, 0xc38d, 0x728d, 0xb835, 0xddcd},
+ {0xe355, 0xc8cd, 0x38e1, 0x338d, 0x5f7d, 0x1e0d, 0x6335, 0x884d},
+ {0x8d55, 0x724d, 0xd1d1, 0xdc0d, 0xf76d, 0xc58d, 0x0a35, 0x2ecd},
+ {0x3355, 0x17cd, 0x66c1, 0x808d, 0x8b5d, 0x690d, 0xad35, 0xd14d}},
+ {{0x4d53'5555, 0x65bd'0031, 0x8068'2bcd, 0xa960'e435},  // table 3: 4 32-bit values per row
+ {0xc68f'1355, 0xcbe6'ad21, 0xe38f'd7bd, 0x1996'9f35},
+ {0x33c2'cd55, 0x2608'5611, 0x3aaf'7fad, 0x7dc4'5635},
+ {0x94ee'8355, 0x7421'fb01, 0x85c7'239d, 0xd5ea'0935},
+ {0xea12'3555, 0xb633'9bf1, 0xc4d6'c38d, 0x2207'b835},
+ {0x332d'e355, 0xec3d'38e1, 0xf7de'5f7d, 0x621d'6335},
+ {0x7041'8d55, 0x163e'd1d1, 0x1edd'f76d, 0x962b'0a35},
+ {0xa14d'3355, 0x3438'66c1, 0x39d5'8b5d, 0xbe30'ad35}},
+ {{0xe20d'2c41'4d53'5555, 0x4fdc'3c72'8068'2bcd},  // table 4: 2 64-bit values per row
+ {0xbead'4fa7'c68f'1355, 0x1e70'55d0'e38f'd7bd},
+ {0x7f35'5efe'33c2'cd55, 0xd0ec'5b1f'3aaf'7fad},
+ {0x23a5'5a44'94ee'8355, 0x6750'4c5d'85c7'239d},
+ {0xabfd'417a'ea12'3555, 0xe19c'298b'c4d6'c38d},
+ {0x183d'14a1'332d'e355, 0x3fcf'f2a9'f7de'5f7d},
+ {0x6864'd3b7'7041'8d55, 0x81eb'a7b8'1edd'f76d},
+ {0x9c74'7ebd'a14d'3355, 0xa7ef'48b6'39d5'8b5d}});
+ TestVectorInstruction(0xbd00e457, // vnmsac.vx v8, x1, v16, v0.t
+ {{85, 171, 1, 87, 173, 3, 89, 175, 5, 91, 177, 7, 93, 179, 9, 95},  // same four-table layout as the .vv call above
+ {181, 11, 97, 183, 13, 99, 185, 15, 101, 187, 17, 103, 189, 19, 105, 191},
+ {21, 107, 193, 23, 109, 195, 25, 111, 197, 27, 113, 199, 29, 115, 201, 31},
+ {117, 203, 33, 119, 205, 35, 121, 207, 37, 123, 209, 39, 125, 211, 41, 127},
+ {213, 43, 129, 215, 45, 131, 217, 47, 133, 219, 49, 135, 221, 51, 137, 223},
+ {53, 139, 225, 55, 141, 227, 57, 143, 229, 59, 145, 231, 61, 147, 233, 63},
+ {149, 235, 65, 151, 237, 67, 153, 239, 69, 155, 241, 71, 157, 243, 73, 159},
+ {245, 75, 161, 247, 77, 163, 249, 79, 165, 251, 81, 167, 253, 83, 169, 255}},
+ {{0xab55, 0x0201, 0x58ad, 0xaf59, 0x0605, 0x5cb1, 0xb35d, 0x0a09},
+ {0x60b5, 0xb761, 0x0e0d, 0x64b9, 0xbb65, 0x1211, 0x68bd, 0xbf69},
+ {0x1615, 0x6cc1, 0xc36d, 0x1a19, 0x70c5, 0xc771, 0x1e1d, 0x74c9},
+ {0xcb75, 0x2221, 0x78cd, 0xcf79, 0x2625, 0x7cd1, 0xd37d, 0x2a29},
+ {0x80d5, 0xd781, 0x2e2d, 0x84d9, 0xdb85, 0x3231, 0x88dd, 0xdf89},
+ {0x3635, 0x8ce1, 0xe38d, 0x3a39, 0x90e5, 0xe791, 0x3e3d, 0x94e9},
+ {0xeb95, 0x4241, 0x98ed, 0xef99, 0x4645, 0x9cf1, 0xf39d, 0x4a49},
+ {0xa0f5, 0xf7a1, 0x4e4d, 0xa4f9, 0xfba5, 0x5251, 0xa8fd, 0xffa9}},
+ {{0x5756'ab55, 0xaf59'58ad, 0x075c'0605, 0x5f5e'b35d},
+ {0xb761'60b5, 0x0f64'0e0d, 0x6766'bb65, 0xbf69'68bd},
+ {0x176c'1615, 0x6f6e'c36d, 0xc771'70c5, 0x1f74'1e1d},
+ {0x7776'cb75, 0xcf79'78cd, 0x277c'2625, 0x7f7e'd37d},
+ {0xd781'80d5, 0x2f84'2e2d, 0x8786'db85, 0xdf89'88dd},
+ {0x378c'3635, 0x8f8e'e38d, 0xe791'90e5, 0x3f94'3e3d},
+ {0x9796'eb95, 0xef99'98ed, 0x479c'4645, 0x9f9e'f39d},
+ {0xf7a1'a0f5, 0x4fa4'4e4d, 0xa7a6'fba5, 0xffa9'a8fd}},
+ {{0xaf59'58ad'5756'ab55, 0x0a09'5e08'075c'0605},
+ {0x64b9'6362'b761'60b5, 0xbf69'68bd'6766'bb65},
+ {0x1a19'6e18'176c'1615, 0x74c9'7372'c771'70c5},
+ {0xcf79'78cd'7776'cb75, 0x2a29'7e28'277c'2625},
+ {0x84d9'8382'd781'80d5, 0xdf89'88dd'8786'db85},
+ {0x3a39'8e38'378c'3635, 0x94e9'9392'e791'90e5},
+ {0xef99'98ed'9796'eb95, 0x4a49'9e48'479c'4645},
+ {0xa4f9'a3a2'f7a1'a0f5, 0xffa9'a8fd'a7a6'fba5}});
+}
+
+TEST_F(Riscv64InterpreterTest, TestVmadd) {  // Exercises vmadd.vv and vmadd.vx. NOTE(review): the four brace-delimited tables per call look like expected results for 8/16/32/64-bit elements - confirm against TestVectorInstruction.
+ TestVectorInstruction(0xa5882457, // vmadd.vv v8, v16, v24, v0.t
+ {{0, 87, 174, 5, 93, 179, 10, 97, 185, 15, 102, 189, 20, 107, 194, 25},  // table 1: 16 byte-sized values per row
+ {112, 199, 30, 117, 205, 35, 122, 209, 41, 127, 214, 45, 132, 219, 50, 137},
+ {224, 55, 142, 229, 61, 147, 234, 65, 153, 239, 70, 157, 244, 75, 162, 249},
+ {80, 167, 254, 85, 173, 3, 90, 177, 9, 95, 182, 13, 100, 187, 18, 105},
+ {192, 23, 110, 197, 29, 115, 202, 33, 121, 207, 38, 125, 212, 43, 130, 217},
+ {48, 135, 222, 53, 141, 227, 58, 145, 233, 63, 150, 237, 68, 155, 242, 73},
+ {160, 247, 78, 165, 253, 83, 170, 1, 89, 175, 6, 93, 180, 11, 98, 185},
+ {16, 103, 190, 21, 109, 195, 26, 113, 201, 31, 118, 205, 36, 123, 210, 41}},
+ {{0x5700, 0xafae, 0x085d, 0x610a, 0xb9b9, 0x1266, 0x6b14, 0xc3c2},  // table 2: 8 16-bit values per row
+ {0x1c70, 0x751e, 0xcdcd, 0x267a, 0x7f29, 0xd7d6, 0x3084, 0x8932},
+ {0xe1e0, 0x3a8e, 0x933d, 0xebea, 0x4499, 0x9d46, 0xf5f4, 0x4ea2},
+ {0xa750, 0xfffe, 0x58ad, 0xb15a, 0x0a09, 0x62b6, 0xbb64, 0x1412},
+ {0x6cc0, 0xc56e, 0x1e1d, 0x76ca, 0xcf79, 0x2826, 0x80d4, 0xd982},
+ {0x3230, 0x8ade, 0xe38d, 0x3c3a, 0x94e9, 0xed96, 0x4644, 0x9ef2},
+ {0xf7a0, 0x504e, 0xa8fd, 0x01aa, 0x5a59, 0xb306, 0x0bb4, 0x6462},
+ {0xbd10, 0x15be, 0x6e6d, 0xc71a, 0x1fc9, 0x7876, 0xd124, 0x29d2}},
+ {{0x0503'5700, 0x610a'085d, 0xbd10'b9b9, 0x1917'6b14},  // table 3: 4 32-bit values per row
+ {0x751e'1c70, 0xd124'cdcd, 0x2d2b'7f29, 0x8932'3084},
+ {0xe538'e1e0, 0x413f'933d, 0x9d46'4499, 0xf94c'f5f4},
+ {0x5553'a750, 0xb15a'58ad, 0x0d61'0a09, 0x6967'bb64},
+ {0xc56e'6cc0, 0x2175'1e1d, 0x7d7b'cf79, 0xd982'80d4},
+ {0x3589'3230, 0x918f'e38d, 0xed96'94e9, 0x499d'4644},
+ {0xa5a3'f7a0, 0x01aa'a8fd, 0x5db1'5a59, 0xb9b8'0bb4},
+ {0x15be'bd10, 0x71c5'6e6d, 0xcdcc'1fc9, 0x29d2'd124}},
+ {{0x610a'085d'0503'5700, 0xc3c2'15be'bd10'b9b9},  // table 4: 2 64-bit values per row
+ {0x267a'2322'751e'1c70, 0x8932'3084'2d2b'7f29},
+ {0xebea'3de7'e538'e1e0, 0x4ea2'4b49'9d46'4499},
+ {0xb15a'58ad'5553'a750, 0x1412'660f'0d61'0a09},
+ {0x76ca'7372'c56e'6cc0, 0xd982'80d4'7d7b'cf79},
+ {0x3c3a'8e38'3589'3230, 0x9ef2'9b99'ed96'94e9},
+ {0x01aa'a8fd'a5a3'f7a0, 0x6462'b65f'5db1'5a59},
+ {0xc71a'c3c3'15be'bd10, 0x29d2'd124'cdcc'1fc9}});
+ TestVectorInstruction(0xa500e457, // vmadd.vx v8, x1, v16, v0.t
+ {{114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129},  // same four-table layout as the .vv call above
+ {130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145},
+ {146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161},
+ {162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177},
+ {178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193},
+ {194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209},
+ {210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225},
+ {226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241}},
+ {{0x1d72, 0x1f74, 0x2176, 0x2378, 0x257a, 0x277c, 0x297e, 0x2b80},
+ {0x2d82, 0x2f84, 0x3186, 0x3388, 0x358a, 0x378c, 0x398e, 0x3b90},
+ {0x3d92, 0x3f94, 0x4196, 0x4398, 0x459a, 0x479c, 0x499e, 0x4ba0},
+ {0x4da2, 0x4fa4, 0x51a6, 0x53a8, 0x55aa, 0x57ac, 0x59ae, 0x5bb0},
+ {0x5db2, 0x5fb4, 0x61b6, 0x63b8, 0x65ba, 0x67bc, 0x69be, 0x6bc0},
+ {0x6dc2, 0x6fc4, 0x71c6, 0x73c8, 0x75ca, 0x77cc, 0x79ce, 0x7bd0},
+ {0x7dd2, 0x7fd4, 0x81d6, 0x83d8, 0x85da, 0x87dc, 0x89de, 0x8be0},
+ {0x8de2, 0x8fe4, 0x91e6, 0x93e8, 0x95ea, 0x97ec, 0x99ee, 0x9bf0}},
+ {{0x74c9'1d72, 0x78cd'2176, 0x7cd1'257a, 0x80d5'297e},
+ {0x84d9'2d82, 0x88dd'3186, 0x8ce1'358a, 0x90e5'398e},
+ {0x94e9'3d92, 0x98ed'4196, 0x9cf1'459a, 0xa0f5'499e},
+ {0xa4f9'4da2, 0xa8fd'51a6, 0xad01'55aa, 0xb105'59ae},
+ {0xb509'5db2, 0xb90d'61b6, 0xbd11'65ba, 0xc115'69be},
+ {0xc519'6dc2, 0xc91d'71c6, 0xcd21'75ca, 0xd125'79ce},
+ {0xd529'7dd2, 0xd92d'81d6, 0xdd31'85da, 0xe135'89de},
+ {0xe539'8de2, 0xe93d'91e6, 0xed41'95ea, 0xf145'99ee}},
+ {{0x2377'cc20'74c9'1d72, 0x2b7f'd428'7cd1'257a},
+ {0x3387'dc30'84d9'2d82, 0x3b8f'e438'8ce1'358a},
+ {0x4397'ec40'94e9'3d92, 0x4b9f'f448'9cf1'459a},
+ {0x53a7'fc50'a4f9'4da2, 0x5bb0'0458'ad01'55aa},
+ {0x63b8'0c60'b509'5db2, 0x6bc0'1468'bd11'65ba},
+ {0x73c8'1c70'c519'6dc2, 0x7bd0'2478'cd21'75ca},
+ {0x83d8'2c80'd529'7dd2, 0x8be0'3488'dd31'85da},
+ {0x93e8'3c90'e539'8de2, 0x9bf0'4498'ed41'95ea}});
+}
+
+TEST_F(Riscv64InterpreterTest, TestVnmsub) {  // Exercises vnmsub.vv and vnmsub.vx. NOTE(review): the four brace-delimited tables per call look like expected results for 8/16/32/64-bit elements - confirm against TestVectorInstruction.
+ TestVectorInstruction(0xad882457, // vnmsub.vv v8, v16, v24, v0.t
+ {{0, 173, 90, 7, 181, 97, 14, 187, 105, 21, 194, 111, 28, 201, 118, 35},  // table 1: 16 byte-sized values per row
+ {208, 125, 42, 215, 133, 49, 222, 139, 57, 229, 146, 63, 236, 153, 70, 243},
+ {160, 77, 250, 167, 85, 1, 174, 91, 9, 181, 98, 15, 188, 105, 22, 195},
+ {112, 29, 202, 119, 37, 209, 126, 43, 217, 133, 50, 223, 140, 57, 230, 147},
+ {64, 237, 154, 71, 245, 161, 78, 251, 169, 85, 2, 175, 92, 9, 182, 99},
+ {16, 189, 106, 23, 197, 113, 30, 203, 121, 37, 210, 127, 44, 217, 134, 51},
+ {224, 141, 58, 231, 149, 65, 238, 155, 73, 245, 162, 79, 252, 169, 86, 3},
+ {176, 93, 10, 183, 101, 17, 190, 107, 25, 197, 114, 31, 204, 121, 38, 211}},
+ {{0xad00, 0x5c5a, 0x0bb5, 0xbb0e, 0x6a69, 0x19c2, 0xc91c, 0x7876},  // table 2: 8 16-bit values per row
+ {0x27d0, 0xd72a, 0x8685, 0x35de, 0xe539, 0x9492, 0x43ec, 0xf346},
+ {0xa2a0, 0x51fa, 0x0155, 0xb0ae, 0x6009, 0x0f62, 0xbebc, 0x6e16},
+ {0x1d70, 0xccca, 0x7c25, 0x2b7e, 0xdad9, 0x8a32, 0x398c, 0xe8e6},
+ {0x9840, 0x479a, 0xf6f5, 0xa64e, 0x55a9, 0x0502, 0xb45c, 0x63b6},
+ {0x1310, 0xc26a, 0x71c5, 0x211e, 0xd079, 0x7fd2, 0x2f2c, 0xde86},
+ {0x8de0, 0x3d3a, 0xec95, 0x9bee, 0x4b49, 0xfaa2, 0xa9fc, 0x5956},
+ {0x08b0, 0xb80a, 0x6765, 0x16be, 0xc619, 0x7572, 0x24cc, 0xd426}},
+ {{0x0704'ad00, 0xbb0e'0bb5, 0x6f17'6a69, 0x2320'c91c},  // table 3: 4 32-bit values per row
+ {0xd72a'27d0, 0x8b33'8685, 0x3f3c'e539, 0xf346'43ec},
+ {0xa74f'a2a0, 0x5b59'0155, 0x0f62'6009, 0xc36b'bebc},
+ {0x7775'1d70, 0x2b7e'7c25, 0xdf87'dad9, 0x9391'398c},
+ {0x479a'9840, 0xfba3'f6f5, 0xafad'55a9, 0x63b6'b45c},
+ {0x17c0'1310, 0xcbc9'71c5, 0x7fd2'd079, 0x33dc'2f2c},
+ {0xe7e5'8de0, 0x9bee'ec95, 0x4ff8'4b49, 0x0401'a9fc},
+ {0xb80b'08b0, 0x6c14'6765, 0x201d'c619, 0xd427'24cc}},
+ {{0xbb0e'0bb5'0704'ad00, 0x7876'1e71'6f17'6a69},  // table 4: 2 64-bit values per row
+ {0x35de'312f'd72a'27d0, 0xf346'43ec'3f3c'e539},
+ {0xb0ae'56aa'a74f'a2a0, 0x6e16'6967'0f62'6009},
+ {0x2b7e'7c25'7775'1d70, 0xe8e6'8ee1'df87'dad9},
+ {0xa64e'a1a0'479a'9840, 0x63b6'b45c'afad'55a9},
+ {0x211e'c71b'17c0'1310, 0xde86'd9d7'7fd2'd079},
+ {0x9bee'ec95'e7e5'8de0, 0x5956'ff52'4ff8'4b49},
+ {0x16bf'1210'b80b'08b0, 0xd427'24cd'201d'c619}});
+ TestVectorInstruction(0xad00e457, // vnmsub.vx v8, x1, v16, v0.t
+ {{142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157},  // same four-table layout as the .vv call above
+ {158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173},
+ {174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189},
+ {190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205},
+ {206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221},
+ {222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237},
+ {238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253},
+ {254, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}},
+ {{0xe48e, 0xe690, 0xe892, 0xea94, 0xec96, 0xee98, 0xf09a, 0xf29c},
+ {0xf49e, 0xf6a0, 0xf8a2, 0xfaa4, 0xfca6, 0xfea8, 0x00aa, 0x02ac},
+ {0x04ae, 0x06b0, 0x08b2, 0x0ab4, 0x0cb6, 0x0eb8, 0x10ba, 0x12bc},
+ {0x14be, 0x16c0, 0x18c2, 0x1ac4, 0x1cc6, 0x1ec8, 0x20ca, 0x22cc},
+ {0x24ce, 0x26d0, 0x28d2, 0x2ad4, 0x2cd6, 0x2ed8, 0x30da, 0x32dc},
+ {0x34de, 0x36e0, 0x38e2, 0x3ae4, 0x3ce6, 0x3ee8, 0x40ea, 0x42ec},
+ {0x44ee, 0x46f0, 0x48f2, 0x4af4, 0x4cf6, 0x4ef8, 0x50fa, 0x52fc},
+ {0x54fe, 0x5700, 0x5902, 0x5b04, 0x5d06, 0x5f08, 0x610a, 0x630c}},
+ {{0x913a'e48e, 0x953e'e892, 0x9942'ec96, 0x9d46'f09a},
+ {0xa14a'f49e, 0xa54e'f8a2, 0xa952'fca6, 0xad57'00aa},
+ {0xb15b'04ae, 0xb55f'08b2, 0xb963'0cb6, 0xbd67'10ba},
+ {0xc16b'14be, 0xc56f'18c2, 0xc973'1cc6, 0xcd77'20ca},
+ {0xd17b'24ce, 0xd57f'28d2, 0xd983'2cd6, 0xdd87'30da},
+ {0xe18b'34de, 0xe58f'38e2, 0xe993'3ce6, 0xed97'40ea},
+ {0xf19b'44ee, 0xf59f'48f2, 0xf9a3'4cf6, 0xfda7'50fa},
+ {0x01ab'54fe, 0x05af'5902, 0x09b3'5d06, 0x0db7'610a}},
+ {{0xea94'3de7'913a'e48e, 0xf29c'45ef'9942'ec96},
+ {0xfaa4'4df7'a14a'f49e, 0x02ac'55ff'a952'fca6},
+ {0x0ab4'5e07'b15b'04ae, 0x12bc'660f'b963'0cb6},
+ {0x1ac4'6e17'c16b'14be, 0x22cc'761f'c973'1cc6},
+ {0x2ad4'7e27'd17b'24ce, 0x32dc'862f'd983'2cd6},
+ {0x3ae4'8e37'e18b'34de, 0x42ec'963f'e993'3ce6},
+ {0x4af4'9e47'f19b'44ee, 0x52fc'a64f'f9a3'4cf6},
+ {0x5b04'ae58'01ab'54fe, 0x630c'b660'09b3'5d06}});
+}
+} // namespace
} // namespace berberis
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index bb3b19d9..d034e33b 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -250,9 +250,21 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(sll, auto [arg1, arg2] = std::tuple{args...};
DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(sll, auto [arg1, arg2] = std::tuple{args...};
(arg1 << mask_bits(arg2)))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(macc, auto [arg1, arg2] = std::tuple{args...};
- ((arg1 * arg2) + vd));
+ ((arg2 * arg1) + vd))
DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(macc, auto [arg1, arg2] = std::tuple{args...};
- ((arg1 * arg2) + vd));
+ ((arg2 * arg1) + vd))
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsac, auto [arg1, arg2] = std::tuple{args...};
+ (-(arg2 * arg1) + vd))  // nmsac (vector-vector): negated product of the two sources, accumulated onto vd
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsac, auto [arg1, arg2] = std::tuple{args...};
+ (-(arg2 * arg1) + vd))  // nmsac (vector-scalar): same expression as the VV form
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(madd, auto [arg1, arg2] = std::tuple{args...};
+ ((arg2 * vd) + arg1))  // madd (vector-vector): unlike macc, vd is a multiplicand and arg1 is the addend
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(madd, auto [arg1, arg2] = std::tuple{args...};
+ ((arg2 * vd) + arg1))  // madd (vector-scalar): same expression as the VV form
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsub, auto [arg1, arg2] = std::tuple{args...};
+ (-(arg2 * vd) + arg1))  // nmsub (vector-vector): negated (arg2 * vd) product, plus arg1
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsub, auto [arg1, arg2] = std::tuple{args...};
+ (-(arg2 * vd) + arg1))  // nmsub (vector-scalar): same expression as the VV form
#undef DEFINE_ARITHMETIC_INTRINSIC
#undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS