Snap for 11220357 from 87ada30fbde026edeb4a9300daad285f188e6b69 to 24Q1-release android-14.0.0_r37 android-14.0.0_r36 android-14.0.0_r35 android-14.0.0_r34 android-14.0.0_r33 android-14.0.0_r32 android-14.0.0_r31 android-14.0.0_r30 android-14.0.0_r29 android14-qpr2-s5-release android14-qpr2-s4-release android14-qpr2-s3-release android14-qpr2-s2-release android14-qpr2-s1-release android14-qpr2-release

Change-Id: Ida0c9e19cfea25dcca6d4e4b1db91ba8740b0926
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-12-15 00:37:31 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-12-15 00:37:31 +0000
commit: f940c1f5262a9c153421c7e34384a6dd3f686d95 (patch)
tree: a91a3b3bcfd79f16aaad77c86a21d799c005d1d0
parent: 2e3405345ffa06e045abd38aede28b36ad572647 (diff)
parent: 87ada30fbde026edeb4a9300daad285f188e6b69 (diff)
download: binary_translation-android14-qpr2-release.tar.gz
5 files changed, 456 insertions, 178 deletions
diff --git a/base/include/berberis/base/bit_util.h b/base/include/berberis/base/bit_util.h
index e3610dde..4fb08470 100644
--- a/base/include/berberis/base/bit_util.h
+++ b/base/include/berberis/base/bit_util.h
@@ -70,8 +70,9 @@ constexpr bool IsAligned(T* p, size_t align) {
 template <typename T>
 constexpr T BitUtilLog2(T x) {
   static_assert(std::is_integral_v<T>, "Log2: T must be integral");
-  DCHECK(IsPowerOf2(x));
-  return x == 1 ? 0 : BitUtilLog2(x >> 1) + 1;
+  CHECK(IsPowerOf2(x));
+  // TODO(b/260725458): Use std::countr_zero after C++20 becomes available
+  return __builtin_ctz(x);
 }
 
 // Verify that argument value fits into a target.
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index 7ad556c1..159d591c 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -341,7 +341,10 @@ class Decoder {
   };
 
   enum class VOpMVvOpcode : uint8_t {
+    kVmaddvv = 0b101001,
+    kVnmsubvv = 0b101011,
     kVmaccvv = 0b101101,
+    kVnmsacvv = 0b101111,
     kMaxValue = 0b111111
   };
 
@@ -390,7 +393,10 @@ class Decoder {
   };
 
   enum class VOpMVxOpcode : uint8_t {
+    kVmaddvx = 0b101001,
+    kVnmsubvx = 0b101011,
     kVmaccvx = 0b101101,
+    kVnmsacvx = 0b101111,
     kMaxValue = 0b111111
   };
 
@@ -760,7 +766,7 @@ class Decoder {
   }
 
   uint8_t DecodeCompressedInstruction() {
-    CompressedOpcode opcode_bits{(GetBits<uint8_t, 13, 3>() << 2) | GetBits<uint8_t, 0, 2>()};
+    CompressedOpcode opcode_bits{(GetBits<13, 3>() << 2) | GetBits<0, 2>()};
 
     switch (opcode_bits) {
       case CompressedOpcode::kAddi4spn:
@@ -837,9 +843,9 @@ class Decoder {
   }
 
   void DecodeCompressedLi() {
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
-    uint8_t rd = GetBits<uint8_t, 7, 5>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
+    uint8_t rd = GetBits<7, 5>();
     int8_t imm = SignExtend<6>((high_imm << 5) + low_imm);
     const OpImmArgs args = {
         .opcode = OpImmOpcode::kAddi,
@@ -851,11 +857,11 @@ class Decoder {
   }
 
   void DecodeCompressedMiscAlu() {
-    uint8_t r = GetBits<uint8_t, 7, 3>() + 8;
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+    uint8_t r = GetBits<7, 3>() + 8;
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
     uint8_t imm = (high_imm << 5) + low_imm;
-    switch (GetBits<uint8_t, 10, 2>()) {
+    switch (GetBits<10, 2>()) {
       case 0b00: {
         const ShiftImmArgs args = {
             .opcode = ShiftImmOpcode::kSrli,
@@ -884,10 +890,10 @@ class Decoder {
         return insn_consumer_->OpImm(args);
       }
     }
-    uint8_t rs2 = GetBits<uint8_t, 2, 3>() + 8;
-    if (GetBits<uint8_t, 12, 1>() == 0) {
+    uint8_t rs2 = GetBits<2, 3>() + 8;
+    if (GetBits<12, 1>() == 0) {
       OpOpcode opcode;
-      switch (GetBits<uint8_t, 5, 2>()) {
+      switch (GetBits<5, 2>()) {
         case 0b00:
           opcode = OpOpcode::kSub;
           break;
@@ -910,7 +916,7 @@ class Decoder {
       return insn_consumer_->Op(args);
     } else {
       Op32Opcode opcode;
-      switch (GetBits<uint8_t, 5, 2>()) {
+      switch (GetBits<5, 2>()) {
         case 0b00:
           opcode = Op32Opcode::kSubw;
           break;
@@ -932,8 +938,8 @@ class Decoder {
 
   template <auto kOperandType>
   void DecodeCompressedStoresp() {
-    uint8_t raw_imm = GetBits<uint8_t, 7, 6>();
-    uint8_t rs2 = GetBits<uint8_t, 2, 5>();
+    uint8_t raw_imm = GetBits<7, 6>();
+    uint8_t rs2 = GetBits<2, 5>();
     constexpr uint8_t k32bit[64] = {
         0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31, 0x02, 0x12, 0x22, 0x32, 0x03,
         0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34, 0x05, 0x15, 0x25, 0x35, 0x06, 0x16,
@@ -957,9 +963,9 @@ class Decoder {
   }
 
   void DecodeCompressedLuiAddi16sp() {
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
-    uint8_t rd = GetBits<uint8_t, 7, 5>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
+    uint8_t rd = GetBits<7, 5>();
     if (rd != 2) {
       int32_t imm = SignExtend<18>((high_imm << 17) + (low_imm << 12));
       const UpperImmArgs args = {
@@ -986,8 +992,8 @@ class Decoder {
 
   template <enum LoadStore kLoadStore, auto kOperandType>
   void DecodeCompressedLoadStore() {
-    uint8_t low_imm = GetBits<uint8_t, 5, 2>();
-    uint8_t high_imm = GetBits<uint8_t, 10, 3>();
+    uint8_t low_imm = GetBits<5, 2>();
+    uint8_t high_imm = GetBits<10, 3>();
     uint8_t imm;
     if constexpr ((uint8_t(kOperandType) & 1) == 0) {
       constexpr uint8_t kLwLow[4] = {0x0, 0x40, 0x04, 0x44};
@@ -995,8 +1001,8 @@ class Decoder {
     } else {
       imm = (low_imm << 6 | high_imm << 3);
     }
-    uint8_t rd = GetBits<uint8_t, 2, 3>();
-    uint8_t rs = GetBits<uint8_t, 7, 3>();
+    uint8_t rd = GetBits<2, 3>();
+    uint8_t rs = GetBits<7, 3>();
     if constexpr (kLoadStore == LoadStore::kStore) {
       const StoreArgsTemplate<decltype(kOperandType)> args = {
           .operand_type = kOperandType,
@@ -1018,9 +1024,9 @@ class Decoder {
 
   template <auto kOperandType>
   void DecodeCompressedLoadsp() {
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
-    uint8_t rd = GetBits<uint8_t, 7, 5>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
+    uint8_t rd = GetBits<7, 5>();
     constexpr uint8_t k32bitLow[32] = {0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31,
                                        0x02, 0x12, 0x22, 0x32, 0x03, 0x13, 0x23, 0x33,
                                        0x04, 0x14, 0x24, 0x34, 0x05, 0x15, 0x25, 0x35,
@@ -1041,10 +1047,10 @@ class Decoder {
   }
 
   void DecodeCompressedAddi() {
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
     int8_t imm = SignExtend<6>(high_imm << 5 | low_imm);
-    uint8_t r = GetBits<uint8_t, 7, 5>();
+    uint8_t r = GetBits<7, 5>();
     if (r == 0 || imm == 0) {
       insn_consumer_->Nop();
     }
@@ -1058,10 +1064,10 @@ class Decoder {
   }
 
   void DecodeCompressedAddiw() {
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
     int8_t imm = SignExtend<6>(high_imm << 5 | low_imm);
-    uint8_t r = GetBits<uint8_t, 7, 5>();
+    uint8_t r = GetBits<7, 5>();
     const OpImm32Args args = {
         .opcode = OpImm32Opcode::kAddiw,
         .dst = r,
@@ -1076,12 +1082,12 @@ class Decoder {
     constexpr uint8_t kBLow[32] = {0x00, 0x20, 0x02, 0x22, 0x04, 0x24, 0x06, 0x26, 0x40, 0x60, 0x42,
                                    0x62, 0x44, 0x64, 0x46, 0x66, 0x80, 0xa0, 0x82, 0xa2, 0x84, 0xa4,
                                    0x86, 0xa6, 0xc0, 0xe0, 0xc2, 0xe2, 0xc4, 0xe4, 0xc6, 0xe6};
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 10, 3>();
-    uint8_t rs = GetBits<uint8_t, 7, 3>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<10, 3>();
+    uint8_t rs = GetBits<7, 3>();
 
     const BranchArgs args = {
-        .opcode = BranchOpcode(GetBits<uint8_t, 13, 1>()),
+        .opcode = BranchOpcode(GetBits<13, 1>()),
         .src1 = uint8_t(8 + rs),
         .src2 = 0,
         .offset = static_cast<int16_t>(SignExtend<9>(kBHigh[high_imm] + kBLow[low_imm])),
@@ -1104,8 +1110,7 @@ class Decoder {
     };
     const JumpAndLinkArgs args = {
         .dst = 0,
-        .offset =
-            bit_cast<int16_t>(kJHigh[GetBits<uint16_t, 8, 5>()]) | kJLow[GetBits<uint16_t, 2, 6>()],
+        .offset = bit_cast<int16_t>(kJHigh[GetBits<8, 5>()]) | kJLow[GetBits<2, 6>()],
         .insn_len = 2,
     };
     insn_consumer_->JumpAndLink(args);
@@ -1116,8 +1121,7 @@ class Decoder {
         0x0, 0x40, 0x80, 0xc0, 0x4, 0x44, 0x84, 0xc4, 0x8, 0x48, 0x88, 0xc8, 0xc, 0x4c, 0x8c, 0xcc};
     constexpr uint8_t kAddi4spnLow[16] = {
         0x0, 0x2, 0x1, 0x3, 0x10, 0x12, 0x11, 0x13, 0x20, 0x22, 0x21, 0x23, 0x30, 0x32, 0x31, 0x33};
-    int16_t imm = (kAddi4spnHigh[GetBits<uint8_t, 9, 4>()] | kAddi4spnLow[GetBits<uint8_t, 5, 4>()])
-                  << 2;
+    int16_t imm = (kAddi4spnHigh[GetBits<9, 4>()] | kAddi4spnLow[GetBits<5, 4>()]) << 2;
     // If immediate is zero then this instruction is treated as unimplemented.
     // This includes RISC-V dedicated 16bit “unimplemented instruction” 0x0000.
     if (imm == 0) {
@@ -1125,7 +1129,7 @@ class Decoder {
     }
     const OpImmArgs args = {
         .opcode = OpImmOpcode::kAddi,
-        .dst = uint8_t(8 + GetBits<uint8_t, 2, 3>()),
+        .dst = uint8_t(8 + GetBits<2, 3>()),
         .src = 2,
         .imm = imm,
     };
@@ -1133,9 +1137,9 @@ class Decoder {
   }
 
   void DecodeCompressedJr_Jalr_Mv_Add() {
-    uint8_t r = GetBits<uint8_t, 7, 5>();
-    uint8_t rs2 = GetBits<uint8_t, 2, 5>();
-    if (GetBits<uint8_t, 12, 1>()) {
+    uint8_t r = GetBits<7, 5>();
+    uint8_t rs2 = GetBits<2, 5>();
+    if (GetBits<12, 1>()) {
       if (r == 0 && rs2 == 0) {
         const SystemArgs args = {
             .opcode = SystemOpcode::kEbreak,
@@ -1180,9 +1184,9 @@ class Decoder {
   }
 
   void DecodeCompressedSlli() {
-    uint8_t r = GetBits<uint8_t, 7, 5>();
-    uint8_t low_imm = GetBits<uint8_t, 2, 5>();
-    uint8_t high_imm = GetBits<uint8_t, 12, 1>();
+    uint8_t r = GetBits<7, 5>();
+    uint8_t low_imm = GetBits<2, 5>();
+    uint8_t high_imm = GetBits<12, 1>();
     uint8_t imm = (high_imm << 5) + low_imm;
     const ShiftImmArgs args = {
         .opcode = ShiftImmOpcode::kSlli,
@@ -1194,7 +1198,7 @@ class Decoder {
   }
 
   uint8_t DecodeBaseInstruction() {
-    BaseOpcode opcode_bits{GetBits<uint8_t, 2, 5>()};
+    BaseOpcode opcode_bits{GetBits<2, 5>()};
 
     switch (opcode_bits) {
       case BaseOpcode::kLoad:
@@ -1264,11 +1268,13 @@ class Decoder {
   }
 
  private:
-  template <typename ResultType, uint32_t start, uint32_t size>
-  ResultType GetBits() {
-    static_assert(std::is_unsigned_v<ResultType>, "Only unsigned types are supported");
-    static_assert(sizeof(ResultType) * CHAR_BIT >= size, "Too small ResultType for size");
+  template <uint32_t start, uint32_t size>
+  auto GetBits() {
     static_assert((start + size) <= 32 && size > 0, "Invalid start or size value");
+    using ResultType = std::conditional_t<
+        size == 1,
+        bool,
+        std::conditional_t<size <= 8, uint8_t, std::conditional_t<size <= 16, uint16_t, uint32_t>>>;
     uint32_t shifted_val = code_ << (32 - start - size);
     return static_cast<ResultType>(shifted_val >> (32 - size));
   }
@@ -1294,32 +1300,32 @@ class Decoder {
   }
 
   void DecodeMiscMem() {
-    uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
+    uint8_t low_opcode = GetBits<12, 3>();
     switch (low_opcode) {
       case 0b000: {
-        uint8_t high_opcode = GetBits<uint8_t, 28, 4>();
+        uint8_t high_opcode = GetBits<28, 4>();
         FenceOpcode opcode = FenceOpcode{high_opcode};
         const FenceArgs args = {
             .opcode = opcode,
-            .dst = GetBits<uint8_t, 7, 5>(),
-            .src = GetBits<uint8_t, 15, 5>(),
-            .sw = bool(GetBits<uint8_t, 20, 1>()),
-            .sr = bool(GetBits<uint8_t, 21, 1>()),
-            .so = bool(GetBits<uint8_t, 22, 1>()),
-            .si = bool(GetBits<uint8_t, 23, 1>()),
-            .pw = bool(GetBits<uint8_t, 24, 1>()),
-            .pr = bool(GetBits<uint8_t, 25, 1>()),
-            .po = bool(GetBits<uint8_t, 26, 1>()),
-            .pi = bool(GetBits<uint8_t, 27, 1>()),
+            .dst = GetBits<7, 5>(),
+            .src = GetBits<15, 5>(),
+            .sw = GetBits<20, 1>(),
+            .sr = GetBits<21, 1>(),
+            .so = GetBits<22, 1>(),
+            .si = GetBits<23, 1>(),
+            .pw = GetBits<24, 1>(),
+            .pr = GetBits<25, 1>(),
+            .po = GetBits<26, 1>(),
+            .pi = GetBits<27, 1>(),
         };
         insn_consumer_->Fence(args);
         break;
       }
       case 0b001: {
-        uint16_t imm = GetBits<uint16_t, 20, 12>();
+        uint16_t imm = GetBits<20, 12>();
         const FenceIArgs args = {
-            .dst = GetBits<uint8_t, 7, 5>(),
-            .src = GetBits<uint8_t, 15, 5>(),
+            .dst = GetBits<7, 5>(),
+            .src = GetBits<15, 5>(),
             .imm = SignExtend<12>(imm),
         };
         insn_consumer_->FenceI(args);
@@ -1332,8 +1338,8 @@ class Decoder {
 
   template <typename OpcodeType>
   void DecodeOp() {
-    uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
-    uint8_t high_opcode = GetBits<uint8_t, 25, 7>();
+    uint8_t low_opcode = GetBits<12, 3>();
+    uint8_t high_opcode = GetBits<25, 7>();
     uint16_t opcode_bits = static_cast<int16_t>(low_opcode | (high_opcode << 3));
     OpcodeType opcode{opcode_bits};
     OpSingleInputOpcode single_input_opcode{opcode_bits};
@@ -1348,29 +1354,29 @@ class Decoder {
     }
     const OpArgsTemplate<OpcodeType> args = {
         .opcode = opcode,
-        .dst = GetBits<uint8_t, 7, 5>(),
-        .src1 = GetBits<uint8_t, 15, 5>(),
-        .src2 = GetBits<uint8_t, 20, 5>(),
+        .dst = GetBits<7, 5>(),
+        .src1 = GetBits<15, 5>(),
+        .src2 = GetBits<20, 5>(),
     };
     insn_consumer_->Op(args);
   }
 
   void DecodeSingleInputOp(OpSingleInputOpcode opcode) {
-    uint8_t src1 = GetBits<uint8_t, 15, 5>();
-    uint8_t src2 = GetBits<uint8_t, 20, 5>();
+    uint8_t src1 = GetBits<15, 5>();
+    uint8_t src2 = GetBits<20, 5>();
 
     if (src2 != 0) {
       return Undefined();
     }
-    const OpSingleInputArgs args = {.opcode = opcode, .dst = GetBits<uint8_t, 7, 5>(), .src = src1};
+    const OpSingleInputArgs args = {.opcode = opcode, .dst = GetBits<7, 5>(), .src = src1};
     insn_consumer_->OpSingleInput(args);
   }
 
   void DecodeAmo() {
-    uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
-    uint8_t high_opcode = GetBits<uint8_t, 27, 5>();
+    uint8_t low_opcode = GetBits<12, 3>();
+    uint8_t high_opcode = GetBits<27, 5>();
     // lr instruction must have rs2 == 0
-    if (high_opcode == 0b00010 && GetBits<uint8_t, 20, 5>() != 0) {
+    if (high_opcode == 0b00010 && GetBits<20, 5>() != 0) {
       return Undefined();
     }
     AmoOpcode opcode = AmoOpcode{high_opcode};
@@ -1378,43 +1384,43 @@ class Decoder {
     const AmoArgs args = {
         .opcode = opcode,
         .operand_type = operand_type,
-        .dst = GetBits<uint8_t, 7, 5>(),
-        .src1 = GetBits<uint8_t, 15, 5>(),
-        .src2 = GetBits<uint8_t, 20, 5>(),
-        .rl = bool(GetBits<uint8_t, 25, 1>()),
-        .aq = bool(GetBits<uint8_t, 26, 1>()),
+        .dst = GetBits<7, 5>(),
+        .src1 = GetBits<15, 5>(),
+        .src2 = GetBits<20, 5>(),
+        .rl = GetBits<25, 1>(),
+        .aq = GetBits<26, 1>(),
     };
     insn_consumer_->Amo(args);
   }
 
   void DecodeFma() {
-    uint8_t operand_type = GetBits<uint8_t, 25, 2>();
-    uint8_t opcode_bits = GetBits<uint8_t, 2, 2>();
+    uint8_t operand_type = GetBits<25, 2>();
+    uint8_t opcode_bits = GetBits<2, 2>();
     const FmaArgs args = {
         .opcode = FmaOpcode(opcode_bits),
         .operand_type = FloatOperandType(operand_type),
-        .dst = GetBits<uint8_t, 7, 5>(),
-        .src1 = GetBits<uint8_t, 15, 5>(),
-        .src2 = GetBits<uint8_t, 20, 5>(),
-        .src3 = GetBits<uint8_t, 27, 5>(),
-        .rm = GetBits<uint8_t, 12, 3>(),
+        .dst = GetBits<7, 5>(),
+        .src1 = GetBits<15, 5>(),
+        .src2 = GetBits<20, 5>(),
+        .src3 = GetBits<27, 5>(),
+        .rm = GetBits<12, 3>(),
     };
     insn_consumer_->Fma(args);
   }
 
   void DecodeLui() {
-    int32_t imm = GetBits<uint32_t, 12, 20>();
+    int32_t imm = GetBits<12, 20>();
     const UpperImmArgs args = {
-        .dst = GetBits<uint8_t, 7, 5>(),
+        .dst = GetBits<7, 5>(),
         .imm = imm << 12,
     };
     insn_consumer_->Lui(args);
   }
 
   void DecodeAuipc() {
-    int32_t imm = GetBits<uint32_t, 12, 20>();
+    int32_t imm = GetBits<12, 20>();
     const UpperImmArgs args = {
-        .dst = GetBits<uint8_t, 7, 5>(),
+        .dst = GetBits<7, 5>(),
         .imm = imm << 12,
     };
     insn_consumer_->Auipc(args);
@@ -1424,19 +1430,19 @@ class Decoder {
   void DecodeLoad() {
     OperandTypeEnum operand_type;
     if constexpr (std::is_same_v<OperandTypeEnum, FloatOperandType>) {
-      auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<uint8_t, 12, 3>()];
+      auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<12, 3>()];
       if (!decoded_operand_type.has_value()) {
         return Undefined();
       }
       operand_type = *decoded_operand_type;
     } else {
-      operand_type = OperandTypeEnum{GetBits<uint8_t, 12, 3>()};
+      operand_type = OperandTypeEnum{GetBits<12, 3>()};
     }
     const LoadArgsTemplate<OperandTypeEnum> args = {
         .operand_type = operand_type,
-        .dst = GetBits<uint8_t, 7, 5>(),
-        .src = GetBits<uint8_t, 15, 5>(),
-        .offset = SignExtend<12>(GetBits<uint16_t, 20, 12>()),
+        .dst = GetBits<7, 5>(),
+        .src = GetBits<15, 5>(),
+        .offset = SignExtend<12>(GetBits<20, 12>()),
     };
     insn_consumer_->Load(args);
   }
@@ -1445,23 +1451,23 @@ class Decoder {
   void DecodeStore() {
     OperandTypeEnum operand_type;
     if constexpr (std::is_same_v<OperandTypeEnum, FloatOperandType>) {
-      auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<uint8_t, 12, 3>()];
+      auto decoded_operand_type = kLoadStoreWidthToFloatOperandType[GetBits<12, 3>()];
       if (!decoded_operand_type.has_value()) {
         return Undefined();
       }
       operand_type = *decoded_operand_type;
     } else {
-      operand_type = OperandTypeEnum{GetBits<uint8_t, 12, 3>()};
+      operand_type = OperandTypeEnum{GetBits<12, 3>()};
     }
 
-    uint16_t low_imm = GetBits<uint16_t, 7, 5>();
-    uint16_t high_imm = GetBits<uint16_t, 25, 7>();
+    uint16_t low_imm = GetBits<7, 5>();
+    uint16_t high_imm = GetBits<25, 7>();
 
     const StoreArgsTemplate<OperandTypeEnum> args = {
         .operand_type = operand_type,
-        .src = GetBits<uint8_t, 15, 5>(),
+        .src = GetBits<15, 5>(),
         .offset = SignExtend<12>(static_cast<int16_t>(low_imm | (high_imm << 5))),
-        .data = GetBits<uint8_t, 20, 5>(),
+        .data = GetBits<20, 5>(),
     };
     insn_consumer_->Store(args);
   }
@@ -1471,37 +1477,36 @@ class Decoder {
             typename BitmanipOpcodeType,
             uint32_t kShiftFieldSize>
   void DecodeOp() {
-    uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
+    uint8_t low_opcode = GetBits<12, 3>();
     if (low_opcode != 0b001 && low_opcode != 0b101) {
       OpOpcodeType opcode{low_opcode};
 
-      uint16_t imm = GetBits<uint16_t, 20, 12>();
+      uint16_t imm = GetBits<20, 12>();
 
       const OpImmArgsTemplate<OpOpcodeType> args = {
           .opcode = opcode,
-          .dst = GetBits<uint8_t, 7, 5>(),
-          .src = GetBits<uint8_t, 15, 5>(),
+          .dst = GetBits<7, 5>(),
+          .src = GetBits<15, 5>(),
           .imm = SignExtend<12>(imm),
       };
       insn_consumer_->OpImm(args);
-    } else if ((GetBits<uint16_t, 31, 1>() +
-                GetBits<uint16_t, 20 + kShiftFieldSize, 10 - kShiftFieldSize>()) ==
+    } else if ((GetBits<31, 1>() + GetBits<20 + kShiftFieldSize, 10 - kShiftFieldSize>()) ==
                0) {  // For Canonical Shift Instructions from RV64G the opcode contains all
                      // zeros except for the 30th (second highest) bit.
-      uint16_t high_opcode = GetBits<uint16_t, 20 + kShiftFieldSize, 12 - kShiftFieldSize>();
+      uint16_t high_opcode = GetBits<20 + kShiftFieldSize, 12 - kShiftFieldSize>();
       ShiftOpcodeType opcode{
           static_cast<std::underlying_type_t<ShiftOpcodeType>>(low_opcode | (high_opcode << 3))};
 
       const ShiftImmArgsTemplate<ShiftOpcodeType> args = {
           .opcode = opcode,
-          .dst = GetBits<uint8_t, 7, 5>(),
-          .src = GetBits<uint8_t, 15, 5>(),
-          .imm = GetBits<uint8_t, 20, kShiftFieldSize>(),
+          .dst = GetBits<7, 5>(),
+          .src = GetBits<15, 5>(),
+          .imm = GetBits<20, kShiftFieldSize>(),
       };
       insn_consumer_->OpImm(args);
     } else {
-      uint8_t shamt = GetBits<uint8_t, 20, kShiftFieldSize>();
-      uint16_t high_opcode = GetBits<uint16_t, 20 + kShiftFieldSize, 12 - kShiftFieldSize>();
+      uint8_t shamt = GetBits<20, kShiftFieldSize>();
+      uint16_t high_opcode = GetBits<20 + kShiftFieldSize, 12 - kShiftFieldSize>();
       BitmanipOpcodeType opcode{static_cast<uint16_t>(low_opcode | (high_opcode << 3))};
       bool has_shamt = false;
 
@@ -1527,14 +1532,14 @@ class Decoder {
       }
       // TODO(b/291851792): Refactor instructions with shamt into ShiftImmArgs
       if (!has_shamt) {
-        high_opcode = GetBits<uint16_t, 20, 12>();
+        high_opcode = GetBits<20, 12>();
         opcode = BitmanipOpcodeType{static_cast<uint16_t>(low_opcode | (high_opcode << 3))};
         shamt = 0;
       }
       const BitmanipImmArgsTemplate<BitmanipOpcodeType> args = {
           .opcode = opcode,
-          .dst = GetBits<uint8_t, 7, 5>(),
-          .src = GetBits<uint8_t, 15, 5>(),
+          .dst = GetBits<7, 5>(),
+          .src = GetBits<15, 5>(),
           .shamt = shamt,
       };
       insn_consumer_->OpImm(args);
@@ -1542,20 +1547,20 @@ class Decoder {
   }
 
   void DecodeBranch() {
-    BranchOpcode opcode{GetBits<uint8_t, 12, 3>()};
+    BranchOpcode opcode{GetBits<12, 3>()};
 
     // Decode the offset.
-    auto low_imm = GetBits<uint16_t, 8, 4>();
-    auto mid_imm = GetBits<uint16_t, 25, 6>();
-    auto bit11_imm = GetBits<uint16_t, 7, 1>();
-    auto bit12_imm = GetBits<uint16_t, 31, 1>();
+    auto low_imm = GetBits<8, 4>();
+    auto mid_imm = GetBits<25, 6>();
+    auto bit11_imm = GetBits<7, 1>();
+    auto bit12_imm = GetBits<31, 1>();
     auto offset =
         static_cast<int16_t>(low_imm | (mid_imm << 4) | (bit11_imm << 10) | (bit12_imm << 11));
 
     const BranchArgs args = {
         .opcode = opcode,
-        .src1 = GetBits<uint8_t, 15, 5>(),
-        .src2 = GetBits<uint8_t, 20, 5>(),
+        .src1 = GetBits<15, 5>(),
+        .src2 = GetBits<20, 5>(),
         // The offset is encoded as 2-byte units, we need to multiply by 2.
         .offset = SignExtend<13>(static_cast<int16_t>(offset * 2)),
     };
@@ -1564,15 +1569,15 @@ class Decoder {
 
   void DecodeJumpAndLink() {
     // Decode the offset.
-    auto low_imm = GetBits<uint32_t, 21, 10>();
-    auto mid_imm = GetBits<uint32_t, 12, 8>();
-    auto bit11_imm = GetBits<uint32_t, 20, 1>();
-    auto bit20_imm = GetBits<uint32_t, 31, 1>();
+    auto low_imm = GetBits<21, 10>();
+    auto mid_imm = GetBits<12, 8>();
+    auto bit11_imm = GetBits<20, 1>();
+    auto bit20_imm = GetBits<31, 1>();
     auto offset =
         static_cast<int32_t>(low_imm | (bit11_imm << 10) | (mid_imm << 11) | (bit20_imm << 19));
 
     const JumpAndLinkArgs args = {
-        .dst = GetBits<uint8_t, 7, 5>(),
+        .dst = GetBits<7, 5>(),
         // The offset is encoded as 2-byte units, we need to multiply by 2.
         .offset = SignExtend<21>(offset * 2),
         .insn_len = 4,
@@ -1584,13 +1589,13 @@ class Decoder {
     // Bit #29 = 1: means rm is an opcode extension and not operand.
     // Bit #30 = 1: means rs2 is an opcode extension and not operand.
     // Bit #31 = 1: selects general purpose register instead of floating point register as target.
-    uint8_t operand_type = GetBits<uint8_t, 25, 2>();
-    uint8_t opcode_bits = GetBits<uint8_t, 27, 2>();
-    uint8_t rd = GetBits<uint8_t, 7, 5>();
-    uint8_t rs1 = GetBits<uint8_t, 15, 5>();
-    uint8_t rs2 = GetBits<uint8_t, 20, 5>();
-    uint8_t rm = GetBits<uint8_t, 12, 3>();
-    switch (GetBits<uint8_t, 29, 3>()) {
+    uint8_t operand_type = GetBits<25, 2>();
+    uint8_t opcode_bits = GetBits<27, 2>();
+    uint8_t rd = GetBits<7, 5>();
+    uint8_t rs1 = GetBits<15, 5>();
+    uint8_t rs2 = GetBits<20, 5>();
+    uint8_t rm = GetBits<12, 3>();
+    switch (GetBits<29, 3>()) {
       case 0b000: {
         const OpFpArgs args = {
             .opcode = OpFpOpcode(opcode_bits),
@@ -1728,14 +1733,14 @@ class Decoder {
   }
 
   void DecodeOpV() {
-    uint8_t low_opcode = GetBits<uint8_t, 12, 3>();
-    bool vm = GetBits<uint8_t, 25, 1>();
-    uint8_t opcode = GetBits<uint8_t, 26, 6>();
-    uint8_t dst = GetBits<uint8_t, 7, 5>();
-    // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 2rd operand.
+    uint8_t low_opcode = GetBits<12, 3>();
+    bool vm = GetBits<25, 1>();
+    uint8_t opcode = GetBits<26, 6>();
+    uint8_t dst = GetBits<7, 5>();
+    // Note: in vector instructions vs2 field is 2nd operand while vs1 field is 3rd operand.
     // FMA instructions are exception, but there are not that many of these.
-    uint8_t src1 = GetBits<uint8_t, 20, 5>();
-    uint8_t src2 = GetBits<uint8_t, 15, 5>();
+    uint8_t src1 = GetBits<20, 5>();
+    uint8_t src2 = GetBits<15, 5>();
     switch (low_opcode) {
       case 0b000: {
         const VOpIVvArgs args = {
@@ -1788,25 +1793,25 @@ class Decoder {
         return insn_consumer_->OpVector(args);
       }
       case 0b111:
-        if (GetBits<uint8_t, 31, 1>() == 0) {
+        if (GetBits<31, 1>() == 0) {
           const VsetvliArgs args = {
-              .dst = GetBits<uint8_t, 7, 5>(),
-              .src = GetBits<uint8_t, 15, 5>(),
-              .vtype = GetBits<uint16_t, 20, 11>(),
+              .dst = GetBits<7, 5>(),
+              .src = GetBits<15, 5>(),
+              .vtype = GetBits<20, 11>(),
           };
           return insn_consumer_->Vsetvli(args);
-        } else if (GetBits<uint8_t, 30, 1>() == 1) {
+        } else if (GetBits<30, 1>() == 1) {
           const VsetivliArgs args = {
-              .dst = GetBits<uint8_t, 7, 5>(),
-              .avl = GetBits<uint8_t, 15, 5>(),
-              .vtype = GetBits<uint16_t, 20, 10>(),
+              .dst = GetBits<7, 5>(),
+              .avl = GetBits<15, 5>(),
+              .vtype = GetBits<20, 10>(),
           };
           return insn_consumer_->Vsetivli(args);
-        } else if (GetBits<uint8_t, 25, 6>() == 0) {
+        } else if (GetBits<25, 6>() == 0) {
           const VsetvlArgs args = {
-              .dst = GetBits<uint8_t, 7, 5>(),
-              .src1 = GetBits<uint8_t, 15, 5>(),
-              .src2 = GetBits<uint8_t, 20, 5>(),
+              .dst = GetBits<7, 5>(),
+              .src1 = GetBits<15, 5>(),
+              .src2 = GetBits<20, 5>(),
           };
           return insn_consumer_->Vsetvl(args);
         }
@@ -1814,46 +1819,46 @@ class Decoder {
   }
 
   void DecodeSystem() {
-    uint8_t low_opcode = GetBits<uint8_t, 12, 2>();
+    uint8_t low_opcode = GetBits<12, 2>();
     if (low_opcode == 0b00) {
-      int32_t opcode = GetBits<uint32_t, 7, 25>();
+      int32_t opcode = GetBits<7, 25>();
       const SystemArgs args = {
           .opcode = SystemOpcode(opcode),
       };
       return insn_consumer_->System(args);
     }
-    if (GetBits<uint8_t, 14, 1>()) {
+    if (GetBits<14, 1>()) {
       CsrImmOpcode opcode = CsrImmOpcode(low_opcode);
       const CsrImmArgs args = {
           .opcode = opcode,
-          .dst = GetBits<uint8_t, 7, 5>(),
-          .imm = GetBits<uint8_t, 15, 5>(),
-          .csr = GetBits<uint16_t, 20, 12>(),
+          .dst = GetBits<7, 5>(),
+          .imm = GetBits<15, 5>(),
+          .csr = GetBits<20, 12>(),
       };
       return insn_consumer_->Csr(args);
     }
     CsrOpcode opcode = CsrOpcode(low_opcode);
     const CsrArgs args = {
         .opcode = opcode,
-        .dst = GetBits<uint8_t, 7, 5>(),
-        .src = GetBits<uint8_t, 15, 5>(),
-        .csr = GetBits<uint16_t, 20, 12>(),
+        .dst = GetBits<7, 5>(),
+        .src = GetBits<15, 5>(),
+        .csr = GetBits<20, 12>(),
     };
     return insn_consumer_->Csr(args);
   }
 
   void DecodeJumpAndLinkRegister() {
-    if (GetBits<uint8_t, 12, 3>() != 0b000) {
+    if (GetBits<12, 3>() != 0b000) {
       Undefined();
       return;
     }
     // Decode sign-extend offset.
-    int16_t offset = GetBits<uint16_t, 20, 12>();
+    int16_t offset = GetBits<20, 12>();
     offset = static_cast<int16_t>(offset << 4) >> 4;
 
     const JumpAndLinkRegisterArgs args = {
-        .dst = GetBits<uint8_t, 7, 5>(),
-        .base = GetBits<uint8_t, 15, 5>(),
+        .dst = GetBits<7, 5>(),
+        .base = GetBits<15, 5>(),
         .offset = offset,
         .insn_len = 4,
     };
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index b48d9349..18b94dc3 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -654,9 +654,18 @@ class Interpreter {
   template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
   void OpVector(const Decoder::VOpMVvArgs& args) {
     switch (args.opcode) {
+      case Decoder::VOpMVvOpcode::kVmaddvv:
+        return OpVectorvv<intrinsics::Vmaddvv<ElementType, vta>, ElementType, vlmul, vta>(
+            args.dst, args.src1, args.src2);
+      case Decoder::VOpMVvOpcode::kVnmsubvv:
+        return OpVectorvv<intrinsics::Vnmsubvv<ElementType, vta>, ElementType, vlmul, vta>(
+            args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVmaccvv:
         return OpVectorvv<intrinsics::Vmaccvv<ElementType, vta>, ElementType, vlmul, vta>(
             args.dst, args.src1, args.src2);
+      case Decoder::VOpMVvOpcode::kVnmsacvv:
+        return OpVectorvv<intrinsics::Vnmsacvv<ElementType, vta>, ElementType, vlmul, vta>(
+            args.dst, args.src1, args.src2);
       default:
         Unimplemented();
     }
@@ -730,9 +739,18 @@ class Interpreter {
   template <typename ElementType, VectorRegisterGroupMultiplier vlmul, TailProcessing vta>
   void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) {
     switch (args.opcode) {
+      case Decoder::VOpMVxOpcode::kVmaddvx:
+        return OpVectorvx<intrinsics::Vmaddvx<ElementType, vta>, ElementType, vlmul, vta>(
+            args.dst, args.src1, arg2);
+      case Decoder::VOpMVxOpcode::kVnmsubvx:
+        return OpVectorvx<intrinsics::Vnmsubvx<ElementType, vta>, ElementType, vlmul, vta>(
+            args.dst, args.src1, arg2);
       case Decoder::VOpMVxOpcode::kVmaccvx:
         return OpVectorvx<intrinsics::Vmaccvx<ElementType, vta>, ElementType, vlmul, vta>(
             args.dst, args.src1, arg2);
+      case Decoder::VOpMVxOpcode::kVnmsacvx:
+        return OpVectorvx<intrinsics::Vnmsacvx<ElementType, vta>, ElementType, vlmul, vta>(
+            args.dst, args.src1, arg2);
       default:
         Unimplemented();
     }
@@ -926,12 +944,30 @@ class Interpreter {
             InactiveProcessing vma>
   void OpVector(const Decoder::VOpMVvArgs& args) {
     switch (args.opcode) {
+      case Decoder::VOpMVvOpcode::kVmaddvv:
+        return OpVectorvv<intrinsics::Vmaddvvm<ElementType, vta, vma>,
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
+      case Decoder::VOpMVvOpcode::kVnmsubvv:
+        return OpVectorvv<intrinsics::Vnmsubvvm<ElementType, vta, vma>,
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVmaccvv:
         return OpVectorvv<intrinsics::Vmaccvvm<ElementType, vta, vma>,
-                                               ElementType,
-                                               vlmul,
-                                               vta,
-                                               vma>(args.dst, args.src1, args.src2);
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
+      case Decoder::VOpMVvOpcode::kVnmsacvv:
+        return OpVectorvv<intrinsics::Vnmsacvvm<ElementType, vta, vma>,
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       default:
         Unimplemented();
     }
@@ -1023,12 +1059,30 @@ class Interpreter {
             InactiveProcessing vma>
   void OpVector(const Decoder::VOpMVxArgs& args, Register arg2) {
     switch (args.opcode) {
+      case Decoder::VOpMVxOpcode::kVmaddvx:
+        return OpVectorvx<intrinsics::Vmaddvxm<ElementType, vta, vma>,
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, arg2);
+      case Decoder::VOpMVxOpcode::kVnmsubvx:
+        return OpVectorvx<intrinsics::Vnmsubvxm<ElementType, vta, vma>,
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, arg2);
       case Decoder::VOpMVxOpcode::kVmaccvx:
         return OpVectorvx<intrinsics::Vmaccvxm<ElementType, vta, vma>,
                           ElementType,
                           vlmul,
                           vta,
                           vma>(args.dst, args.src1, arg2);
+      case Decoder::VOpMVxOpcode::kVnmsacvx:
+        return OpVectorvx<intrinsics::Vnmsacvxm<ElementType, vta, vma>,
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, arg2);
       default:
         Unimplemented();
     }
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 724c4892..f4fe0a56 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -1853,6 +1853,212 @@ TEST_F(Riscv64InterpreterTest, TestVmacc) {
        {0xbb11'11bd'1313'bf15, 0x6061'0c62'630e'6465},
        {0x05b1'0707'b309'09b5, 0xab01'01ad'0303'af05}});
 }
-}  // namespace
 
+TEST_F(Riscv64InterpreterTest, TestVnmsac) {  // Tables presumably cover SEW 8/16/32/64.
+  TestVectorInstruction(0xbd882457,  // vnmsac.vv v8, v16, v24, v0.t; RVV: vd -= vs1 * vs2
+      {{85, 83, 77, 67, 49, 35, 13, 243, 205, 179, 141, 99, 53, 3, 205, 147},
+       {85, 19, 205, 131, 33, 227, 141, 51, 189, 115, 13, 163, 53, 195, 77, 211},
+       {85, 211, 77, 195, 17, 163, 13, 115, 173, 51, 141, 227, 53, 131, 205, 19},
+       {85, 147, 205, 3, 1, 99, 141, 179, 157, 243, 13, 35, 53, 67, 77, 83},
+       {85, 83, 77, 67, 241, 35, 13, 243, 141, 179, 141, 99, 53, 3, 205, 147},
+       {85, 19, 205, 131, 225, 227, 141, 51, 125, 115, 13, 163, 53, 195, 77, 211},
+       {85, 211, 77, 195, 209, 163, 13, 115, 109, 51, 141, 227, 53, 131, 205, 19},
+       {85, 147, 205, 3, 193, 99, 141, 179, 93, 243, 13, 35, 53, 67, 77, 83}},
+      {{0x5555, 0x3d4d, 0x0031, 0xad0d, 0x2bcd, 0x9c8d, 0xe435, 0x0bcd},
+       {0x1355, 0xfacd, 0xad21, 0x698d, 0xd7bd, 0x580d, 0x9f35, 0xc64d},
+       {0xcd55, 0xb44d, 0x5611, 0x220d, 0x7fad, 0x0f8d, 0x5635, 0x7ccd},
+       {0x8355, 0x69cd, 0xfb01, 0xd68d, 0x239d, 0xc30d, 0x0935, 0x2f4d},
+       {0x3555, 0x1b4d, 0x9bf1, 0x870d, 0xc38d, 0x728d, 0xb835, 0xddcd},
+       {0xe355, 0xc8cd, 0x38e1, 0x338d, 0x5f7d, 0x1e0d, 0x6335, 0x884d},
+       {0x8d55, 0x724d, 0xd1d1, 0xdc0d, 0xf76d, 0xc58d, 0x0a35, 0x2ecd},
+       {0x3355, 0x17cd, 0x66c1, 0x808d, 0x8b5d, 0x690d, 0xad35, 0xd14d}},
+      {{0x4d53'5555, 0x65bd'0031, 0x8068'2bcd, 0xa960'e435},
+       {0xc68f'1355, 0xcbe6'ad21, 0xe38f'd7bd, 0x1996'9f35},
+       {0x33c2'cd55, 0x2608'5611, 0x3aaf'7fad, 0x7dc4'5635},
+       {0x94ee'8355, 0x7421'fb01, 0x85c7'239d, 0xd5ea'0935},
+       {0xea12'3555, 0xb633'9bf1, 0xc4d6'c38d, 0x2207'b835},
+       {0x332d'e355, 0xec3d'38e1, 0xf7de'5f7d, 0x621d'6335},
+       {0x7041'8d55, 0x163e'd1d1, 0x1edd'f76d, 0x962b'0a35},
+       {0xa14d'3355, 0x3438'66c1, 0x39d5'8b5d, 0xbe30'ad35}},
+      {{0xe20d'2c41'4d53'5555, 0x4fdc'3c72'8068'2bcd},
+       {0xbead'4fa7'c68f'1355, 0x1e70'55d0'e38f'd7bd},
+       {0x7f35'5efe'33c2'cd55, 0xd0ec'5b1f'3aaf'7fad},
+       {0x23a5'5a44'94ee'8355, 0x6750'4c5d'85c7'239d},
+       {0xabfd'417a'ea12'3555, 0xe19c'298b'c4d6'c38d},
+       {0x183d'14a1'332d'e355, 0x3fcf'f2a9'f7de'5f7d},
+       {0x6864'd3b7'7041'8d55, 0x81eb'a7b8'1edd'f76d},
+       {0x9c74'7ebd'a14d'3355, 0xa7ef'48b6'39d5'8b5d}});
+  TestVectorInstruction(0xbd00e457,  // vnmsac.vx v8, x1, v16, v0.t; RVV: vd -= x1 * vs2
+      {{85, 171, 1, 87, 173, 3, 89, 175, 5, 91, 177, 7, 93, 179, 9, 95},
+       {181, 11, 97, 183, 13, 99, 185, 15, 101, 187, 17, 103, 189, 19, 105, 191},
+       {21, 107, 193, 23, 109, 195, 25, 111, 197, 27, 113, 199, 29, 115, 201, 31},
+       {117, 203, 33, 119, 205, 35, 121, 207, 37, 123, 209, 39, 125, 211, 41, 127},
+       {213, 43, 129, 215, 45, 131, 217, 47, 133, 219, 49, 135, 221, 51, 137, 223},
+       {53, 139, 225, 55, 141, 227, 57, 143, 229, 59, 145, 231, 61, 147, 233, 63},
+       {149, 235, 65, 151, 237, 67, 153, 239, 69, 155, 241, 71, 157, 243, 73, 159},
+       {245, 75, 161, 247, 77, 163, 249, 79, 165, 251, 81, 167, 253, 83, 169, 255}},
+      {{0xab55, 0x0201, 0x58ad, 0xaf59, 0x0605, 0x5cb1, 0xb35d, 0x0a09},
+       {0x60b5, 0xb761, 0x0e0d, 0x64b9, 0xbb65, 0x1211, 0x68bd, 0xbf69},
+       {0x1615, 0x6cc1, 0xc36d, 0x1a19, 0x70c5, 0xc771, 0x1e1d, 0x74c9},
+       {0xcb75, 0x2221, 0x78cd, 0xcf79, 0x2625, 0x7cd1, 0xd37d, 0x2a29},
+       {0x80d5, 0xd781, 0x2e2d, 0x84d9, 0xdb85, 0x3231, 0x88dd, 0xdf89},
+       {0x3635, 0x8ce1, 0xe38d, 0x3a39, 0x90e5, 0xe791, 0x3e3d, 0x94e9},
+       {0xeb95, 0x4241, 0x98ed, 0xef99, 0x4645, 0x9cf1, 0xf39d, 0x4a49},
+       {0xa0f5, 0xf7a1, 0x4e4d, 0xa4f9, 0xfba5, 0x5251, 0xa8fd, 0xffa9}},
+      {{0x5756'ab55, 0xaf59'58ad, 0x075c'0605, 0x5f5e'b35d},
+       {0xb761'60b5, 0x0f64'0e0d, 0x6766'bb65, 0xbf69'68bd},
+       {0x176c'1615, 0x6f6e'c36d, 0xc771'70c5, 0x1f74'1e1d},
+       {0x7776'cb75, 0xcf79'78cd, 0x277c'2625, 0x7f7e'd37d},
+       {0xd781'80d5, 0x2f84'2e2d, 0x8786'db85, 0xdf89'88dd},
+       {0x378c'3635, 0x8f8e'e38d, 0xe791'90e5, 0x3f94'3e3d},
+       {0x9796'eb95, 0xef99'98ed, 0x479c'4645, 0x9f9e'f39d},
+       {0xf7a1'a0f5, 0x4fa4'4e4d, 0xa7a6'fba5, 0xffa9'a8fd}},
+      {{0xaf59'58ad'5756'ab55, 0x0a09'5e08'075c'0605},
+       {0x64b9'6362'b761'60b5, 0xbf69'68bd'6766'bb65},
+       {0x1a19'6e18'176c'1615, 0x74c9'7372'c771'70c5},
+       {0xcf79'78cd'7776'cb75, 0x2a29'7e28'277c'2625},
+       {0x84d9'8382'd781'80d5, 0xdf89'88dd'8786'db85},
+       {0x3a39'8e38'378c'3635, 0x94e9'9392'e791'90e5},
+       {0xef99'98ed'9796'eb95, 0x4a49'9e48'479c'4645},
+       {0xa4f9'a3a2'f7a1'a0f5, 0xffa9'a8fd'a7a6'fba5}});
+}
+
+TEST_F(Riscv64InterpreterTest, TestVmadd) {  // Tables presumably cover SEW 8/16/32/64.
+  TestVectorInstruction(0xa5882457,  // vmadd.vv v8, v16, v24, v0.t; RVV: vd = vs1 * vd + vs2
+      {{0, 87, 174, 5, 93, 179, 10, 97, 185, 15, 102, 189, 20, 107, 194, 25},
+       {112, 199, 30, 117, 205, 35, 122, 209, 41, 127, 214, 45, 132, 219, 50, 137},
+       {224, 55, 142, 229, 61, 147, 234, 65, 153, 239, 70, 157, 244, 75, 162, 249},
+       {80, 167, 254, 85, 173, 3, 90, 177, 9, 95, 182, 13, 100, 187, 18, 105},
+       {192, 23, 110, 197, 29, 115, 202, 33, 121, 207, 38, 125, 212, 43, 130, 217},
+       {48, 135, 222, 53, 141, 227, 58, 145, 233, 63, 150, 237, 68, 155, 242, 73},
+       {160, 247, 78, 165, 253, 83, 170, 1, 89, 175, 6, 93, 180, 11, 98, 185},
+       {16, 103, 190, 21, 109, 195, 26, 113, 201, 31, 118, 205, 36, 123, 210, 41}},
+      {{0x5700, 0xafae, 0x085d, 0x610a, 0xb9b9, 0x1266, 0x6b14, 0xc3c2},
+       {0x1c70, 0x751e, 0xcdcd, 0x267a, 0x7f29, 0xd7d6, 0x3084, 0x8932},
+       {0xe1e0, 0x3a8e, 0x933d, 0xebea, 0x4499, 0x9d46, 0xf5f4, 0x4ea2},
+       {0xa750, 0xfffe, 0x58ad, 0xb15a, 0x0a09, 0x62b6, 0xbb64, 0x1412},
+       {0x6cc0, 0xc56e, 0x1e1d, 0x76ca, 0xcf79, 0x2826, 0x80d4, 0xd982},
+       {0x3230, 0x8ade, 0xe38d, 0x3c3a, 0x94e9, 0xed96, 0x4644, 0x9ef2},
+       {0xf7a0, 0x504e, 0xa8fd, 0x01aa, 0x5a59, 0xb306, 0x0bb4, 0x6462},
+       {0xbd10, 0x15be, 0x6e6d, 0xc71a, 0x1fc9, 0x7876, 0xd124, 0x29d2}},
+      {{0x0503'5700, 0x610a'085d, 0xbd10'b9b9, 0x1917'6b14},
+       {0x751e'1c70, 0xd124'cdcd, 0x2d2b'7f29, 0x8932'3084},
+       {0xe538'e1e0, 0x413f'933d, 0x9d46'4499, 0xf94c'f5f4},
+       {0x5553'a750, 0xb15a'58ad, 0x0d61'0a09, 0x6967'bb64},
+       {0xc56e'6cc0, 0x2175'1e1d, 0x7d7b'cf79, 0xd982'80d4},
+       {0x3589'3230, 0x918f'e38d, 0xed96'94e9, 0x499d'4644},
+       {0xa5a3'f7a0, 0x01aa'a8fd, 0x5db1'5a59, 0xb9b8'0bb4},
+       {0x15be'bd10, 0x71c5'6e6d, 0xcdcc'1fc9, 0x29d2'd124}},
+      {{0x610a'085d'0503'5700, 0xc3c2'15be'bd10'b9b9},
+       {0x267a'2322'751e'1c70, 0x8932'3084'2d2b'7f29},
+       {0xebea'3de7'e538'e1e0, 0x4ea2'4b49'9d46'4499},
+       {0xb15a'58ad'5553'a750, 0x1412'660f'0d61'0a09},
+       {0x76ca'7372'c56e'6cc0, 0xd982'80d4'7d7b'cf79},
+       {0x3c3a'8e38'3589'3230, 0x9ef2'9b99'ed96'94e9},
+       {0x01aa'a8fd'a5a3'f7a0, 0x6462'b65f'5db1'5a59},
+       {0xc71a'c3c3'15be'bd10, 0x29d2'd124'cdcc'1fc9}});
+  TestVectorInstruction(0xa500e457, // vmadd.vx v8, x1, v16, v0.t; RVV: vd = x1 * vd + vs2
+      {{114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129},
+       {130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145},
+       {146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161},
+       {162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177},
+       {178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193},
+       {194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209},
+       {210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225},
+       {226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241}},
+      {{0x1d72, 0x1f74, 0x2176, 0x2378, 0x257a, 0x277c, 0x297e, 0x2b80},
+       {0x2d82, 0x2f84, 0x3186, 0x3388, 0x358a, 0x378c, 0x398e, 0x3b90},
+       {0x3d92, 0x3f94, 0x4196, 0x4398, 0x459a, 0x479c, 0x499e, 0x4ba0},
+       {0x4da2, 0x4fa4, 0x51a6, 0x53a8, 0x55aa, 0x57ac, 0x59ae, 0x5bb0},
+       {0x5db2, 0x5fb4, 0x61b6, 0x63b8, 0x65ba, 0x67bc, 0x69be, 0x6bc0},
+       {0x6dc2, 0x6fc4, 0x71c6, 0x73c8, 0x75ca, 0x77cc, 0x79ce, 0x7bd0},
+       {0x7dd2, 0x7fd4, 0x81d6, 0x83d8, 0x85da, 0x87dc, 0x89de, 0x8be0},
+       {0x8de2, 0x8fe4, 0x91e6, 0x93e8, 0x95ea, 0x97ec, 0x99ee, 0x9bf0}},
+      {{0x74c9'1d72, 0x78cd'2176, 0x7cd1'257a, 0x80d5'297e},
+       {0x84d9'2d82, 0x88dd'3186, 0x8ce1'358a, 0x90e5'398e},
+       {0x94e9'3d92, 0x98ed'4196, 0x9cf1'459a, 0xa0f5'499e},
+       {0xa4f9'4da2, 0xa8fd'51a6, 0xad01'55aa, 0xb105'59ae},
+       {0xb509'5db2, 0xb90d'61b6, 0xbd11'65ba, 0xc115'69be},
+       {0xc519'6dc2, 0xc91d'71c6, 0xcd21'75ca, 0xd125'79ce},
+       {0xd529'7dd2, 0xd92d'81d6, 0xdd31'85da, 0xe135'89de},
+       {0xe539'8de2, 0xe93d'91e6, 0xed41'95ea, 0xf145'99ee}},
+      {{0x2377'cc20'74c9'1d72, 0x2b7f'd428'7cd1'257a},
+       {0x3387'dc30'84d9'2d82, 0x3b8f'e438'8ce1'358a},
+       {0x4397'ec40'94e9'3d92, 0x4b9f'f448'9cf1'459a},
+       {0x53a7'fc50'a4f9'4da2, 0x5bb0'0458'ad01'55aa},
+       {0x63b8'0c60'b509'5db2, 0x6bc0'1468'bd11'65ba},
+       {0x73c8'1c70'c519'6dc2, 0x7bd0'2478'cd21'75ca},
+       {0x83d8'2c80'd529'7dd2, 0x8be0'3488'dd31'85da},
+       {0x93e8'3c90'e539'8de2, 0x9bf0'4498'ed41'95ea}});
+}
+
+TEST_F(Riscv64InterpreterTest, TestVnmsub) {  // Tables presumably cover SEW 8/16/32/64.
+  TestVectorInstruction(0xad882457,  // vnmsub.vv v8, v16, v24, v0.t; RVV: vd = vs2 - vs1 * vd
+      {{0, 173, 90, 7, 181, 97, 14, 187, 105, 21, 194, 111, 28, 201, 118, 35},
+       {208, 125, 42, 215, 133, 49, 222, 139, 57, 229, 146, 63, 236, 153, 70, 243},
+       {160, 77, 250, 167, 85, 1, 174, 91, 9, 181, 98, 15, 188, 105, 22, 195},
+       {112, 29, 202, 119, 37, 209, 126, 43, 217, 133, 50, 223, 140, 57, 230, 147},
+       {64, 237, 154, 71, 245, 161, 78, 251, 169, 85, 2, 175, 92, 9, 182, 99},
+       {16, 189, 106, 23, 197, 113, 30, 203, 121, 37, 210, 127, 44, 217, 134, 51},
+       {224, 141, 58, 231, 149, 65, 238, 155, 73, 245, 162, 79, 252, 169, 86, 3},
+       {176, 93, 10, 183, 101, 17, 190, 107, 25, 197, 114, 31, 204, 121, 38, 211}},
+      {{0xad00, 0x5c5a, 0x0bb5, 0xbb0e, 0x6a69, 0x19c2, 0xc91c, 0x7876},
+       {0x27d0, 0xd72a, 0x8685, 0x35de, 0xe539, 0x9492, 0x43ec, 0xf346},
+       {0xa2a0, 0x51fa, 0x0155, 0xb0ae, 0x6009, 0x0f62, 0xbebc, 0x6e16},
+       {0x1d70, 0xccca, 0x7c25, 0x2b7e, 0xdad9, 0x8a32, 0x398c, 0xe8e6},
+       {0x9840, 0x479a, 0xf6f5, 0xa64e, 0x55a9, 0x0502, 0xb45c, 0x63b6},
+       {0x1310, 0xc26a, 0x71c5, 0x211e, 0xd079, 0x7fd2, 0x2f2c, 0xde86},
+       {0x8de0, 0x3d3a, 0xec95, 0x9bee, 0x4b49, 0xfaa2, 0xa9fc, 0x5956},
+       {0x08b0, 0xb80a, 0x6765, 0x16be, 0xc619, 0x7572, 0x24cc, 0xd426}},
+      {{0x0704'ad00, 0xbb0e'0bb5, 0x6f17'6a69, 0x2320'c91c},
+       {0xd72a'27d0, 0x8b33'8685, 0x3f3c'e539, 0xf346'43ec},
+       {0xa74f'a2a0, 0x5b59'0155, 0x0f62'6009, 0xc36b'bebc},
+       {0x7775'1d70, 0x2b7e'7c25, 0xdf87'dad9, 0x9391'398c},
+       {0x479a'9840, 0xfba3'f6f5, 0xafad'55a9, 0x63b6'b45c},
+       {0x17c0'1310, 0xcbc9'71c5, 0x7fd2'd079, 0x33dc'2f2c},
+       {0xe7e5'8de0, 0x9bee'ec95, 0x4ff8'4b49, 0x0401'a9fc},
+       {0xb80b'08b0, 0x6c14'6765, 0x201d'c619, 0xd427'24cc}},
+      {{0xbb0e'0bb5'0704'ad00, 0x7876'1e71'6f17'6a69},
+       {0x35de'312f'd72a'27d0, 0xf346'43ec'3f3c'e539},
+       {0xb0ae'56aa'a74f'a2a0, 0x6e16'6967'0f62'6009},
+       {0x2b7e'7c25'7775'1d70, 0xe8e6'8ee1'df87'dad9},
+       {0xa64e'a1a0'479a'9840, 0x63b6'b45c'afad'55a9},
+       {0x211e'c71b'17c0'1310, 0xde86'd9d7'7fd2'd079},
+       {0x9bee'ec95'e7e5'8de0, 0x5956'ff52'4ff8'4b49},
+       {0x16bf'1210'b80b'08b0, 0xd427'24cd'201d'c619}});
+  TestVectorInstruction(0xad00e457, // vnmsub.vx v8, x1, v16, v0.t; RVV: vd = vs2 - x1 * vd
+      {{142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157},
+       {158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173},
+       {174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189},
+       {190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205},
+       {206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221},
+       {222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237},
+       {238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253},
+       {254, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}},
+      {{0xe48e, 0xe690, 0xe892, 0xea94, 0xec96, 0xee98, 0xf09a, 0xf29c},
+       {0xf49e, 0xf6a0, 0xf8a2, 0xfaa4, 0xfca6, 0xfea8, 0x00aa, 0x02ac},
+       {0x04ae, 0x06b0, 0x08b2, 0x0ab4, 0x0cb6, 0x0eb8, 0x10ba, 0x12bc},
+       {0x14be, 0x16c0, 0x18c2, 0x1ac4, 0x1cc6, 0x1ec8, 0x20ca, 0x22cc},
+       {0x24ce, 0x26d0, 0x28d2, 0x2ad4, 0x2cd6, 0x2ed8, 0x30da, 0x32dc},
+       {0x34de, 0x36e0, 0x38e2, 0x3ae4, 0x3ce6, 0x3ee8, 0x40ea, 0x42ec},
+       {0x44ee, 0x46f0, 0x48f2, 0x4af4, 0x4cf6, 0x4ef8, 0x50fa, 0x52fc},
+       {0x54fe, 0x5700, 0x5902, 0x5b04, 0x5d06, 0x5f08, 0x610a, 0x630c}},
+      {{0x913a'e48e, 0x953e'e892, 0x9942'ec96, 0x9d46'f09a},
+       {0xa14a'f49e, 0xa54e'f8a2, 0xa952'fca6, 0xad57'00aa},
+       {0xb15b'04ae, 0xb55f'08b2, 0xb963'0cb6, 0xbd67'10ba},
+       {0xc16b'14be, 0xc56f'18c2, 0xc973'1cc6, 0xcd77'20ca},
+       {0xd17b'24ce, 0xd57f'28d2, 0xd983'2cd6, 0xdd87'30da},
+       {0xe18b'34de, 0xe58f'38e2, 0xe993'3ce6, 0xed97'40ea},
+       {0xf19b'44ee, 0xf59f'48f2, 0xf9a3'4cf6, 0xfda7'50fa},
+       {0x01ab'54fe, 0x05af'5902, 0x09b3'5d06, 0x0db7'610a}},
+      {{0xea94'3de7'913a'e48e, 0xf29c'45ef'9942'ec96},
+       {0xfaa4'4df7'a14a'f49e, 0x02ac'55ff'a952'fca6},
+       {0x0ab4'5e07'b15b'04ae, 0x12bc'660f'b963'0cb6},
+       {0x1ac4'6e17'c16b'14be, 0x22cc'761f'c973'1cc6},
+       {0x2ad4'7e27'd17b'24ce, 0x32dc'862f'd983'2cd6},
+       {0x3ae4'8e37'e18b'34de, 0x42ec'963f'e993'3ce6},
+       {0x4af4'9e47'f19b'44ee, 0x52fc'a64f'f9a3'4cf6},
+       {0x5b04'ae58'01ab'54fe, 0x630c'b660'09b3'5d06}});
+}
+}  // namespace
 }  // namespace berberis
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index bb3b19d9..d034e33b 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -250,9 +250,21 @@ DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(sll, auto [arg1, arg2] = std::tuple{args...};
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(sll, auto [arg1, arg2] = std::tuple{args...};
                                    (arg1 << mask_bits(arg2)))
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(macc, auto [arg1, arg2] = std::tuple{args...};
-                                   ((arg1 * arg2) + vd));
+                                   ((arg2 * arg1) + vd))  // product added to vd
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(macc, auto [arg1, arg2] = std::tuple{args...};
-                                   ((arg1 * arg2) + vd));
+                                   ((arg2 * arg1) + vd))  // product added to vd
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsac, auto [arg1, arg2] = std::tuple{args...};
+                                   (-(arg2 * arg1) + vd))  // product subtracted from vd
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsac, auto [arg1, arg2] = std::tuple{args...};
+                                   (-(arg2 * arg1) + vd))  // product subtracted from vd
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(madd, auto [arg1, arg2] = std::tuple{args...};
+                                   ((arg2 * vd) + arg1))  // vd is a factor; arg1 is the addend
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(madd, auto [arg1, arg2] = std::tuple{args...};
+                                   ((arg2 * vd) + arg1))  // vd is a factor; arg1 is the addend
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(nmsub, auto [arg1, arg2] = std::tuple{args...};
+                                   (-(arg2 * vd) + arg1))  // arg1 minus the product arg2 * vd
+DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(nmsub, auto [arg1, arg2] = std::tuple{args...};
+                                   (-(arg2 * vd) + arg1))  // arg1 minus the product arg2 * vd
 #undef DEFINE_ARITHMETIC_INTRINSIC
 #undef DEFINE_ARITHMETIC_PARAMETERS_OR_ARGUMENTS
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2023-12-15 00:37:31 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2023-12-15 00:37:31 +0000
commit	f940c1f5262a9c153421c7e34384a6dd3f686d95 (patch)
tree	a91a3b3bcfd79f16aaad77c86a21d799c005d1d0
parent	2e3405345ffa06e045abd38aede28b36ad572647 (diff)
parent	87ada30fbde026edeb4a9300daad285f188e6b69 (diff)
download	binary_translation-android14-qpr2-release.tar.gz