| author | Vladimir Marko <vmarko@google.com> | 2024-01-16 14:33:30 +0000 |
|---|---|---|
| committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2024-01-16 14:33:30 +0000 |
| commit | 6170212a4b9d2932efe8ae9b1027b4a9acb2123d (patch) | |
| tree | 1aec788f360fc33e419c5f60cee4beb029fab791 | |
| parent | 791968a661b60e6ef19fb8ebcf07bb93643b193b (diff) | |
| parent | b8b45dde53a935497f6e2e2bc059584e3ab3432a (diff) | |
| download | art-6170212a4b9d2932efe8ae9b1027b4a9acb2123d.tar.gz | |
arm64: Simplify SystemArrayCopy intrinsics. am: ae805cf3f9 am: 89305a3e40 am: b8b45dde53
Original change: https://android-review.googlesource.com/c/platform/art/+/2909997
Change-Id: I7ba5c96e0fcf27e1b0054d04771643ccc7e4e5c6
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 721 |
1 file changed, 274 insertions(+), 447 deletions(-)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 3183dac348..27a41762e2 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -114,6 +114,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
     Register tmp_reg = WRegisterFrom(tmp_);
     __ Bind(GetEntryLabel());
+    // The source range and destination pointer were initialized before entering the slow-path.
     vixl::aarch64::Label slow_copy_loop;
     __ Bind(&slow_copy_loop);
     __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
@@ -2731,14 +2732,12 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
 // so if we choose to jump to the slow path we will end up in the native implementation.
 static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
 
-static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
-                                               uint32_t at,
-                                               HInstruction* input) {
+static Location LocationForSystemArrayCopyInput(HInstruction* input) {
   HIntConstant* const_input = input->AsIntConstantOrNull();
-  if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
-    locations->SetInAt(at, Location::RequiresRegister());
+  if (const_input != nullptr && vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
+    return Location::ConstantLocation(const_input);
   } else {
-    locations->SetInAt(at, Location::RegisterOrConstant(input));
+    return Location::RequiresRegister();
   }
 }
 
@@ -2771,10 +2770,10 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
   locations->SetInAt(0, Location::RequiresRegister());
-  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
+  locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
   locations->SetInAt(2, Location::RequiresRegister());
-  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
-  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
+  locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
+  locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
 
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
@@ -2782,92 +2781,97 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
 }
 
 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
-                                         const Location& pos,
-                                         const Register& input,
-                                         const Location& length,
+                                         Register array,
+                                         Location pos,
+                                         Location length,
                                          SlowPathCodeARM64* slow_path,
-                                         const Register& temp,
-                                         bool length_is_input_length = false) {
+                                         Register temp,
+                                         bool length_is_array_length,
+                                         bool position_sign_checked) {
   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      if (!length_is_input_length) {
-        // Check that length(input) >= length.
-        __ Ldr(temp, MemOperand(input, length_offset));
+      if (!length_is_array_length) {
+        // Check that length(array) >= length.
+        __ Ldr(temp, MemOperand(array, length_offset));
         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
         __ B(slow_path->GetEntryLabel(), lt);
       }
     } else {
-      // Check that length(input) >= pos.
-      __ Ldr(temp, MemOperand(input, length_offset));
-      __ Subs(temp, temp, pos_const);
-      __ B(slow_path->GetEntryLabel(), lt);
+      // Calculate length(array) - pos.
+      // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
+      // as `int32_t`. If the result is negative, the B.LT below shall go to the slow path.
+      __ Ldr(temp, MemOperand(array, length_offset));
+      __ Sub(temp, temp, pos_const);
 
-      // Check that (length(input) - pos) >= length.
+      // Check that (length(array) - pos) >= length.
       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
       __ B(slow_path->GetEntryLabel(), lt);
     }
-  } else if (length_is_input_length) {
+  } else if (length_is_array_length) {
     // The only way the copy can succeed is if pos is zero.
     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     Register pos_reg = WRegisterFrom(pos);
-    __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
+    if (!position_sign_checked) {
+      __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
+    }
 
-    // Check that pos <= length(input) && (length(input) - pos) >= length.
-    __ Ldr(temp, MemOperand(input, length_offset));
-    __ Subs(temp, temp, pos_reg);
-    // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
-    __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
+    // Calculate length(array) - pos.
+    // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
+    // as `int32_t`. If the result is negative, the B.LT below shall go to the slow path.
+    __ Ldr(temp, MemOperand(array, length_offset));
+    __ Sub(temp, temp, pos_reg);
+
+    // Check that (length(array) - pos) >= length.
+    __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
     __ B(slow_path->GetEntryLabel(), lt);
   }
 }
 
+static void GenArrayAddress(MacroAssembler* masm,
+                            Register dest,
+                            Register base,
+                            Location pos,
+                            DataType::Type type,
+                            int32_t data_offset) {
+  if (pos.IsConstant()) {
+    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(dest, base, DataType::Size(type) * constant + data_offset);
+  } else {
+    if (data_offset != 0) {
+      __ Add(dest, base, data_offset);
+      base = dest;
+    }
+    __ Add(dest, base, Operand(XRegisterFrom(pos), LSL, DataType::SizeShift(type)));
+  }
+}
+
 // Compute base source address, base destination address, and end
 // source address for System.arraycopy* intrinsics in `src_base`,
 // `dst_base` and `src_end` respectively.
 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
                                         DataType::Type type,
-                                        const Register& src,
-                                        const Location& src_pos,
-                                        const Register& dst,
-                                        const Location& dst_pos,
-                                        const Location& copy_length,
-                                        const Register& src_base,
-                                        const Register& dst_base,
-                                        const Register& src_end) {
+                                        Register src,
+                                        Location src_pos,
+                                        Register dst,
+                                        Location dst_pos,
+                                        Location copy_length,
+                                        Register src_base,
+                                        Register dst_base,
+                                        Register src_end) {
   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
       << "Unexpected element type: " << type;
   const int32_t element_size = DataType::Size(type);
-  const int32_t element_size_shift = DataType::SizeShift(type);
   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
 
-  if (src_pos.IsConstant()) {
-    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ Add(src_base, src, element_size * constant + data_offset);
-  } else {
-    __ Add(src_base, src, data_offset);
-    __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
-  }
-
-  if (dst_pos.IsConstant()) {
-    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ Add(dst_base, dst, element_size * constant + data_offset);
-  } else {
-    __ Add(dst_base, dst, data_offset);
-    __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
-  }
-
+  GenArrayAddress(masm, src_base, src, src_pos, type, data_offset);
+  GenArrayAddress(masm, dst_base, dst, dst_pos, type, data_offset);
   if (src_end.IsValid()) {
-    if (copy_length.IsConstant()) {
-      int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
-      __ Add(src_end, src_base, element_size * constant);
-    } else {
-      __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
-    }
+    GenArrayAddress(masm, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
   }
 }
 
@@ -2913,20 +2917,22 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
 
   CheckSystemArrayCopyPosition(masm,
-                               src_pos,
                                src,
+                               src_pos,
                                length,
                                slow_path,
                                src_curr_addr,
-                               false);
+                               /*length_is_array_length=*/ false,
+                               /*position_sign_checked=*/ false);
 
   CheckSystemArrayCopyPosition(masm,
-                               dst_pos,
                                dst,
+                               dst_pos,
                                length,
                                slow_path,
                                src_curr_addr,
-                               false);
+                               /*length_is_array_length=*/ false,
+                               /*position_sign_checked=*/ false);
 
   src_curr_addr = src_curr_addr.X();
   dst_curr_addr = dst_curr_addr.X();
@@ -3043,9 +3049,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
 // We can choose to use the native implementation there for longer copy lengths.
 static constexpr int32_t kSystemArrayCopyThreshold = 128;
 
-// CodeGenerator::CreateSystemArrayCopyLocationSummary use three temporary registers.
-// We want to use two temporary registers in order to reduce the register pressure in arm64.
-// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   // The only read barrier implementation supporting the
   // SystemArrayCopy intrinsic is the Baker-style read barriers.
@@ -3053,67 +3056,27 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
     return;
   }
 
-  // Check to see if we have known failures that will cause us to have to bail out
-  // to the runtime, and just generate the runtime call directly.
-  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
-  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
-
-  // The positions must be non-negative.
-  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
-      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
-    // We will have to fail anyways.
-    return;
-  }
-
-  // The length must be >= 0.
-  HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
-  if (length != nullptr) {
-    int32_t len = length->GetValue();
-    if (len < 0 || len >= kSystemArrayCopyThreshold) {
-      // Just call as normal.
-      return;
-    }
-  }
-
-  SystemArrayCopyOptimizations optimizations(invoke);
-
-  if (optimizations.GetDestinationIsSource()) {
-    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
-      // We only support backward copying if source and destination are the same.
-      return;
+  constexpr size_t kInitialNumTemps = 2u;  // We need at least two temps.
+  LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
+      invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
+  if (locations != nullptr) {
+    locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
+    locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
+    locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
+    if (codegen_->EmitBakerReadBarrier()) {
+      // Temporary register IP0, obtained from the VIXL scratch register
+      // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+      // (because that register is clobbered by ReadBarrierMarkRegX
+      // entry points). It cannot be used in calls to
+      // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+      // either. For these reasons, get a third extra temporary register
+      // from the register allocator.
+      locations->AddTemp(Location::RequiresRegister());
+    } else {
+      // Cases other than Baker read barriers: the third temporary will
+      // be acquired from the VIXL scratch register pool.
     }
   }
-
-  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
-    // We currently don't intrinsify primitive copying.
-    return;
-  }
-
-  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
-  LocationSummary* locations =
-      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
-  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
-  locations->SetInAt(0, Location::RequiresRegister());
-  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
-  locations->SetInAt(2, Location::RequiresRegister());
-  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
-  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
-
-  locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
-  if (codegen_->EmitBakerReadBarrier()) {
-    // Temporary register IP0, obtained from the VIXL scratch register
-    // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
-    // (because that register is clobbered by ReadBarrierMarkRegX
-    // entry points). It cannot be used in calls to
-    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
-    // either. For these reasons, get a third extra temporary register
-    // from the register allocator.
-    locations->AddTemp(Location::RequiresRegister());
-  } else {
-    // Cases other than Baker read barriers: the third temporary will
-    // be acquired from the VIXL scratch register pool.
-  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
@@ -3147,38 +3110,37 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   vixl::aarch64::Label conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
-  // If source and destination are the same, we go to slow path if we need to do
-  // forward copying.
-  if (src_pos.IsConstant()) {
-    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
-    if (dest_pos.IsConstant()) {
-      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-      if (optimizations.GetDestinationIsSource()) {
-        // Checked when building locations.
-        DCHECK_GE(src_pos_constant, dest_pos_constant);
-      } else if (src_pos_constant < dest_pos_constant) {
-        __ Cmp(src, dest);
-        __ B(intrinsic_slow_path->GetEntryLabel(), eq);
+  // If source and destination are the same, we go to slow path if we need to do forward copying.
+  // We do not need to do this check if the source and destination positions are the same.
+  if (!optimizations.GetSourcePositionIsDestinationPosition()) {
+    if (src_pos.IsConstant()) {
+      int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (dest_pos.IsConstant()) {
+        int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+        if (optimizations.GetDestinationIsSource()) {
+          // Checked when building locations.
+          DCHECK_GE(src_pos_constant, dest_pos_constant);
+        } else if (src_pos_constant < dest_pos_constant) {
+          __ Cmp(src, dest);
+          __ B(intrinsic_slow_path->GetEntryLabel(), eq);
+        }
+      } else {
+        if (!optimizations.GetDestinationIsSource()) {
+          __ Cmp(src, dest);
+          __ B(&conditions_on_positions_validated, ne);
+        }
+        __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
+        __ B(intrinsic_slow_path->GetEntryLabel(), gt);
       }
-      // Checked when building locations.
-      DCHECK(!optimizations.GetDestinationIsSource() ||
-             (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
     } else {
       if (!optimizations.GetDestinationIsSource()) {
         __ Cmp(src, dest);
         __ B(&conditions_on_positions_validated, ne);
       }
-      __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
-      __ B(intrinsic_slow_path->GetEntryLabel(), gt);
+      __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
+             OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
+      __ B(intrinsic_slow_path->GetEntryLabel(), lt);
     }
-  } else {
-    if (!optimizations.GetDestinationIsSource()) {
-      __ Cmp(src, dest);
-      __ B(&conditions_on_positions_validated, ne);
-    }
-    __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
-           OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
-    __ B(intrinsic_slow_path->GetEntryLabel(), lt);
   }
 
   __ Bind(&conditions_on_positions_validated);
@@ -3194,9 +3156,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   }
 
   // We have already checked in the LocationsBuilder for the constant case.
-  if (!length.IsConstant() &&
-      !optimizations.GetCountIsSourceLength() &&
-      !optimizations.GetCountIsDestinationLength()) {
+  if (!length.IsConstant()) {
     // Merge the following two comparisons into one:
     //   If the length is negative, bail out (delegate to libcore's native implementation).
     //   If the length >= 128 then (currently) prefer native implementation.
@@ -3205,252 +3165,155 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
   }
 
   // Validity checks: source.
   CheckSystemArrayCopyPosition(masm,
-                               src_pos,
                                src,
+                               src_pos,
                                length,
                                intrinsic_slow_path,
                                temp1,
-                               optimizations.GetCountIsSourceLength());
+                               optimizations.GetCountIsSourceLength(),
+                               /*position_sign_checked=*/ false);
 
   // Validity checks: dest.
+  bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
   CheckSystemArrayCopyPosition(masm,
-                               dest_pos,
                                dest,
+                               dest_pos,
                                length,
                                intrinsic_slow_path,
                                temp1,
-                               optimizations.GetCountIsDestinationLength());
-  {
-    // We use a block to end the scratch scope before the write barrier, thus
-    // freeing the temporary registers so they can be used in `MarkGCCard`.
-    UseScratchRegisterScope temps(masm);
-    Location temp3_loc;  // Used only for Baker read barrier.
-    Register temp3;
+                               optimizations.GetCountIsDestinationLength(),
+                               dest_position_sign_checked);
+
+  auto check_non_primitive_array_class = [&](Register klass, Register temp) {
+    // No read barrier is needed for reading a chain of constant references for comparing
+    // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+    // /* HeapReference<Class> */ temp = klass->component_type_
+    __ Ldr(temp, HeapOperand(klass, component_offset));
+    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
+    // Check that the component type is not null.
+    __ Cbz(temp, intrinsic_slow_path->GetEntryLabel());
+    // Check that the component type is not a primitive.
+    // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
+    __ Ldr(temp, HeapOperand(temp, primitive_offset));
+    static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+    __ Cbnz(temp, intrinsic_slow_path->GetEntryLabel());
+  };
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+
     if (codegen_->EmitBakerReadBarrier()) {
-      temp3_loc = locations->GetTemp(2);
-      temp3 = WRegisterFrom(temp3_loc);
+      Location temp3_loc = locations->GetTemp(2);
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                      temp1_loc,
+                                                      dest.W(),
+                                                      class_offset,
+                                                      temp3_loc,
+                                                      /* needs_null_check= */ false,
+                                                      /* use_load_acquire= */ false);
+      // Register `temp1` is not trashed by the read barrier emitted
+      // by GenerateFieldLoadWithBakerReadBarrier below, as that
+      // method produces a call to a ReadBarrierMarkRegX entry point,
+      // which saves all potentially live registers, including
+      // temporaries such a `temp1`.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                      temp2_loc,
+                                                      src.W(),
+                                                      class_offset,
+                                                      temp3_loc,
+                                                      /* needs_null_check= */ false,
+                                                      /* use_load_acquire= */ false);
     } else {
-      temp3 = temps.AcquireW();
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ Ldr(temp1, MemOperand(dest, class_offset));
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ Ldr(temp2, MemOperand(src, class_offset));
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
     }
 
-    if (!optimizations.GetDoesNotNeedTypeCheck()) {
-      // Check whether all elements of the source array are assignable to the component
-      // type of the destination array. We do two checks: the classes are the same,
-      // or the destination is Object[]. If none of these checks succeed, we go to the
-      // slow path.
-
-      if (codegen_->EmitBakerReadBarrier()) {
-        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
-          // /* HeapReference<Class> */ temp1 = src->klass_
-          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                          temp1_loc,
-                                                          src.W(),
-                                                          class_offset,
-                                                          temp3_loc,
-                                                          /* needs_null_check= */ false,
-                                                          /* use_load_acquire= */ false);
-          // Bail out if the source is not a non primitive array.
-          // /* HeapReference<Class> */ temp1 = temp1->component_type_
-          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                          temp1_loc,
-                                                          temp1,
-                                                          component_offset,
-                                                          temp3_loc,
-                                                          /* needs_null_check= */ false,
-                                                          /* use_load_acquire= */ false);
-          __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
-          // If heap poisoning is enabled, `temp1` has been unpoisoned
-          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
-          // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
-          __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
-          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
-        }
-
-        // /* HeapReference<Class> */ temp1 = dest->klass_
-        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                        temp1_loc,
-                                                        dest.W(),
-                                                        class_offset,
-                                                        temp3_loc,
-                                                        /* needs_null_check= */ false,
-                                                        /* use_load_acquire= */ false);
-
-        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
-          // Bail out if the destination is not a non primitive array.
-          //
-          // Register `temp1` is not trashed by the read barrier emitted
-          // by GenerateFieldLoadWithBakerReadBarrier below, as that
-          // method produces a call to a ReadBarrierMarkRegX entry point,
-          // which saves all potentially live registers, including
-          // temporaries such a `temp1`.
-          // /* HeapReference<Class> */ temp2 = temp1->component_type_
-          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                          temp2_loc,
-                                                          temp1,
-                                                          component_offset,
-                                                          temp3_loc,
-                                                          /* needs_null_check= */ false,
-                                                          /* use_load_acquire= */ false);
-          __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
-          // If heap poisoning is enabled, `temp2` has been unpoisoned
-          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
-          // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
-          __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
-          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-          __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
-        }
-
-        // For the same reason given earlier, `temp1` is not trashed by the
-        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
-        // /* HeapReference<Class> */ temp2 = src->klass_
-        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                        temp2_loc,
-                                                        src.W(),
-                                                        class_offset,
-                                                        temp3_loc,
-                                                        /* needs_null_check= */ false,
-                                                        /* use_load_acquire= */ false);
-        // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
-        __ Cmp(temp1, temp2);
-
-        if (optimizations.GetDestinationIsTypedObjectArray()) {
-          vixl::aarch64::Label do_copy;
-          __ B(&do_copy, eq);
-          // /* HeapReference<Class> */ temp1 = temp1->component_type_
-          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                          temp1_loc,
-                                                          temp1,
-                                                          component_offset,
-                                                          temp3_loc,
-                                                          /* needs_null_check= */ false,
-                                                          /* use_load_acquire= */ false);
-          // /* HeapReference<Class> */ temp1 = temp1->super_class_
-          // We do not need to emit a read barrier for the following
-          // heap reference load, as `temp1` is only used in a
-          // comparison with null below, and this reference is not
-          // kept afterwards.
-          __ Ldr(temp1, HeapOperand(temp1, super_offset));
-          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
-          __ Bind(&do_copy);
-        } else {
-          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
-        }
-      } else {
-        // Non read barrier code.
-
-        // /* HeapReference<Class> */ temp1 = dest->klass_
-        __ Ldr(temp1, MemOperand(dest, class_offset));
-        // /* HeapReference<Class> */ temp2 = src->klass_
-        __ Ldr(temp2, MemOperand(src, class_offset));
-        bool did_unpoison = false;
-        if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
-            !optimizations.GetSourceIsNonPrimitiveArray()) {
-          // One or two of the references need to be unpoisoned. Unpoison them
-          // both to make the identity check valid.
-          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
-          did_unpoison = true;
-        }
-
-        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
-          // Bail out if the destination is not a non primitive array.
-          // /* HeapReference<Class> */ temp3 = temp1->component_type_
-          __ Ldr(temp3, HeapOperand(temp1, component_offset));
-          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
-          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
-          // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
-          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
-          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
-        }
-
-        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
-          // Bail out if the source is not a non primitive array.
-          // /* HeapReference<Class> */ temp3 = temp2->component_type_
-          __ Ldr(temp3, HeapOperand(temp2, component_offset));
-          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
-          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
-          // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
-          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
-          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
-        }
-
-        __ Cmp(temp1, temp2);
-
-        if (optimizations.GetDestinationIsTypedObjectArray()) {
-          vixl::aarch64::Label do_copy;
-          __ B(&do_copy, eq);
-          if (!did_unpoison) {
-            codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-          }
-          // /* HeapReference<Class> */ temp1 = temp1->component_type_
-          __ Ldr(temp1, HeapOperand(temp1, component_offset));
-          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-          // /* HeapReference<Class> */ temp1 = temp1->super_class_
-          __ Ldr(temp1, HeapOperand(temp1, super_offset));
-          // No need to unpoison the result, we're comparing against null.
-          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
-          __ Bind(&do_copy);
-        } else {
-          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
-        }
-      }
-    } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    __ Cmp(temp1, temp2);
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+      vixl::aarch64::Label do_copy;
+      // For class match, we can skip the source type check regardless of the optimization flag.
+      __ B(&do_copy, eq);
+      // No read barrier is needed for reading a chain of constant references
+      // for comparing with null, see `ReadBarrierOption`.
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      __ Ldr(temp1, HeapOperand(temp1, component_offset));
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp1 = temp1->super_class_
+      __ Ldr(temp1, HeapOperand(temp1, super_offset));
+      // No need to unpoison the result, we're comparing against null.
+      __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
       // Bail out if the source is not a non primitive array.
-      if (codegen_->EmitBakerReadBarrier()) {
-        // /* HeapReference<Class> */ temp1 = src->klass_
-        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                        temp1_loc,
-                                                        src.W(),
-                                                        class_offset,
-                                                        temp3_loc,
-                                                        /* needs_null_check= */ false,
-                                                        /* use_load_acquire= */ false);
-        // /* HeapReference<Class> */ temp2 = temp1->component_type_
-        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                        temp2_loc,
-                                                        temp1,
-                                                        component_offset,
-                                                        temp3_loc,
-                                                        /* needs_null_check= */ false,
-                                                        /* use_load_acquire= */ false);
-        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
-        // If heap poisoning is enabled, `temp2` has been unpoisoned
-        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
-      } else {
-        // /* HeapReference<Class> */ temp1 = src->klass_
-        __ Ldr(temp1, HeapOperand(src.W(), class_offset));
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
-        // /* HeapReference<Class> */ temp2 = temp1->component_type_
-        __ Ldr(temp2, HeapOperand(temp1, component_offset));
-        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
-        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        check_non_primitive_array_class(temp2, temp2);
+      }
+      __ Bind(&do_copy);
+    } else {
+      DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
+      // For class match, we can skip the array type check completely if at least one of source
+      // and destination is known to be a non primitive array, otherwise one check is enough.
+      __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        check_non_primitive_array_class(temp2, temp2);
       }
-      // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
-      __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    // No read barrier is needed for reading a chain of constant references for comparing
+    // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+    // /* HeapReference<Class> */ temp2 = src->klass_
+    __ Ldr(temp2, MemOperand(src, class_offset));
+    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+    check_non_primitive_array_class(temp2, temp2);
+  }
+
+  if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+    // Null constant length: not need to emit the loop code at all.
+  } else {
+    vixl::aarch64::Label skip_copy_and_write_barrier;
+    if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is null.
+      __ Cbz(WRegisterFrom(length), &skip_copy_and_write_barrier);
     }
 
-    if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
-      // Null constant length: not need to emit the loop code at all.
-    } else {
+    {
+      // We use a block to end the scratch scope before the write barrier, thus
+      // freeing the temporary registers so they can be used in `MarkGCCard`.
+      UseScratchRegisterScope temps(masm);
+      bool emit_rb = codegen_->EmitBakerReadBarrier();
+      Register temp3;
+      Register tmp;
+      if (emit_rb) {
+        temp3 = WRegisterFrom(locations->GetTemp(2));
+        // Make sure `tmp` is not IP0, as it is clobbered by ReadBarrierMarkRegX entry points
+        // in ReadBarrierSystemArrayCopySlowPathARM64. Explicitly allocate the register IP1.
+        DCHECK(temps.IsAvailable(ip1));
+        temps.Exclude(ip1);
+        tmp = ip1.W();
+      } else {
+        temp3 = temps.AcquireW();
+        tmp = temps.AcquireW();
+      }
+
       Register src_curr_addr = temp1.X();
       Register dst_curr_addr = temp2.X();
       Register src_stop_addr = temp3.X();
-      vixl::aarch64::Label done;
       const DataType::Type type = DataType::Type::kReference;
       const int32_t element_size = DataType::Size(type);
 
-      if (length.IsRegister()) {
-        // Don't enter the copy loop if the length is null.
-        __ Cbz(WRegisterFrom(length), &done);
-      }
-
-      if (codegen_->EmitBakerReadBarrier()) {
+      SlowPathCodeARM64* read_barrier_slow_path = nullptr;
+      if (emit_rb) {
         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
 
         // SystemArrayCopy implementation for Baker read barriers (see
@@ -3471,21 +3334,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
         //   } while (src_ptr != end_ptr)
         // }
 
-        // Make sure `tmp` is not IP0, as it is clobbered by
-        // ReadBarrierMarkRegX entry points in
-        // ReadBarrierSystemArrayCopySlowPathARM64.
-        DCHECK(temps.IsAvailable(ip0));
-        temps.Exclude(ip0);
-        Register tmp = temps.AcquireW();
-        DCHECK_NE(LocationFrom(tmp).reg(), IP0);
-        // Put IP0 back in the pool so that VIXL has at least one
-        // scratch register available to emit macro-instructions (note
-        // that IP1 is already used for `tmp`). Indeed some
-        // macro-instructions used in GenSystemArrayCopyAddresses
-        // (invoked hereunder) may require a scratch register (for
-        // instance to emit a load with a large constant offset).
-        temps.Include(ip0);
-
         // /* int32_t */ monitor = src->monitor_
         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
         // /* LockWord */ lock_word = LockWord(monitor)
@@ -3499,78 +3347,57 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
        // on `tmp`.
        __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
 
-        // Compute base source address, base destination address, and end
-        // source address for System.arraycopy* intrinsics in `src_base`,
-        // `dst_base` and `src_end` respectively.
-        // Note that `src_curr_addr` is computed from from `src` (and
-        // `src_pos`) here, and thus honors the artificial dependency
-        // of `src` on `tmp`.
-        GenSystemArrayCopyAddresses(masm,
-                                    type,
-                                    src,
-                                    src_pos,
-                                    dest,
-                                    dest_pos,
-                                    length,
-                                    src_curr_addr,
-                                    dst_curr_addr,
-                                    src_stop_addr);
-
         // Slow path used to copy array when `src` is gray.
-        SlowPathCodeARM64* read_barrier_slow_path =
+        read_barrier_slow_path =
             new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
                 invoke, LocationFrom(tmp));
        codegen_->AddSlowPath(read_barrier_slow_path);
+      }
+
+      // Compute base source address, base destination address, and end
+      // source address for System.arraycopy* intrinsics in `src_base`,
+      // `dst_base` and `src_end` respectively.
+      // Note that `src_curr_addr` is computed from from `src` (and
+      // `src_pos`) here, and thus honors the artificial dependency
+      // of `src` on `tmp`.
+      GenSystemArrayCopyAddresses(masm,
+                                  type,
+                                  src,
+                                  src_pos,
+                                  dest,
+                                  dest_pos,
+                                  length,
+                                  src_curr_addr,
+                                  dst_curr_addr,
+                                  src_stop_addr);
+
+      if (emit_rb) {
         // Given the numeric representation, it's enough to check the low bit of the rb_state.
         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+      }
 
-        // Fast-path copy.
-        // Iterate over the arrays and do a raw copy of the objects. We don't need to
-        // poison/unpoison.
-        vixl::aarch64::Label loop;
-        __ Bind(&loop);
-        __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
-        __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
-        __ Cmp(src_curr_addr, src_stop_addr);
-        __ B(&loop, ne);
-
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl::aarch64::Label loop;
+      __ Bind(&loop);
+      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+      __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&loop, ne);
+
+      if (emit_rb) {
+        DCHECK(read_barrier_slow_path != nullptr);
         __ Bind(read_barrier_slow_path->GetExitLabel());
-      } else {
-        // Non read barrier code.
-
-        // Compute base source address, base destination address, and end
-        // source address for System.arraycopy* intrinsics in `src_base`,
-        // `dst_base` and `src_end` respectively.
-        GenSystemArrayCopyAddresses(masm,
-                                    type,
-                                    src,
-                                    src_pos,
-                                    dest,
-                                    dest_pos,
-                                    length,
-                                    src_curr_addr,
-                                    dst_curr_addr,
-                                    src_stop_addr);
-
-        // Iterate over the arrays and do a raw copy of the objects. We don't need to
-        // poison/unpoison.
-        vixl::aarch64::Label loop;
-        __ Bind(&loop);
-        {
-          Register tmp = temps.AcquireW();
-          __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
-          __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
-        }
-        __ Cmp(src_curr_addr, src_stop_addr);
-        __ B(&loop, ne);
       }
-      __ Bind(&done);
     }
-  }
 
-  // We only need one card marking on the destination array.
-  codegen_->MarkGCCard(dest.W(), Register(), /* emit_null_check= */ false);
+    // We only need one card marking on the destination array.
+    codegen_->MarkGCCard(dest.W(), Register(), /* emit_null_check= */ false);
+
+    __ Bind(&skip_copy_and_write_barrier);
+  }
 
   __ Bind(intrinsic_slow_path->GetExitLabel());
 }
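A few notes on the logic in the hunks above, each with a small standalone sketch in C++ (illustrative code, not taken from ART or VIXL).

The new `LocationForSystemArrayCopyInput` keeps a constant input in a constant location only when `vixl::aarch64::Assembler::IsImmAddSub` accepts its value; on AArch64, ADD/SUB (immediate) encodes an unsigned 12-bit immediate, optionally shifted left by 12 bits. A hedged sketch of such a predicate follows — the real VIXL implementation may differ in detail, and `FitsAddSubImmediate` is a hypothetical name:

```cpp
#include <cstdint>

// Accept imm12, or imm12 << 12, per the AArch64 ADD/SUB (immediate) encoding.
bool FitsAddSubImmediate(int64_t imm) {
  if (imm < 0) return false;
  return (imm >> 12) == 0 ||                        // plain imm12
         (((imm & 0xfff) == 0) && (imm >> 24) == 0);  // imm12, LSL #12
}
```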
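The `CheckSystemArrayCopyPosition` rewrite replaces the old `Subs`+`Ccmp` pair with a plain `Sub`+`Cmp`, relying on the invariant stated in its comment: both operands are non-negative `int32_t`, so the difference cannot underflow, and a negative result simply fails the final comparison. A minimal sketch of the check it encodes, with illustrative names:

```cpp
#include <cstdint>

// array_length and pos are already known to be non-negative here, so the
// subtraction cannot underflow int32_t; a negative remainder fails the
// ">= copy_length" test and routes to the slow path, mirroring the
// emitted Sub / Cmp / B.LT sequence.
bool PositionCheckFails(int32_t array_length, int32_t pos, int32_t copy_length) {
  int32_t remaining = array_length - pos;  // Sub temp, temp, pos
  return remaining < copy_length;          // Cmp temp, length; B.lt slow_path
}
```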
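The new `GenArrayAddress` helper centralizes the element-address computation previously repeated for `src_base`, `dst_base`, and `src_end`. A sketch of the address it materializes, assuming the usual layout of a fixed data offset followed by densely packed elements (`ArrayElementAddress` is a hypothetical model, not ART code):

```cpp
#include <cstdint>

// dest = base + data_offset + (pos << size_shift). A constant pos folds the
// whole expression into one Add immediate; a register pos takes one Add for
// the data offset plus one shifted-register Add, as GenArrayAddress emits.
uint64_t ArrayElementAddress(uint64_t base, int64_t pos,
                             int32_t data_offset, int size_shift) {
  return base + static_cast<uint64_t>(data_offset) +
         (static_cast<uint64_t>(pos) << size_shift);
}
```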
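Finally, the "merge the following two comparisons into one" comment refers to a standard trick that the single `Cmp` plus `B.hs` (unsigned higher-or-same) implements: a negative `int32_t` reinterpreted as `uint32_t` is at least 2^31, so one unsigned comparison rejects both negative lengths and lengths at or above the 128-element threshold. A short sketch of the equivalent predicate:

```cpp
#include <cstdint>

// One unsigned comparison covers both "length < 0" and "length >= threshold":
// negative lengths wrap to values >= 2^31 as uint32_t, which always compare
// higher-or-same against a small positive threshold such as 128.
bool LengthNeedsSlowPath(int32_t length, int32_t threshold) {
  return static_cast<uint32_t>(length) >= static_cast<uint32_t>(threshold);
}
```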