diff options
author | Vladimir Marko <vmarko@google.com> | 2024-04-22 16:49:25 +0200 |
---|---|---|
committer | Vladimir Marko <vmarko@google.com> | 2024-04-25 14:14:58 +0000 |
commit | 20668496dda44e4877d3c29afc22cd098701d461 (patch) | |
tree | c71c813881f24ee578ce238c90ece97259844c76 | |
parent | fd7441229120ceaf5ea36d9dbfab86e3ade638db (diff) | |
download | art-20668496dda44e4877d3c29afc22cd098701d461.tar.gz |
Fast-path for `HInstanceOf`/`kInterfaceCheck`.
Implemented for arm, arm64, x86 and x86-64 for now.
The implementation for riscv64 shall be added later.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 333690895
Change-Id: I43f9820c9928601ecebf172b38df17fa13d35fab
-rw-r--r-- | compiler/optimizing/code_generator.h | 6 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 70 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 71 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 95 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 101 |
5 files changed, 287 insertions, 56 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 970da76c43..3ac4bd7dcc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -461,11 +461,13 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { DataType::Type type2); bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { - // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck. + // Used only for `kExactCheck`, `kAbstractClassCheck`, `kClassHierarchyCheck`, + // `kArrayObjectCheck` and `kInterfaceCheck`. DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck || - instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck) + instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck || + instance_of->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) << instance_of->GetTypeCheckKind(); // If the target class is in the boot or app image, it's non-moveable and it doesn't matter // if we compare it with a from-space or to-space reference, the result is the same. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 8a4b069e19..cfa28eda25 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -4135,15 +4135,16 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: { + case TypeCheckKind::kArrayObjectCheck: + case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) && + (type_check_kind != TypeCheckKind::kInterfaceCheck); break; } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; case TypeCheckKind::kBitstringCheck: @@ -4184,10 +4185,14 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const uint32_t object_array_data_offset = + mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); vixl::aarch64::Label done, zero; SlowPathCodeARM64* slow_path = nullptr; @@ -4334,11 +4339,54 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { break; } - case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { + if 
(codegen_->InstanceOfNeedsReadBarrier(instruction)) { + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( + instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + if (codegen_->EmitNonBakerReadBarrier()) { + __ B(slow_path->GetEntryLabel()); + break; + } + // For Baker read barrier, take the slow path while marking. + __ Cbnz(mr, slow_path->GetEntryLabel()); + } + + // Fast-path without read barriers. + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireW(); + Register temp2 = temps.AcquireW(); + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + // /* HeapReference<Class> */ temp = temp->iftable_ + __ Ldr(temp, HeapOperand(temp, iftable_offset)); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + // Load the size of the `IfTable`. The `Class::iftable_` is never null. + __ Ldr(out, HeapOperand(temp, array_length_offset)); + // Loop through the `IfTable` and check if any class matches. + vixl::aarch64::Label loop; + __ Bind(&loop); + __ Cbz(out, &done); // If taken, the result in `out` is already 0 (false). + __ Ldr(temp2, HeapOperand(temp, object_array_data_offset)); + GetAssembler()->MaybeUnpoisonHeapReference(temp2); + // Go to next interface. + __ Add(temp, temp, 2 * kHeapReferenceSize); + __ Sub(out, out, 2); + // Compare the classes and continue the loop if they do not match. + __ Cmp(cls, temp2); + __ B(ne, &loop); + __ Mov(out, 1); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } + + case TypeCheckKind::kUnresolvedCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved and interface check - // cases. + // into the slow path for the unresolved check case. 
// // We cannot directly call the InstanceofNonTrivial runtime // entry point without resorting to a type checking slow path @@ -4581,7 +4629,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { iftable_offset, maybe_temp2_loc, kWithoutReadBarrier); - // Iftable is never null. + // Load the size of the `IfTable`. The `Class::iftable_` is never null. __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); // Loop through the iftable and check if any class matches. vixl::aarch64::Label start_loop; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index eb5fbc4364..a7cc5a6d12 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -8061,6 +8061,9 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 1; + } if (emit_read_barrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -8089,15 +8092,16 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: { + case TypeCheckKind::kArrayObjectCheck: + case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) && + (type_check_kind != TypeCheckKind::kInterfaceCheck); break; } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; case TypeCheckKind::kBitstringCheck: @@ -8137,10 +8141,14 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const uint32_t object_array_data_offset = + mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); vixl32::Label done; vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; @@ -8344,11 +8352,52 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) break; } - case 
TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { + if (codegen_->InstanceOfNeedsReadBarrier(instruction)) { + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( + instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + if (codegen_->EmitNonBakerReadBarrier()) { + __ B(slow_path->GetEntryLabel()); + break; + } + // For Baker read barrier, take the slow path while marking. + __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); + } + + // Fast-path without read barriers. + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = RegisterFrom(maybe_temp_loc); + vixl32::Register temp2 = temps.Acquire(); + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, MemOperand(obj, class_offset)); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + // /* HeapReference<Class> */ temp = temp->iftable_ + __ Ldr(temp, MemOperand(temp, iftable_offset)); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + // Load the size of the `IfTable`. The `Class::iftable_` is never null. + __ Ldr(out, MemOperand(temp, array_length_offset)); + // Loop through the `IfTable` and check if any class matches. + vixl32::Label loop; + __ Bind(&loop); + // If taken, the result in `out` is already 0 (false). + __ CompareAndBranchIfZero(out, &done, /* is_far_target= */ false); + __ Ldr(temp2, MemOperand(temp, object_array_data_offset)); + GetAssembler()->MaybeUnpoisonHeapReference(temp2); + // Go to next interface. + __ Add(temp, temp, static_cast<uint32_t>(2 * kHeapReferenceSize)); + __ Sub(out, out, 2); + // Compare the classes and continue the loop if they do not match. + __ Cmp(cls, temp2); + __ B(ne, &loop); + __ Mov(out, 1); + break; + } + + case TypeCheckKind::kUnresolvedCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved and interface check - // cases. 
+ // into the slow path for the unresolved check case. // // We cannot directly call the InstanceofNonTrivial runtime // entry point without resorting to a type checking slow path @@ -8593,7 +8642,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { iftable_offset, maybe_temp2_loc, kWithoutReadBarrier); - // Iftable is never null. + // Load the size of the `IfTable`. The `Class::iftable_` is never null. __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset)); // Loop through the iftable and check if any class matches. vixl32::Label start_loop; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 5460a1bb07..5ad818dd53 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -7666,6 +7666,9 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 1; + } if (emit_read_barrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -7680,9 +7683,6 @@ static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type // interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. 
static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - return 2; - } return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); } @@ -7694,15 +7694,16 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: { + case TypeCheckKind::kArrayObjectCheck: + case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) && + (type_check_kind != TypeCheckKind::kInterfaceCheck); break; } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; case TypeCheckKind::kBitstringCheck: @@ -7719,6 +7720,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); + } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + locations->SetInAt(1, Location::RequiresRegister()); } else { locations->SetInAt(1, Location::Any()); } @@ -7740,10 +7743,14 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1) ? 
locations->GetTemp(0) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const uint32_t object_array_data_offset = + mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); SlowPathCode* slow_path = nullptr; NearLabel done, zero; @@ -7913,11 +7920,69 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { break; } - case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { + if (codegen_->InstanceOfNeedsReadBarrier(instruction)) { + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( + instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + if (codegen_->EmitNonBakerReadBarrier()) { + __ jmp(slow_path->GetEntryLabel()); + break; + } + // For Baker read barrier, take the slow path while marking. + __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + + // Fast-path without read barriers. 
+ Register temp = maybe_temp_loc.AsRegister<Register>(); + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + __ MaybeUnpoisonHeapReference(temp); + // /* HeapReference<Class> */ temp = temp->iftable_ + __ movl(temp, Address(temp, iftable_offset)); + __ MaybeUnpoisonHeapReference(temp); + // Load the size of the `IfTable`. The `Class::iftable_` is never null. + __ movl(out, Address(temp, array_length_offset)); + // Maybe poison the `cls` for direct comparison with memory. + __ MaybePoisonHeapReference(cls.AsRegister<Register>()); + // Loop through the iftable and check if any class matches. + NearLabel loop, end; + __ Bind(&loop); + // Check if we still have an entry to compare. + __ subl(out, Immediate(2)); + __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end); + // Go to next interface if the classes do not match. + __ cmpl(cls.AsRegister<Register>(), + CodeGeneratorX86::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset)); + __ j(kNotEqual, &loop); + if (zero.IsLinked()) { + __ movl(out, Immediate(1)); + // If `cls` was poisoned above, unpoison it. + __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); + __ jmp(&done); + if (kPoisonHeapReferences) { + // The false case needs to unpoison the class before jumping to `zero`. + __ Bind(&end); + __ UnpoisonHeapReference(cls.AsRegister<Register>()); + __ jmp(&zero); + } + } else { + // To reduce branching, use the fact that the false case branches with a `-2` in `out`. + __ movl(out, Immediate(-1)); + __ Bind(&end); + __ addl(out, Immediate(2)); + // If `cls` was poisoned above, unpoison it. + __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); + } + break; + } + + case TypeCheckKind::kUnresolvedCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved and interface check - // cases. + // into the slow path for the unresolved check case. 
// // We cannot directly call the InstanceofNonTrivial runtime // entry point without resorting to a type checking slow path @@ -8180,14 +8245,14 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { iftable_offset, maybe_temp2_loc, kWithoutReadBarrier); - // Iftable is never null. + // Load the size of the `IfTable`. The `Class::iftable_` is never null. __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. __ MaybePoisonHeapReference(cls.AsRegister<Register>()); // Loop through the iftable and check if any class matches. NearLabel start_loop; __ Bind(&start_loop); - // Need to subtract first to handle the empty array case. + // Check if we still have an entry to compare. __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2)); __ j(kNegative, type_check_slow_path->GetEntryLabel()); // Go to next interface if the classes do not match. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index c495d48406..4d23062100 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -6267,8 +6267,9 @@ void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp, // assert (!clean || !self->is_gc_marking) __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean)); __ j(kNotEqual, &done); - __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), true), - Immediate(0)); + __ gs()->cmpl( + Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true), + Immediate(0)); __ j(kEqual, &done); __ int3(); __ Bind(&done); @@ -7007,6 +7008,9 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. 
static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 1; + } if (emit_read_barrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -7021,9 +7025,6 @@ static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type // interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - return 2; - } return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); } @@ -7035,15 +7036,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: { + case TypeCheckKind::kArrayObjectCheck: + case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) && + (type_check_kind != TypeCheckKind::kInterfaceCheck); break; } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; case TypeCheckKind::kBitstringCheck: @@ -7060,6 +7062,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); + } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + locations->SetInAt(1, Location::RequiresRegister()); } else { locations->SetInAt(1, Location::Any()); } @@ -7080,10 +7084,14 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1u) ? 
locations->GetTemp(0) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const uint32_t object_array_data_offset = + mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); SlowPathCode* slow_path = nullptr; NearLabel done, zero; @@ -7258,11 +7266,70 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { break; } - case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { + if (codegen_->InstanceOfNeedsReadBarrier(instruction)) { + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( + instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + if (codegen_->EmitNonBakerReadBarrier()) { + __ jmp(slow_path->GetEntryLabel()); + break; + } + // For Baker read barrier, take the slow path while marking. + __ gs()->cmpl( + Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + + // Fast-path without read barriers. 
+ CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>(); + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + __ MaybeUnpoisonHeapReference(temp); + // /* HeapReference<Class> */ temp = temp->iftable_ + __ movl(temp, Address(temp, iftable_offset)); + __ MaybeUnpoisonHeapReference(temp); + // Load the size of the `IfTable`. The `Class::iftable_` is never null. + __ movl(out, Address(temp, array_length_offset)); + // Maybe poison the `cls` for direct comparison with memory. + __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>()); + // Loop through the iftable and check if any class matches. + NearLabel loop, end; + __ Bind(&loop); + // Check if we still have an entry to compare. + __ subl(out, Immediate(2)); + __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end); + // Go to next interface if the classes do not match. + __ cmpl(cls.AsRegister<CpuRegister>(), + CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset)); + __ j(kNotEqual, &loop); + if (zero.IsLinked()) { + __ movl(out, Immediate(1)); + // If `cls` was poisoned above, unpoison it. + __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); + __ jmp(&done); + if (kPoisonHeapReferences) { + // The false case needs to unpoison the class before jumping to `zero`. + __ Bind(&end); + __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>()); + __ jmp(&zero); + } + } else { + // To reduce branching, use the fact that the false case branches with a `-2` in `out`. + __ movl(out, Immediate(-1)); + __ Bind(&end); + __ addl(out, Immediate(2)); + // If `cls` was poisoned above, unpoison it. + __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); + } + break; + } + + case TypeCheckKind::kUnresolvedCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved and interface check - // cases. + // into the slow path for the unresolved check case. 
// // We cannot directly call the InstanceofNonTrivial runtime // entry point without resorting to a type checking slow path @@ -7532,14 +7599,14 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { iftable_offset, maybe_temp2_loc, kWithoutReadBarrier); - // Iftable is never null. + // Load the size of the `IfTable`. The `Class::iftable_` is never null. __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>()); // Loop through the iftable and check if any class matches. NearLabel start_loop; __ Bind(&start_loop); - // Need to subtract first to handle the empty array case. + // Check if we still have an entry to compare. __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); __ j(kNegative, type_check_slow_path->GetEntryLabel()); // Go to next interface if the classes do not match. |