diff options
Diffstat (limited to 'src/main/java/com/code_intelligence/jazzer/mutation/mutator/proto/BuilderMutatorFactory.java')
-rw-r--r-- | src/main/java/com/code_intelligence/jazzer/mutation/mutator/proto/BuilderMutatorFactory.java | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/src/main/java/com/code_intelligence/jazzer/mutation/mutator/proto/BuilderMutatorFactory.java b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/proto/BuilderMutatorFactory.java new file mode 100644 index 00000000..85427c6f --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/mutation/mutator/proto/BuilderMutatorFactory.java @@ -0,0 +1,451 @@ +/* + * Copyright 2023 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.mutation.mutator.proto; + +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.assemble; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.combine; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.fixedValue; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.mutateIndices; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.mutateProperty; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.mutateSumInPlace; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.mutateThenMapToImmutable; +import static com.code_intelligence.jazzer.mutation.combinator.MutatorCombinators.mutateViaView; +import static com.code_intelligence.jazzer.mutation.mutator.proto.BuilderAdapters.getMapField; +import static com.code_intelligence.jazzer.mutation.mutator.proto.BuilderAdapters.getPresentFieldOrNull; +import static com.code_intelligence.jazzer.mutation.mutator.proto.BuilderAdapters.makeMutableRepeatedFieldView; +import static com.code_intelligence.jazzer.mutation.mutator.proto.BuilderAdapters.setFieldWithPresence; +import static com.code_intelligence.jazzer.mutation.mutator.proto.BuilderAdapters.setMapField; +import static com.code_intelligence.jazzer.mutation.mutator.proto.TypeLibrary.getDefaultInstance; +import static com.code_intelligence.jazzer.mutation.mutator.proto.TypeLibrary.withoutInitIfRecursive; +import static com.code_intelligence.jazzer.mutation.support.InputStreamSupport.cap; +import static com.code_intelligence.jazzer.mutation.support.TypeSupport.asAnnotatedType; +import static com.code_intelligence.jazzer.mutation.support.TypeSupport.asSubclassOrEmpty; +import static com.code_intelligence.jazzer.mutation.support.TypeSupport.findFirstParentIfClass; +import static com.code_intelligence.jazzer.mutation.support.TypeSupport.notNull; +import static com.code_intelligence.jazzer.mutation.support.TypeSupport.withExtraAnnotations; +import static java.util.Arrays.stream; +import static java.util.Objects.requireNonNull; +import static java.util.function.UnaryOperator.identity; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.toMap; + +import com.code_intelligence.jazzer.mutation.annotation.proto.AnySource; +import com.code_intelligence.jazzer.mutation.annotation.proto.WithDefaultInstance; +import com.code_intelligence.jazzer.mutation.api.ChainedMutatorFactory; +import com.code_intelligence.jazzer.mutation.api.InPlaceMutator; +import com.code_intelligence.jazzer.mutation.api.MutatorFactory; +import com.code_intelligence.jazzer.mutation.api.Serializer; +import com.code_intelligence.jazzer.mutation.api.SerializingInPlaceMutator; +import com.code_intelligence.jazzer.mutation.api.SerializingMutator; +import com.code_intelligence.jazzer.mutation.support.Preconditions; +import com.google.protobuf.Any; +import com.google.protobuf.Descriptors.Descriptor; +import com.google.protobuf.Descriptors.EnumDescriptor; +import com.google.protobuf.Descriptors.EnumValueDescriptor; +import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.Descriptors.FieldDescriptor.JavaType; +import com.google.protobuf.Descriptors.OneofDescriptor; +import com.google.protobuf.DynamicMessage; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Message; +import com.google.protobuf.Message.Builder; +import com.google.protobuf.UnknownFieldSet; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.lang.reflect.AnnotatedType; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public final class BuilderMutatorFactory extends MutatorFactory { + private <T extends Builder, U> InPlaceMutator<T> mutatorForField( + FieldDescriptor field, Annotation[] annotations, MutatorFactory factory) { + factory = withDescriptorDependentMutatorFactoryIfNeeded(factory, field, annotations); + AnnotatedType typeToMutate = TypeLibrary.getTypeToMutate(field); + requireNonNull(typeToMutate, () -> "Java class not specified for " + field); + + InPlaceMutator<T> mutator; + if (field.isMapField()) { + SerializingInPlaceMutator<Map> underlyingMutator = + (SerializingInPlaceMutator<Map>) factory.createInPlaceOrThrow(typeToMutate); + mutator = mutateProperty(builder + -> getMapField(builder, field), + underlyingMutator, (builder, value) -> setMapField(builder, field, value)); + } else if (field.isRepeated()) { + SerializingInPlaceMutator<List<U>> underlyingMutator = + (SerializingInPlaceMutator<List<U>>) factory.createInPlaceOrThrow(typeToMutate); + mutator = + mutateViaView(builder -> makeMutableRepeatedFieldView(builder, field), underlyingMutator); + } else if (field.hasPresence()) { + SerializingMutator<U> underlyingMutator = + (SerializingMutator<U>) factory.createOrThrow(typeToMutate); + mutator = mutateProperty(builder + -> getPresentFieldOrNull(builder, field), + underlyingMutator, (builder, value) -> setFieldWithPresence(builder, field, value)); + } else { + SerializingMutator<U> underlyingMutator = + (SerializingMutator<U>) factory.createOrThrow(typeToMutate); + mutator = mutateProperty(builder + -> (U) builder.getField(field), + underlyingMutator, (builder, value) -> builder.setField(field, value)); + } + + // If recursive message fields (i.e. those that have themselves as transitive subfields) are + // initialized eagerly, they tend to nest very deeply, which easily results in stack overflows. + // We guard against that by making their init a no-op and instead initialize them layer by layer + // in mutations. + return withoutInitIfRecursive(mutator, field); + } + + private MutatorFactory withDescriptorDependentMutatorFactoryIfNeeded( + MutatorFactory originalFactory, FieldDescriptor field, Annotation[] annotations) { + if (field.getJavaType() == JavaType.ENUM) { + // Proto enum fields are special as their type (EnumValueDescriptor) does not encode their + // domain - we need the actual EnumDescriptor instance. + return new ChainedMutatorFactory(originalFactory, new MutatorFactory() { + @Override + public Optional<SerializingMutator<?>> tryCreate( + AnnotatedType type, MutatorFactory factory) { + return findFirstParentIfClass(type, EnumValueDescriptor.class).map(parent -> { + EnumDescriptor enumType = field.getEnumType(); + List<EnumValueDescriptor> values = enumType.getValues(); + String name = enumType.getName(); + if (values.size() == 1) { + // While we generally prefer to error out instead of creating a mutator that can't + // actually mutate its domain, we can't do that for proto enum fields as the user + // creating the fuzz test may not be in a position to modify the existing proto + // definition. + return fixedValue(values.get(0)); + } else { + return mutateThenMapToImmutable(mutateIndices(values.size()), values::get, + EnumValueDescriptor::getIndex, unused -> "Enum<" + name + ">"); + } + }); + } + }); + } else if (field.getJavaType() == JavaType.MESSAGE) { + Descriptor messageDescriptor; + if (field.isMapField()) { + // Map fields are represented as messages, but we mutate them as actual Java Maps. In case + // the values of the proto map are themselves messages, we need to mutate their type. + FieldDescriptor valueField = field.getMessageType().getFields().get(1); + if (valueField.getJavaType() != JavaType.MESSAGE) { + return originalFactory; + } + messageDescriptor = valueField.getMessageType(); + } else { + messageDescriptor = field.getMessageType(); + } + return new ChainedMutatorFactory(originalFactory, new MutatorFactory() { + @Override + public Optional<SerializingMutator<?>> tryCreate( + AnnotatedType type, MutatorFactory factory) { + return asSubclassOrEmpty(type, Message.Builder.class).flatMap(clazz -> { + // BuilderMutatorFactory only handles subclasses of Message.Builder and requests + // Message.Builder itself for message fields, which we handle here. + if (clazz != Message.Builder.class) { + return Optional.empty(); + } + // It is important that we use originalFactory here instead of factory: factory has this + // field-specific message mutator appended, but this mutator should only be used for + // this particular field and not any message subfields. + return Optional.of(makeBuilderMutator(originalFactory, + DynamicMessage.getDefaultInstance(messageDescriptor), annotations)); + }); + } + }); + } else { + return originalFactory; + } + } + + private <T extends Builder> Stream<InPlaceMutator<T>> mutatorsForFields( + Optional<OneofDescriptor> oneofField, List<FieldDescriptor> fields, Annotation[] annotations, + MutatorFactory factory) { + if (oneofField.isPresent()) { + // oneof fields are mutated as one as mutating them independently would cause the mutator to + // erratically switch between the different states. The individual fields are kept in the + // order in which they are defined in the .proto file. + OneofDescriptor oneofDescriptor = oneofField.get(); + + IdentityHashMap<FieldDescriptor, Integer> indexInOneof = + new IdentityHashMap<>(oneofDescriptor.getFieldCount()); + for (int i = 0; i < oneofDescriptor.getFieldCount(); i++) { + indexInOneof.put(oneofDescriptor.getField(i), i); + } + + return Stream.of(mutateSumInPlace( + (T builder) + -> { + FieldDescriptor setField = builder.getOneofFieldDescriptor(oneofDescriptor); + if (setField == null) { + return -1; + } else { + return indexInOneof.get(setField); + } + }, + // Mutating to the unset (-1) state is handled by the individual field mutators, which + // are created nullable as oneof fields report that they track presence. + fields.stream() + .map(field -> mutatorForField(field, annotations, factory)) + .toArray(InPlaceMutator[] ::new))); + } else { + // All non-oneof fields are mutated independently, using the order in which they are declared + // in the .proto file (which may not coincide with the order by field number). + return fields.stream().map(field -> mutatorForField(field, annotations, factory)); + } + } + + private static <M extends Message, B extends Builder> Serializer<B> makeBuilderSerializer( + M defaultInstance) { + return new Serializer<B>() { + @Override + public B read(DataInputStream in) throws IOException { + int length = Math.max(in.readInt(), 0); + return (B) parseLeniently(cap(in, length)); + } + + @Override + public B readExclusive(InputStream in) throws IOException { + return (B) parseLeniently(in); + } + + private Builder parseLeniently(InputStream in) throws IOException { + Builder builder = defaultInstance.toBuilder(); + try { + builder.mergeFrom(in); + } catch (InvalidProtocolBufferException ignored) { + // builder has been partially modified with what could be decoded before the parser error. + } + // We never want the fuzz test to see unknown fields and our mutations should never produce + // them. + builder.setUnknownFields(UnknownFieldSet.getDefaultInstance()); + // Required fields may not have been set at this point. We set them to default values to + // prevent an exception when built. + forceInitialized(builder); + return builder; + } + + private void forceInitialized(Builder builder) { + if (builder.isInitialized()) { + return; + } + for (FieldDescriptor field : builder.getDescriptorForType().getFields()) { + if (!field.isRequired()) { + continue; + } + if (field.getJavaType() == JavaType.MESSAGE) { + forceInitialized(builder.getFieldBuilder(field)); + } else if (!builder.hasField(field)) { + builder.setField(field, field.getDefaultValue()); + } + } + } + + @Override + public void write(Builder builder, DataOutputStream out) throws IOException { + Message message = builder.build(); + out.writeInt(message.getSerializedSize()); + message.writeTo(out); + } + + @Override + public void writeExclusive(Builder builder, OutputStream out) throws IOException { + builder.build().writeTo(out); + } + + @Override + public B detach(Builder builder) { + return (B) builder.build().toBuilder(); + } + }; + } + + /* + * Ensures that only a single instance is created per builder class and shared among all mutators + * that need it. This ensures that arbitrarily nested recursive structures such as a Protobuf + * message type that contains itself as a message field are representable as fixed-size mutator + * structures. + * + * Note: The resulting mutator structures may no longer form a tree: If A is a protobuf message + * type with a message field B and B in turn has a message field of type A, then the mutators for + * A and B will reference each other, forming a cycle. + */ + private final HashMap<CacheKey, SerializingMutator<? extends Builder>> internedMutators = + new HashMap<>(); + + private SerializingMutator<Any.Builder> mutatorForAny( + AnySource anySource, MutatorFactory factory) { + Map<String, Integer> typeUrlToIndex = + IntStream.range(0, anySource.value().length) + .boxed() + .collect(toMap(i -> getTypeUrl(getDefaultInstance(anySource.value()[i])), identity())); + + return assemble(mutator + -> internedMutators.put(new CacheKey(Any.getDescriptor(), anySource), mutator), + Any.getDefaultInstance()::toBuilder, makeBuilderSerializer(Any.getDefaultInstance()), + () + -> mutateSumInPlace( + // Corpus entries may contain Anys with arbitrary (and even invalid) messages, so we + // fall back to mutating the first message type if the type isn't recognized. + (Any.Builder builder) + -> typeUrlToIndex.getOrDefault(builder.getTypeUrl(), 0), + stream(anySource.value()) + .map(messageClass -> { + SerializingMutator<Message> messageMutator = + (SerializingMutator<Message>) factory.createOrThrow(notNull( + withExtraAnnotations(asAnnotatedType(messageClass), anySource))); + return mutateProperty( + (Any.Builder anyBuilder) + -> { + try { + return anyBuilder.build().unpack(messageClass); + } catch (InvalidProtocolBufferException e) { + // This can only happen if the corpus contains an invalid Any. + return getDefaultInstance(messageClass); + } + }, + messageMutator, + (Any.Builder any, Message message) -> { + any.setTypeUrl(getTypeUrl(message)); + any.setValue(message.toByteString()); + }); + }) + .toArray(InPlaceMutator[] ::new))); + } + + private static String getTypeUrl(Message message) { + // We only support the default "type.googleapis.com" prefix. + // https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/any.proto#L94 + return "type.googleapis.com/" + message.getDescriptorForType().getFullName(); + } + + @Override + public Optional<SerializingMutator<?>> tryCreate(AnnotatedType type, MutatorFactory factory) { + return asSubclassOrEmpty(type, Builder.class).flatMap(builderClass -> { + Message defaultInstance; + WithDefaultInstance withDefaultInstance = type.getAnnotation(WithDefaultInstance.class); + if (withDefaultInstance != null) { + defaultInstance = getDefaultInstance(withDefaultInstance); + } else if (builderClass == DynamicMessage.Builder.class) { + throw new IllegalArgumentException( + "To mutate a dynamic message, add a @WithDefaultInstance annotation specifying the" + + " fully qualified method name of a static method returning a default instance"); + } else if (builderClass == Message.Builder.class) { + // Handled by a custom mutator factory for message fields that is created in + // withDescriptorDependentMutatorFactoryIfNeeded. Without @WithDefaultInstance, + // BuilderMutatorFactory only handles proper subclasses, which correspond to generated + // message types. + return Optional.empty(); + } else { + defaultInstance = + getDefaultInstance((Class<? extends Message>) builderClass.getEnclosingClass()); + } + + return Optional.of( + makeBuilderMutator(factory, defaultInstance, type.getDeclaredAnnotations())); + }); + } + + private SerializingMutator<?> makeBuilderMutator( + MutatorFactory factory, Message defaultInstance, Annotation[] annotations) { + AnySource anySource = (AnySource) stream(annotations) + .filter(annotation -> annotation.annotationType() == AnySource.class) + .findFirst() + .orElse(null); + Preconditions.require(anySource == null || anySource.value().length > 0, + "@AnySource must list a non-empty list of classes"); + Descriptor descriptor = defaultInstance.getDescriptorForType(); + + CacheKey cacheKey = new CacheKey(descriptor, anySource); + if (internedMutators.containsKey(cacheKey)) { + return internedMutators.get(cacheKey); + } + + // If there is no @AnySource, mutate the Any.Builder fields just like a regular message. + // TODO: Determine whether we should show a warning in this case. + if (descriptor.equals(Any.getDescriptor()) && anySource != null) { + return mutatorForAny(anySource, factory); + } + + // assemble inserts the instance of the newly created builder mutator into the + // internedMutators map *before* recursively creating the mutators for its fields, which + // ensures that the recursion is finite (bounded by the total number of distinct message types + // that transitively occur as field types on the current message type). + return assemble(mutator + -> internedMutators.put(cacheKey, mutator), + defaultInstance::toBuilder, makeBuilderSerializer(defaultInstance), + () + -> combine( + descriptor.getFields() + .stream() + // Keep oneofs sorted by the first appearance of their fields in the + // .proto file. + .collect(groupingBy( + // groupingBy does not support null keys. We use getRealContainingOneof() + // instead of getContainingOneof() as the latter also reports oneofs for + // proto3 optional fields, which we handle separately. + fieldDescriptor + -> Optional.ofNullable(fieldDescriptor.getRealContainingOneof()), + LinkedHashMap::new, toList())) + .entrySet() + .stream() + .flatMap(entry + -> mutatorsForFields(entry.getKey(), entry.getValue(), + anySource == null ? new Annotation[0] : new Annotation[] {anySource}, + factory)) + .toArray(InPlaceMutator[] ::new))); + } + + private static final class CacheKey { + private final Descriptor descriptor; + private final AnySource anySource; + + private CacheKey(Descriptor descriptor, AnySource anySource) { + this.descriptor = descriptor; + this.anySource = anySource; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CacheKey cacheKey = (CacheKey) o; + return descriptor == cacheKey.descriptor && Objects.equals(anySource, cacheKey.anySource); + } + + @Override + public int hashCode() { + return 31 * System.identityHashCode(descriptor) + Objects.hashCode(anySource); + } + } +} |