aboutsummaryrefslogtreecommitdiff
path: root/equivalence_map.h
blob: af99ac44eb03c7650c62722c4658d2dfed9d6b5b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_

#include <stddef.h>

#include <deque>
#include <limits>
#include <vector>

#include "components/zucchini/image_index.h"
#include "components/zucchini/image_utils.h"
#include "components/zucchini/targets_affinity.h"

namespace zucchini {

constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();

class EncodedView;
class EquivalenceSource;

// Returns similarity score between a token (raw byte or first byte of a
// reference) in |old_image_index| at |src| and a token in |new_image_index|
// at |dst|. |targets_affinities| describes affinities for each target pool and
// is used to evaluate similarity between references, hence it's size must be
// equal to the number of pools in both |old_image_index| and |new_image_index|.
// Both |src| and |dst| must refer to tokens in |old_image_index| and
// |new_image_index|.
double GetTokenSimilarity(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    offset_t src,
    offset_t dst);

// Returns a similarity score between content in |old_image_index| and
// |new_image_index| at regions described by |equivalence|, using
// |targets_affinities| to evaluate similarity between references.
double GetEquivalenceSimilarity(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    const Equivalence& equivalence);

// Extends |equivalence| forward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceForward(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    const EquivalenceCandidate& equivalence,
    double min_similarity);

// Extends |equivalence| backward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceBackward(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    const EquivalenceCandidate& equivalence,
    double min_similarity);

// Creates an equivalence, starting with |src| and |dst| as offset hint, and
// extends it both forward and backward, trying to maximise similarity between
// |old_image_index| and |new_image_index|, and returns the result.
// |targets_affinities| is used to evaluate similarity between references.
// |min_similarity| describes the minimum acceptable similarity score and is
// used as threshold to discard bad equivalences.
EquivalenceCandidate VisitEquivalenceSeed(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    offset_t src,
    offset_t dst,
    double min_similarity);

// Container of pruned equivalences used to map offsets from |old_image| to
// offsets in |new_image|. Equivalences are pruned by cropping smaller
// equivalences to avoid overlaps, to make the equivalence map (for covered
// bytes in |old_image| and |new_image|) one-to-one.
class OffsetMapper {
 public:
  using const_iterator = std::deque<Equivalence>::const_iterator;

  // Constructors for various data sources. "Old" and "new" image sizes are
  // needed for bounds checks and to handle dangling targets.
  // - From a list of |equivalences|, already sorted (by |src_offset|) and
  //   pruned, useful for tests.
  OffsetMapper(std::deque<Equivalence>&& equivalences,
               offset_t old_image_size,
               offset_t new_image_size);
  // - From a generator, useful for Zucchini-apply.
  OffsetMapper(EquivalenceSource&& equivalence_source,
               offset_t old_image_size,
               offset_t new_image_size);
  // - From an EquivalenceMap that needs to be processed, useful for
  //   Zucchini-gen.
  OffsetMapper(const EquivalenceMap& equivalence_map,
               offset_t old_image_size,
               offset_t new_image_size);
  ~OffsetMapper();

  size_t size() const { return equivalences_.size(); }
  const_iterator begin() const { return equivalences_.begin(); }
  const_iterator end() const { return equivalences_.end(); }

  // Returns naive extended forward-projection of "old" |offset| that follows
  // |eq|'s delta. |eq| needs not cover |offset|.
  // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
  // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
  //   caller must to make the check (hence "naive").
  offset_t NaiveExtendedForwardProject(const Equivalence& unit,
                                       offset_t offset) const;

  // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
  // Assumes |equivalences_| to be non-empty. Cases:
  // - If |offset| is covered (i.e., in an "old" block), then use the delta of
  //   the (unique) equivalence unit that covers |offset|.
  // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
  //   nearest "old" block, use its delta, and avert underflow / overflow by
  //   clamping the result to |[0, new_image_size_)|.
  // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
  //   |new_image_size_ - old_image_size_| as the delta.
  offset_t ExtendedForwardProject(offset_t offset) const;

  // Given sorted |offsets|, applies a projection in-place of all offsets that
  // are part of a pruned equivalence from |old_image| to |new_image|. Other
  // offsets are removed from |offsets|.
  void ForwardProjectAll(std::deque<offset_t>* offsets) const;

  // Accessor for testing.
  const std::deque<Equivalence> equivalences() const { return equivalences_; }

  // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
  // source location that was covered by some Equivalence would become covered
  // by exactly one Equivalence. Moreover, for the offset, the equivalence
  // corresponds to the largest (pre-pruning) covering Equivalence, and in case
  // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
  // change in size since empty Equivalences are removed.
  static void PruneEquivalencesAndSortBySource(
      std::deque<Equivalence>* equivalences);

 private:
  // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
  // block overlaps). Also, it is sorted by "old" offsets.
  std::deque<Equivalence> equivalences_;
  const offset_t old_image_size_;
  const offset_t new_image_size_;
};

// Container of equivalences between |old_image_index| and |new_image_index|,
// sorted by |Equivalence::dst_offset|, only used during patch generation.
class EquivalenceMap {
 public:
  using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;

  EquivalenceMap();
  // Initializes the object with |equivalences|.
  explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
  EquivalenceMap(EquivalenceMap&&);
  EquivalenceMap(const EquivalenceMap&) = delete;
  ~EquivalenceMap();

  // Finds relevant equivalences between |old_view| and |new_view|, using
  // suffix array |old_sa| computed from |old_view| and using
  // |targets_affinities| to evaluate similarity between references. This
  // function is not symmetric. Equivalences might overlap in |old_view|, but
  // not in |new_view|. It tries to maximize accumulated similarity within each
  // equivalence, while maximizing |new_view| coverage. The minimum similarity
  // of an equivalence is given by |min_similarity|.
  void Build(const std::vector<offset_t>& old_sa,
             const EncodedView& old_view,
             const EncodedView& new_view,
             const std::vector<TargetsAffinity>& targets_affinities,
             double min_similarity);

  size_t size() const { return candidates_.size(); }
  const_iterator begin() const { return candidates_.begin(); }
  const_iterator end() const { return candidates_.end(); }

 private:
  // Discovers equivalence candidates between |old_view| and |new_view| and
  // stores them in the object. Note that resulting candidates are not sorted
  // and might be overlapping in new image.
  void CreateCandidates(const std::vector<offset_t>& old_sa,
                        const EncodedView& old_view,
                        const EncodedView& new_view,
                        const std::vector<TargetsAffinity>& targets_affinities,
                        double min_similarity);
  // Sorts candidates by their offset in new image.
  void SortByDestination();
  // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
  // overlaps. Candidates with low similarity scores are more likely to be
  // shrunken. Unfit candidates may be removed.
  void Prune(const EncodedView& old_view,
             const EncodedView& new_view,
             const std::vector<TargetsAffinity>& targets_affinities,
             double min_similarity);

  std::vector<EquivalenceCandidate> candidates_;
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_