aboutsummaryrefslogtreecommitdiff
path: root/pkg/filter_directory.py
blob: 83424241e73abae628a326a70068f96a1582d517 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
#!/usr/bin/env python3

# Copyright 2021 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generic directory filter

This program provides a command-line interface that allows for copying contents
from one directory to another, in support of basic manipulation of
TreeArtifacts (directory outputs).

For more information on TreeArtifacts, see
https://docs.bazel.build/versions/master/glossary.html#artifact

"""

import argparse
import os
import pathlib
import shutil
import sys
import textwrap


def _rename_conflict_message(headline, first, second):
    """Format the diagnostic for two --rename mappings that conflict.

    `headline` describes the kind of conflict; `first` and `second` are the
    already-formatted offending mappings, shown indented beneath it.
    """
    return "\n".join([
        headline,
        "    " + first,
        "    " + second,
        "",
        "Each --rename DESTINATION=SOURCE pair must be one-to-one.",
    ])


def _parse_renames(rename_args):
    """Parse the raw ``--rename DESTINATION=SOURCE`` strings.

    Args:
        rename_args: list of strings as collected by argparse.

    Returns:
        A dict mapping each source path to its destination path (both
        `pathlib.Path`).

    Raises:
        SystemExit: if an argument has no '=' separator, or if a source or a
            destination appears in more than one mapping.
    """
    # src -> dest
    renames_map = {}
    # dest -> src, used for diagnostics
    renames_map_reversed = {}

    for spec in rename_args:
        # Split on the first '=' only, so SOURCE may itself contain '='.
        dest_str, sep, src_str = spec.partition('=')
        if not sep:
            sys.exit(
                "Malformed --rename argument {!r}: expected "
                "DESTINATION=SOURCE".format(spec)
            )
        dest = pathlib.Path(dest_str)
        src = pathlib.Path(src_str)

        if src in renames_map:
            sys.exit(_rename_conflict_message(
                "In --rename, sources used multiple times:",
                "{} -> {}".format(src, dest),
                "{} -> {}".format(src, renames_map[src]),
            ))

        if dest in renames_map_reversed:
            sys.exit(_rename_conflict_message(
                "--rename destination collision:",
                "{} <- {}".format(dest, src),
                "{} <- {}".format(dest, renames_map_reversed[dest]),
            ))

        renames_map[src] = dest
        renames_map_reversed[dest] = src

    return renames_map


def _is_within(path, directory):
    """Return True if `path` is `directory` itself or is located beneath it.

    Both arguments must be absolute, normalized path strings (as produced by
    `os.path.abspath`).  The comparison is done per path component, so
    "/a/out2/f" is *not* considered to be inside "/a/out".
    """
    path = os.path.normcase(path)
    directory = os.path.normcase(directory)
    try:
        return os.path.commonpath([path, directory]) == directory
    except ValueError:
        # Raised when the paths live on different drives (Windows); they
        # cannot be nested in that case.
        return False


def main(argv):
    """Copy the contents of one directory into another, applying filters.

    Every file below `input_dir` is copied to `output_dir`, optionally
    stripping a leading directory (--strip_prefix), adding a leading directory
    (--prefix), relocating individual files (--rename) and dropping individual
    files (--exclude).  All inconsistencies are collected and reported
    together before anything is copied.

    Args:
        argv: command-line arguments, without the program name.

    Returns:
        None on success.

    Raises:
        SystemExit: on malformed or conflicting --rename arguments, or (with
            status 1) when any of these is detected: a directory containing
            files is not under --strip_prefix, an --exclude or --rename was
            never used, a file would be copied outside the output directory,
            or several sources map to the same destination.
    """
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')

    parser.add_argument("--strip_prefix", type=pathlib.Path, default=None,
                        help="directory prefix to strip from all incoming paths")
    parser.add_argument("--prefix", type=pathlib.Path, default=None,
                        help="prefix to add to all output paths")
    parser.add_argument("--rename", type=str, action='append', default=[],
                        help="DESTINATION=SOURCE mappings.  Only supports files.  "
                             "DESTINATION=SOURCE must be one-to-one.")
    parser.add_argument("--exclude", type=pathlib.Path, action='append',
                        default=[],
                        help="Input files to exclude from the output directory")

    parser.add_argument("input_dir", type=pathlib.Path,
                        help="input directory")
    parser.add_argument("output_dir", type=pathlib.Path,
                        help="output directory")

    args = parser.parse_args(argv)

    ###########################################################################
    # Argument consistency checking.
    ###########################################################################

    dir_in = args.input_dir
    dir_out = args.output_dir
    # Normalize exactly like the destinations below (os.path.abspath), so that
    # an output directory spelled with ".." components still compares equal.
    dir_out_abs = os.path.abspath(str(dir_out))

    excludes_used_map = dict.fromkeys(args.exclude, False)

    # src -> dest
    renames_map = _parse_renames(args.rename)

    ###########################################################################
    # Assemble src -> dest map (file_mappings)
    ###########################################################################
    renames_used_map = dict.fromkeys(renames_map, False)
    invalid_strip_prefix_dirs = []

    files_installed_outside_destdir = []

    file_mappings = {}

    # The output root with --prefix applied is the same for every file.
    if args.prefix is not None:
        prefixed_out = dir_out / args.prefix
    else:
        prefixed_out = dir_out

    # NOTE: We need to stringify `dir_in` to support Python 3.5 (Ubuntu 16.04).
    # Otherwise we could just pass it directly.  This is supported as of
    # Python 3.6.
    for root, _dirs, files in os.walk(str(dir_in)):
        root_path = pathlib.Path(root)

        rel_root = root_path.relative_to(dir_in)

        # strip_prefix must apply to everything to reduce overall surprise.  If
        # this root contains files and is not under strip_prefix, record it and
        # fail after this preprocessing stage.
        #
        # This can be refined somewhat -- for example, if we descend into a
        # child directory, we don't need to mention it again.
        #
        # TODO(nacl): this does not make an attempt to tell if everything was
        # rename'd out of the directory we're currently inspecting.  We could
        # theoretically check if this was actually used, and if it was, then add
        # it in.
        dest_rel_root = rel_root
        if files and args.strip_prefix is not None:
            try:
                dest_rel_root = rel_root.relative_to(args.strip_prefix)
            except ValueError:
                # Cannot proceed -- strip_prefix does not apply here.  Store
                # "invalid" directories in an output list, and then continue.
                invalid_strip_prefix_dirs.append(rel_root)

        # This is the base output directory that will be used when there are no
        # --rename's.
        dest_dir = prefixed_out / dest_rel_root

        for f in files:
            rel_src_path = rel_root / f

            # Handle exclusions
            if rel_src_path in excludes_used_map:
                excludes_used_map[rel_src_path] = True
                # Skip it
                continue

            if rel_src_path in renames_map:
                # Calculate a new path based on the individual renames.  Renames
                # override "strip_prefix". Include the prefix too.
                dest = prefixed_out / renames_map[rel_src_path]
                renames_used_map[rel_src_path] = True
            else:
                # Use the paths we already calculated.
                dest = dest_dir / f

            # Verify that files are not going to be installed outside the output
            # directory, and include them in error lists if this is the case.
            #
            # NOTE: We can't use pathlib here since non-strict checks are only
            # available as of Python 3.6 (Ubuntu 16.04 still uses 3.5).  The
            # check must be component-wise: a plain string prefix comparison
            # would accept "<out>2/file" as being inside "<out>".
            if not _is_within(os.path.abspath(str(dest)), dir_out_abs):
                files_installed_outside_destdir.append(rel_src_path)

            file_mappings[root_path / f] = dest

    ###########################################################################
    # Check for early failure
    ###########################################################################

    # Figure out if anything is being installed to multiple places in case we
    # missed something above.  Interactions between strip_prefix and renames
    # come to mind, as well as renames to outputs already in the tarball.
    #
    # These are converted to strings here because they aren't used again
    # afterward.
    dest_src_str_map = {}
    for src, dest in file_mappings.items():
        rel_srcs_str = str(src.relative_to(dir_in))
        try:
            rel_dest_str = str(dest.relative_to(dir_out))
        except ValueError:
            # This can fail if dest is absolute for some reason.  Log something
            # in case there is a code problem here.
            #
            # This probably will also fail due to files being outside of the
            # package.
            print("Ignoring invalid src/dest pair {} -> {}".format(
                src, dest
                ),
                file=sys.stderr,
            )
            continue

        dest_src_str_map.setdefault(rel_dest_str, []).append(rel_srcs_str)

    duplicate_mappings = {
        dest: srcs
        for dest, srcs in dest_src_str_map.items()
        if len(srcs) > 1
    }

    # And now, figure out if any of our exclusions/renames were left unused
    unused_exclusions = [p for p, used in excludes_used_map.items() if not used]
    unused_renames = [src for src, used in renames_used_map.items() if not used]

    # If any of these collections have items in them, there's an inconsistency.
    # We should fail before proceeding
    #
    # Empty collections are "falsy", so this works well enough.
    fail_early = any([
        invalid_strip_prefix_dirs,
        unused_exclusions,
        unused_renames,
        files_installed_outside_destdir,
        duplicate_mappings,
    ])

    if fail_early:
        print("Refusing to continue due to:")
        if invalid_strip_prefix_dirs:
            print("    strip_prefix not applying to directories")
            for d in invalid_strip_prefix_dirs:
                print("       {}".format(d))
        if unused_exclusions:
            print("    unused exclusions:")
            for p in unused_exclusions:
                print("       {}".format(p))
        if unused_renames:
            print("    unused renames:")
            for src in unused_renames:
                # TODO: this could be formatted more prettily, specifically,
                # aligned
                print("       {} -> {}".format(src, renames_map[src]))
        if files_installed_outside_destdir:
            print("    files copied outside DESTDIR:")
            for src in files_installed_outside_destdir:
                print("       {}".format(src))
        if duplicate_mappings:
            print("    duplicate destination mappings:")
            for dest, srcs in duplicate_mappings.items():
                print("       {} <- {}".format(dest, ', '.join(srcs)))
        print("")
        print("Sources are relative to      {}".format(dir_in))
        print("Destinations are relative to {}".format(dir_out))

        sys.exit(1)

    ###########################################################################
    # Do the thing
    ###########################################################################

    for src, dest in file_mappings.items():
        dest.parent.mkdir(exist_ok=True, parents=True)
        shutil.copy(
            # NOTE: Stringifying for Python 3.5
            str(src),
            str(dest),
        )


if __name__ == "__main__":
    # Use sys.exit rather than the `exit` builtin: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist
    # (e.g. when the interpreter is started with -S).
    sys.exit(main(sys.argv[1:]))