aboutsummaryrefslogtreecommitdiff
path: root/gazelle/modules_mapping/generator.py
blob: be57eac3bc4b6db7dd50b03356ba58496aa76019 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
import pathlib
import re
import sys
import zipfile


# Generator is the modules_mapping.json file generator. It inspects wheel
# archives and records, for every importable module found in them, the name of
# the wheel that provides it.
class Generator:
    stderr = None
    output_file = None
    excluded_patterns = None
    # Placeholder only: each instance gets its own dict in __init__ so that
    # mappings never leak from one generator into another.
    mapping = None

    # stderr is the writable text stream used for error messages.
    # output_file is the path the JSON mapping is written to by run().
    # excluded_patterns is an iterable of regular expression strings; a module
    # matching any of them (via re.search) is left out of the mapping.
    def __init__(self, stderr, output_file, excluded_patterns):
        self.stderr = stderr
        self.output_file = output_file
        self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
        self.mapping = {}

    # dig_wheel analyses the wheel .whl file determining the modules it provides
    # by looking at the directory structure.
    def dig_wheel(self, whl):
        with zipfile.ZipFile(whl, "r") as zip_file:
            for path in zip_file.namelist():
                # Entries in metadata directories are skipped, except for the
                # purelib/platlib trees which hold importable code.
                if is_metadata(path) and not data_has_purelib_or_platlib(path):
                    continue
                self.module_for_path(path, whl)

    # module_for_path records the module (and, for __init__.py files, the
    # package) that the archive entry at path provides. Entries that are not
    # .py or .so files are ignored.
    def module_for_path(self, path, whl):
        ext = pathlib.Path(path).suffix
        if ext not in (".py", ".so"):
            return

        # Only strip the leading "<name>.data/purelib/" (or platlib) prefix
        # when the path really has that shape. A plain substring test would
        # also mangle ordinary packages whose name merely contains "purelib"
        # or "platlib".
        parts = path.split("/")
        in_lib_dir = (
            len(parts) > 2
            and is_metadata(path)
            and parts[1].lower() in ("purelib", "platlib")
        )
        root = "/".join(parts[2:]) if in_lib_dir else path

        wheel_name = get_wheel_name(whl)

        if root.endswith("/__init__.py"):
            # Note the '/' here means that the __init__.py is not in the
            # root of the wheel, therefore we can index the directory
            # where this file is as an importable package.
            package = root[: -len("/__init__.py")].replace("/", ".")
            if not self.is_excluded(package):
                self.mapping[package] = wheel_name

        # Always index the module file.
        if ext == ".so":
            # Also remove extra metadata that is embedded as part of
            # the file name as an extra extension.
            ext = "".join(pathlib.Path(root).suffixes)
        module = root[: -len(ext)].replace("/", ".")
        if not self.is_excluded(module):
            self.mapping[module] = wheel_name

    # is_excluded reports whether the module name matches any of the
    # configured exclusion patterns.
    def is_excluded(self, module):
        return any(pattern.search(module) for pattern in self.excluded_patterns)

    # run is the entrypoint for the generator. It indexes every wheel and
    # writes the resulting mapping as JSON to output_file. It returns 0 on
    # success, or 1 after printing the error to stderr when a wheel is
    # rejected.
    def run(self, wheels):
        for whl in wheels:
            try:
                self.dig_wheel(whl)
            # Invalid wheel file names are reported with RuntimeError, so it
            # has to be caught here for the message to reach stderr.
            except (AssertionError, RuntimeError) as error:
                print(error, file=self.stderr)
                return 1
        mapping_json = json.dumps(self.mapping)
        with open(self.output_file, "w") as f:
            f.write(mapping_json)
        return 0


# get_wheel_name returns the distribution name encoded in a wheel file name,
# i.e. everything before the first "-" of the file name.
# It raises RuntimeError when the file does not have the .whl suffix or when
# the file name has no non-empty "<name>-" prefix to take the name from.
def get_wheel_name(path):
    pp = pathlib.PurePath(path)
    # partition leaves `dash` empty when there is no "-" at all; slicing with
    # the -1 that str.find returns would silently drop the last character.
    name, dash, _ = pp.name.partition("-")
    if pp.suffix != ".whl" or not dash or not name:
        raise RuntimeError(
            "{} is not a valid wheel file name: the wheel doesn't follow ".format(
                pp.name
            )
            + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
        )
    return name


# is_metadata reports whether the first component of the path names a
# metadata directory, i.e. one whose name ends in ".dist-info" or ".data"
# (compared case-insensitively).
# Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
def is_metadata(path):
    first_component, _, _ = path.partition("/")
    return first_component.lower().endswith((".dist-info", ".data"))


# data_has_purelib_or_platlib reports whether the path lies in a "purelib" or
# "platlib" directory directly below a metadata directory.
# The .data directory is allowed to contain full purelib and platlib
# directories. These get unpacked into site-packages, so they require indexing
# too. This is the same if "Root-Is-Purelib: true" is set and the files are at
# the root.
# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
def data_has_purelib_or_platlib(path):
    parts = path.split("/")
    if len(parts) < 2:
        # A path with a single component has no sub-directory to inspect;
        # indexing parts[1] would raise IndexError.
        return False
    return is_metadata(path) and parts[1].lower() in ("purelib", "platlib")


# Command-line entry point: parse the arguments, run the generator over the
# given wheels and use its return value as the process exit status.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="generator",
        description="Generates the modules mapping used by the Gazelle manifest.",
    )
    # Required: without a destination the run would only fail at the very
    # end, when trying to open None for writing.
    parser.add_argument("--output_file", type=str, required=True)
    parser.add_argument("--exclude_patterns", nargs="+", default=[])
    parser.add_argument("--wheels", nargs="+", default=[])
    args = parser.parse_args()
    generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
    # sys.exit is used instead of the exit() builtin, which is injected by the
    # site module and is not available when Python runs without it.
    sys.exit(generator.run(args.wheels))