aboutsummaryrefslogtreecommitdiff
path: root/gazelle/modules_mapping/generator.py
diff options
context:
space:
mode:
Diffstat (limited to 'gazelle/modules_mapping/generator.py')
-rw-r--r--gazelle/modules_mapping/generator.py133
1 files changed, 133 insertions, 0 deletions
diff --git a/gazelle/modules_mapping/generator.py b/gazelle/modules_mapping/generator.py
new file mode 100644
index 0000000..be57eac
--- /dev/null
+++ b/gazelle/modules_mapping/generator.py
@@ -0,0 +1,133 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import pathlib
+import re
+import sys
+import zipfile
+
+
+# Generator is the modules_mapping.json file generator.
+class Generator:
+ stderr = None
+ output_file = None
+ excluded_patterns = None
+ mapping = {}
+
+ def __init__(self, stderr, output_file, excluded_patterns):
+ self.stderr = stderr
+ self.output_file = output_file
+ self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
+
+ # dig_wheel analyses the wheel .whl file determining the modules it provides
+ # by looking at the directory structure.
+ def dig_wheel(self, whl):
+ with zipfile.ZipFile(whl, "r") as zip_file:
+ for path in zip_file.namelist():
+ if is_metadata(path):
+ if data_has_purelib_or_platlib(path):
+ self.module_for_path(path, whl)
+ else:
+ continue
+ else:
+ self.module_for_path(path, whl)
+
+ def module_for_path(self, path, whl):
+ ext = pathlib.Path(path).suffix
+ if ext == ".py" or ext == ".so":
+ if "purelib" in path or "platlib" in path:
+ root = "/".join(path.split("/")[2:])
+ else:
+ root = path
+
+ wheel_name = get_wheel_name(whl)
+
+ if root.endswith("/__init__.py"):
+ # Note the '/' here means that the __init__.py is not in the
+ # root of the wheel, therefore we can index the directory
+ # where this file is as an importable package.
+ module = root[: -len("/__init__.py")].replace("/", ".")
+ if not self.is_excluded(module):
+ self.mapping[module] = wheel_name
+
+ # Always index the module file.
+ if ext == ".so":
+ # Also remove extra metadata that is embeded as part of
+ # the file name as an extra extension.
+ ext = "".join(pathlib.Path(root).suffixes)
+ module = root[: -len(ext)].replace("/", ".")
+ if not self.is_excluded(module):
+ self.mapping[module] = wheel_name
+
+ def is_excluded(self, module):
+ for pattern in self.excluded_patterns:
+ if pattern.search(module):
+ return True
+ return False
+
+ # run is the entrypoint for the generator.
+ def run(self, wheels):
+ for whl in wheels:
+ try:
+ self.dig_wheel(whl)
+ except AssertionError as error:
+ print(error, file=self.stderr)
+ return 1
+ mapping_json = json.dumps(self.mapping)
+ with open(self.output_file, "w") as f:
+ f.write(mapping_json)
+ return 0
+
+
+def get_wheel_name(path):
+ pp = pathlib.PurePath(path)
+ if pp.suffix != ".whl":
+ raise RuntimeError(
+ "{} is not a valid wheel file name: the wheel doesn't follow ".format(
+ pp.name
+ )
+ + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
+ )
+ return pp.name[: pp.name.find("-")]
+
+
+# is_metadata checks if the path is in a metadata directory.
+# Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
+def is_metadata(path):
+ top_level = path.split("/")[0].lower()
+ return top_level.endswith(".dist-info") or top_level.endswith(".data")
+
+
+# The .data is allowed to contain a full purelib or platlib directory
+# These get unpacked into site-packages, so require indexing too.
+# This is the same if "Root-Is-Purelib: true" is set and the files are at the root.
+# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
+def data_has_purelib_or_platlib(path):
+ maybe_lib = path.split("/")[1].lower()
+ return is_metadata(path) and (maybe_lib == "purelib" or maybe_lib == "platlib")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ prog="generator",
+ description="Generates the modules mapping used by the Gazelle manifest.",
+ )
+ parser.add_argument("--output_file", type=str)
+ parser.add_argument("--exclude_patterns", nargs="+", default=[])
+ parser.add_argument("--wheels", nargs="+", default=[])
+ args = parser.parse_args()
+ generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
+ exit(generator.run(args.wheels))