diff options
author | Alex Eagle <alex@aspect.dev> | 2023-08-17 07:52:12 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-17 07:52:12 -0700 |
commit | 6ad211b6f9da1675d342ba187a5e86f244de0394 (patch) | |
tree | 897b12ed3569fdf3bb2ec54a76b4844e69dfc576 | |
parent | 40e1477e71bf935b220741e9f811cf5de92e8770 (diff) | |
parent | ed49ca3f9f9fd7b526cfdbf7caec3db5a57797c6 (diff) | |
download | rules_pkg-6ad211b6f9da1675d342ba187a5e86f244de0394.tar.gz |
Make pkg_zip compression configurable (#737)
Zip offers different compression algorithms and levels. For packaging,
sometimes faster compression speed is more important than size. With
`pkg_tar`, there is already the option to use a custom compressor, but
there's no similar option for `pkg_zip` yet.
This PR exposes the zip `compression_level` and `compression_type`
arguments for `pkg_zip`, thus enabling the use case.
The list of supported compression types is the same as for Python's `ZipFile`:
https://docs.python.org/3/library/zipfile.html#zipfile.ZIP_STORED
There's a new test case that verifies that the compression settings work
by comparing the compressed file sizes.
The default compression level is the same as the one chosen for `pkg_tar`
in the recent change #720.
-rw-r--r-- | pkg/private/zip/build_zip.py | 42 | ||||
-rw-r--r-- | pkg/private/zip/zip.bzl | 12 | ||||
-rw-r--r-- | tests/zip/BUILD | 35 | ||||
-rw-r--r-- | tests/zip/zip_test.py | 28 | ||||
-rw-r--r-- | tests/zip/zip_test_lib.py | 2 |
5 files changed, 111 insertions, 8 deletions
diff --git a/pkg/private/zip/build_zip.py b/pkg/private/zip/build_zip.py index fe8ffab..ca48a08 100644 --- a/pkg/private/zip/build_zip.py +++ b/pkg/private/zip/build_zip.py @@ -15,7 +15,9 @@ import argparse import datetime +import logging import os +import sys import zipfile from pkg.private import build_info @@ -46,6 +48,12 @@ def _create_argument_parser(): parser.add_argument( '-m', '--mode', help='The file system mode to use for files added into the zip.') + parser.add_argument( + '-c', '--compression_type', + help='The compression type to use') + parser.add_argument( + '-l', '--compression_level', + help='The compression level to use') parser.add_argument('--manifest', help='manifest of contents to add to the layer.', required=True) @@ -71,7 +79,7 @@ def parse_date(ts): class ZipWriter(object): - def __init__(self, output_path: str, time_stamp: int, default_mode: int): + def __init__(self, output_path: str, time_stamp: int, default_mode: int, compression_type: str, compression_level: int): """Create a writer. You must close() after use or use in a 'with' statement. 
@@ -84,7 +92,15 @@ class ZipWriter(object): self.output_path = output_path self.time_stamp = time_stamp self.default_mode = default_mode - self.zip_file = zipfile.ZipFile(self.output_path, mode='w') + compressions = { + "deflated": zipfile.ZIP_DEFLATED, + "lzma": zipfile.ZIP_LZMA, + "bzip2": zipfile.ZIP_BZIP2, + "stored": zipfile.ZIP_STORED + } + self.compression_type = compressions[compression_type] + self.compression_level = compression_level + self.zip_file = zipfile.ZipFile(self.output_path, mode='w', compression=self.compression_type) def __enter__(self): return self @@ -96,6 +112,15 @@ class ZipWriter(object): self.zip_file.close() self.zip_file = None + def writestr(self, entry_info, content: str, compresslevel: int): + if sys.version_info >= (3, 7): + self.zip_file.writestr(entry_info, content, compresslevel=compresslevel) + else: + # Python 3.6 and lower don't support compresslevel + self.zip_file.writestr(entry_info, content) + if compresslevel != 6: + logging.warn("Custom compresslevel is not supported with python < 3.7") + def make_zipinfo(self, path: str, mode: str): """Create a Zipinfo. @@ -141,10 +166,10 @@ class ZipWriter(object): entry_info = self.make_zipinfo(path=dst_path, mode=mode) if entry_type == manifest.ENTRY_IS_FILE: - entry_info.compress_type = zipfile.ZIP_DEFLATED + entry_info.compress_type = self.compression_type # Using utf-8 for the file names is for python <3.7 compatibility. 
with open(src.encode('utf-8'), 'rb') as src_content: - self.zip_file.writestr(entry_info, src_content.read()) + self.writestr(entry_info, src_content.read(), compresslevel=self.compression_level) elif entry_type == manifest.ENTRY_IS_DIR: entry_info.compress_type = zipfile.ZIP_STORED # Set directory bits @@ -158,7 +183,7 @@ class ZipWriter(object): elif entry_type == manifest.ENTRY_IS_TREE: self.add_tree(src, dst_path, mode) elif entry_type == manifest.ENTRY_IS_EMPTY_FILE: - entry_info.compress_type = zipfile.ZIP_DEFLATED + entry_info.compress_type = zipfile.ZIP_STORED self.zip_file.writestr(entry_info, '') else: raise Exception('Unknown type for manifest entry:', entry) @@ -213,9 +238,9 @@ class ZipWriter(object): else: f_mode = mode entry_info = self.make_zipinfo(path=path, mode=f_mode) - entry_info.compress_type = zipfile.ZIP_DEFLATED + entry_info.compress_type = self.compression_type with open(content_path, 'rb') as src: - self.zip_file.writestr(entry_info, src.read()) + self.writestr(entry_info, src.read(), compresslevel=self.compression_level) else: # Implicitly created directory dir_path = path @@ -266,10 +291,11 @@ def main(args): default_mode = None if args.mode: default_mode = int(args.mode, 8) + compression_level = int(args.compression_level) manifest = _load_manifest(args.directory, args.manifest) with ZipWriter( - args.output, time_stamp=ts, default_mode=default_mode) as zip_out: + args.output, time_stamp=ts, default_mode=default_mode, compression_type=args.compression_type, compression_level=compression_level) as zip_out: for entry in manifest: zip_out.add_manifest_entry(entry) diff --git a/pkg/private/zip/zip.bzl b/pkg/private/zip/zip.bzl index d66e536..9230de6 100644 --- a/pkg/private/zip/zip.bzl +++ b/pkg/private/zip/zip.bzl @@ -40,6 +40,8 @@ def _pkg_zip_impl(ctx): args.add("-d", substitute_package_variables(ctx, ctx.attr.package_dir)) args.add("-t", ctx.attr.timestamp) args.add("-m", ctx.attr.mode) + args.add("-c", str(ctx.attr.compression_type)) 
+ args.add("-l", ctx.attr.compression_level) inputs = [] if ctx.attr.stamp == 1 or (ctx.attr.stamp == -1 and ctx.attr.private_stamp_detect): @@ -115,6 +117,16 @@ Jan 1, 1980 will be rounded up and the precision in the zip file is limited to a granularity of 2 seconds.""", default = 315532800, ), + "compression_level": attr.int( + default = 6, + doc = "The compression level to use, 1 is the fastest, 9 gives the smallest results. 0 skips compression, depending on the method used" + ), + "compression_type": attr.string( + default = "deflated", + doc = """The compression to use. Note that lzma and bzip2 might not be supported by all readers. +The list of compressions is the same as Python's ZipFile: https://docs.python.org/3/library/zipfile.html#zipfile.ZIP_STORED""", + values = ["deflated", "lzma", "bzip2", "stored"] + ), # Common attributes "out": attr.output( diff --git a/tests/zip/BUILD b/tests/zip/BUILD index 0a545ec..4b4898d 100644 --- a/tests/zip/BUILD +++ b/tests/zip/BUILD @@ -235,6 +235,37 @@ pkg_zip( package_variables = ":my_package_variables", ) +# Different compressions +pkg_zip( + name = "test_zip_deflated_level_3", + srcs = [ + "//tests:testdata/loremipsum.txt", + ], + compression_type = "deflated", + compression_level = 3, +) +pkg_zip( + name = "test_zip_bzip2", + srcs = [ + "//tests:testdata/loremipsum.txt", + ], + compression_type = "bzip2", +) +pkg_zip( + name = "test_zip_lzma", + srcs = [ + "//tests:testdata/loremipsum.txt", + ], + compression_type = "lzma", +) +pkg_zip( + name = "test_zip_stored", + srcs = [ + "//tests:testdata/loremipsum.txt", + ], + compression_type = "stored", +) + py_test( name = "zip_test", srcs = [ @@ -252,6 +283,10 @@ py_test( ":test_zip_permissions.zip", ":test_zip_timestamp.zip", ":test_zip_tree.zip", + ":test_zip_deflated_level_3", + ":test_zip_bzip2", + ":test_zip_lzma", + ":test_zip_stored", ], python_version = "PY3", deps = [ diff --git a/tests/zip/zip_test.py b/tests/zip/zip_test.py index f110ef7..e5955eb 100644 --- 
a/tests/zip/zip_test.py +++ b/tests/zip/zip_test.py @@ -15,6 +15,7 @@ import datetime import filecmp import os +import sys import unittest import zipfile @@ -123,6 +124,33 @@ class ZipContentsTests(zip_test_lib.ZipContentsTestBase): {"filename": "generate_tree/b/e"}, ]) + def test_compression_deflated(self): + if sys.version_info >= (3, 7): + self.assertZipFileContent("test_zip_deflated_level_3.zip", [ + {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 312}, + ]) + else: + # Python 3.6 doesn't support setting compresslevel, so the file size differs + self.assertZipFileContent("test_zip_deflated_level_3.zip", [ + {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 309}, + ]) + + def test_compression_bzip2(self): + self.assertZipFileContent("test_zip_bzip2.zip", [ + {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 340}, + ]) + + def test_compression_lzma(self): + self.assertZipFileContent("test_zip_lzma.zip", [ + {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 378}, + ]) + + def test_compression_stored(self): + self.assertZipFileContent("test_zip_stored.zip", [ + {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 543}, + ]) + + if __name__ == "__main__": unittest.main() diff --git a/tests/zip/zip_test_lib.py b/tests/zip/zip_test_lib.py index e43c117..a9d237c 100644 --- a/tests/zip/zip_test_lib.py +++ b/tests/zip/zip_test_lib.py @@ -80,6 +80,8 @@ class ZipContentsTestBase(ZipTest): elif "isexe" in expected: got_mode = (info.external_attr >> 16) & UNIX_RX_BITS self.assertEqual(oct(got_mode), oct(UNIX_RX_BITS)) + elif "size" in expected: + self.assertEqual(info.compress_size, expected["size"]) else: if "attr" in expected: |