summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraiuto <aiuto@google.com>2023-02-13 23:25:09 -0500
committerGitHub <noreply@github.com>2023-02-13 23:25:09 -0500
commit7c2983c869ae88ca878af47f23d8b51c34b58c9e (patch)
tree2d78832c09a052b313962fa95821fc9ca618a7b4
parentf9cf7959cdeb88fe95e0fa1bbc9108c43336647c (diff)
downloadbazelbuild-rules_pkg-7c2983c869ae88ca878af47f23d8b51c34b58c9e.tar.gz
pkg_zip: Some unicode file handling fixes and basic tests (#641)
Add primitive zip unicode filename handling tests - Just make sure we can create an archive with non-ASCII file names.
-rw-r--r--.bazelci/tests.yml1
-rw-r--r--developers.md2
-rw-r--r--pkg/private/manifest.py9
-rw-r--r--pkg/private/zip/build_zip.py23
-rw-r--r--tests/BUILD20
-rw-r--r--tests/mappings/BUILD1
-rw-r--r--tests/zip/BUILD20
-rw-r--r--tests/zip/unicode_test.py34
-rw-r--r--tests/zip/zip_test_lib.py3
9 files changed, 97 insertions, 16 deletions
diff --git a/.bazelci/tests.yml b/.bazelci/tests.yml
index 88e3699..bc997e6 100644
--- a/.bazelci/tests.yml
+++ b/.bazelci/tests.yml
@@ -47,6 +47,7 @@ win_tests: &win_tests
# https://github.com/bazelbuild/bazel/issues?q=is%3Aissue+is%3Aopen+%2Bunicode+%2Bwindows+
- "-//tests/mappings:utf8_manifest_test"
- "-//tests/mappings/filter_directory/..."
+ - "-//tests/zip:unicode_test"
# See #387
- "-//tests/install/..."
# rpmbuild(8) is not supported on Windows
diff --git a/developers.md b/developers.md
index 17a95c0..611b1e0 100644
--- a/developers.md
+++ b/developers.md
@@ -87,6 +87,8 @@ organization will commit to maintaining that feature and responding to issues.
- We use Python 3. We are not trying to support Python 2 at all.
- Always import with a full path from the root of the source tree.
+- Compatibility back to python 3.6 (for CentOS 7 and Ubuntu 18.04 LTS) is
+ required.
### Dependencies
diff --git a/pkg/private/manifest.py b/pkg/private/manifest.py
index 0edda4e..57b7ddd 100644
--- a/pkg/private/manifest.py
+++ b/pkg/private/manifest.py
@@ -49,9 +49,16 @@ class ManifestEntry(object):
def read_entries_from(fh):
"""Return a list of ManifestEntry's from `fh`"""
- raw_entries = json.load(fh)
+ # Subtle: decode the content with read() rather than in json.load() because
+ # the load in older python releases (< 3.7?) does not know how to decode.
+ raw_entries = json.loads(fh.read())
return [ManifestEntry(**entry) for entry in raw_entries]
+def read_entries_from_file(manifest_path):
+ """Return a list of ManifestEntry's from the manifest file at `path`"""
+ with open(manifest_path, 'r', encoding='utf-8') as fh:
+ return read_entries_from(fh)
+
def entry_type_to_string(et):
"""Entry type stringifier"""
if et == ENTRY_IS_FILE:
diff --git a/pkg/private/zip/build_zip.py b/pkg/private/zip/build_zip.py
index 4ef8f65..64c4206 100644
--- a/pkg/private/zip/build_zip.py
+++ b/pkg/private/zip/build_zip.py
@@ -142,8 +142,9 @@ class ZipWriter(object):
if entry_type == manifest.ENTRY_IS_FILE:
entry_info.compress_type = zipfile.ZIP_DEFLATED
- with open(src, 'rb') as src:
- self.zip_file.writestr(entry_info, src.read())
+ # Using utf-8 for the file names is for python <3.7 compatibility.
+ with open(src.encode('utf-8'), 'rb') as src_content:
+ self.zip_file.writestr(entry_info, src_content.read())
elif entry_type == manifest.ENTRY_IS_DIR:
entry_info.compress_type = zipfile.ZIP_STORED
# Set directory bits
@@ -153,7 +154,7 @@ class ZipWriter(object):
entry_info.compress_type = zipfile.ZIP_STORED
# Set directory bits
entry_info.external_attr |= (UNIX_SYMLINK_BIT << 16)
- self.zip_file.writestr(entry_info, src)
+ self.zip_file.writestr(entry_info, src.encode('utf-8'))
elif entry_type == manifest.ENTRY_IS_TREE:
self.add_tree(src, dst_path, mode)
elif entry_type == manifest.ENTRY_IS_EMPTY_FILE:
@@ -226,9 +227,10 @@ class ZipWriter(object):
entry_info.external_attr |= (UNIX_DIR_BIT << 16) | MSDOS_DIR_BIT
self.zip_file.writestr(entry_info, '')
-def _load_manifest(prefix, manifest_fp):
+def _load_manifest(prefix, manifest_path):
manifest_map = {}
- for entry in manifest.read_entries_from(manifest_fp):
+
+ for entry in manifest.read_entries_from_file(manifest_path):
entry.dest = _combine_paths(prefix, entry.dest)
manifest_map[entry.dest] = entry
@@ -263,12 +265,11 @@ def main(args):
if args.mode:
default_mode = int(args.mode, 8)
- with open(args.manifest, 'r') as manifest_fp:
- manifest = _load_manifest(args.directory, manifest_fp)
- with ZipWriter(
- args.output, time_stamp=ts, default_mode=default_mode) as zip_out:
- for entry in manifest:
- zip_out.add_manifest_entry(entry)
+ manifest = _load_manifest(args.directory, args.manifest)
+ with ZipWriter(
+ args.output, time_stamp=ts, default_mode=default_mode) as zip_out:
+ for entry in manifest:
+ zip_out.add_manifest_entry(entry)
if __name__ == '__main__':
diff --git a/tests/BUILD b/tests/BUILD
index 7f4c989..21b6396 100644
--- a/tests/BUILD
+++ b/tests/BUILD
@@ -14,11 +14,10 @@
# -*- coding: utf-8 -*-
load("@rules_python//python:defs.bzl", "py_test")
-load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load(":my_package_name.bzl", "my_package_naming")
load(":path_test.bzl", "path_tests")
load("//pkg:deb.bzl", "pkg_deb")
-load("//pkg:mappings.bzl", "pkg_attributes", "pkg_mkdirs")
+load("//pkg:mappings.bzl", "pkg_attributes", "pkg_files", "strip_prefix")
load("//pkg:tar.bzl", "pkg_tar")
load("//pkg:zip.bzl", "pkg_zip")
@@ -53,6 +52,23 @@ filegroup(
srcs = glob(["**/*.txt"]),
)
+#
+# Data source for Unicode handling tests
+#
+pkg_files(
+ name = "utf8_files",
+ srcs = [
+ "//tests/testdata/utf8:files",
+ ],
+ # Note: This is temporary. We need to fix a latent bug where
+ # we are using 555 as the default for many things. That was the
+ # Google internal behavior.
+ # See https://github.com/bazelbuild/rules_pkg/issues/302 for thoughts.
+ attributes = pkg_attributes(mode = "0o555"),
+ strip_prefix = strip_prefix.from_pkg(),
+ visibility = ["//tests:__subpackages__"],
+)
+
py_test(
name = "archive_test",
srcs = [
diff --git a/tests/mappings/BUILD b/tests/mappings/BUILD
index 8de6c51..44752cb 100644
--- a/tests/mappings/BUILD
+++ b/tests/mappings/BUILD
@@ -109,6 +109,7 @@ pkg_files(
"//tests/testdata/utf8:files",
],
strip_prefix = strip_prefix.from_pkg(),
+ visibility = ["//tests:__subpackages__"],
)
write_content_manifest(
diff --git a/tests/zip/BUILD b/tests/zip/BUILD
index 56c739b..0a545ec 100644
--- a/tests/zip/BUILD
+++ b/tests/zip/BUILD
@@ -256,7 +256,6 @@ py_test(
python_version = "PY3",
deps = [
":zip_test_lib",
- "@bazel_tools//tools/python/runfiles",
],
)
@@ -282,3 +281,22 @@ py_test(
":zip_test_lib",
],
)
+
+pkg_zip(
+ name = "unicode_names",
+ srcs = ["//tests:utf8_files"]
+)
+
+py_test(
+ name = "unicode_test",
+ srcs = [
+ "unicode_test.py",
+ ],
+ data = [
+ ":unicode_names",
+ ],
+ python_version = "PY3",
+ deps = [
+ ":zip_test_lib",
+ ],
+)
diff --git a/tests/zip/unicode_test.py b/tests/zip/unicode_test.py
new file mode 100644
index 0000000..641394a
--- /dev/null
+++ b/tests/zip/unicode_test.py
@@ -0,0 +1,34 @@
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from tests.zip import zip_test_lib
+
+class UnicodeHandlingTests(zip_test_lib.ZipContentsTestBase):
+
+ def test_unicode_file_names(self):
+ self.assertZipFileContent("unicode_names.zip", [
+ {"filename": "1-a"},
+ {"filename": "2-λ"},
+ {"filename": "3-世"},
+ {"filename": "BUILD"},
+ {"filename": "sübdir/", "isdir": True},
+ {"filename": "sübdir/2-λ"},
+ {"filename": "sübdir/hello"},
+ ])
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/zip/zip_test_lib.py b/tests/zip/zip_test_lib.py
index 2832df8..ada8303 100644
--- a/tests/zip/zip_test_lib.py
+++ b/tests/zip/zip_test_lib.py
@@ -91,4 +91,5 @@ class ZipContentsTestBase(ZipTest):
# legacy rule implementation.
attr = 0o555
self.assertEqual(oct((info.external_attr >> 16) & UNIX_RWX_BITS),
- oct(attr))
+ oct(attr),
+ msg = info.filename)