diff options
author | Kelvin Zhang <zhangkelvin@google.com> | 2023-09-26 19:05:57 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-09-26 19:05:57 +0000 |
commit | 61e847414e43b0af36e7be2d26208a66575869e0 (patch) | |
tree | 339554911b8c7cab4a723e3f2bf8fca341d7c16f | |
parent | 89ab1fc157a0e7eb235c697f345db6bf194b9321 (diff) | |
parent | b0097b8dc70a57edfbf5543befe75485265ebe40 (diff) | |
download | erofs-utils-61e847414e43b0af36e7be2d26208a66575869e0.tar.gz |
Merge remote-tracking branch 'aosp/upstream-master' am: 65ccd2cccd am: 50b8ee3ebc am: fada4c7c37 am: b0097b8dc7
Original change: https://android-review.googlesource.com/c/platform/external/erofs-utils/+/2759796
Change-Id: I07647482436ebbd3883bbfa1738a9c6fc43caec2
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
75 files changed, 7349 insertions, 1713 deletions
diff --git a/.github/ISSUE_TEMPLATE.txt b/.github/ISSUE_TEMPLATE.txt new file mode 100644 index 0000000..0e736fb --- /dev/null +++ b/.github/ISSUE_TEMPLATE.txt @@ -0,0 +1,9 @@ +Please **do not** send pull-requests or open new issues on Github. + +Besides, the current erofs-utils repo is: +git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git + +Github is not _the place_ for EROFS development, and some mirrors +are actually unofficial and not frequently monitored. + +* Send bug reports and/or feedback to: linux-erofs@lists.ozlabs.org diff --git a/.github/PULL_REQUEST_TEMPLATE.txt b/.github/PULL_REQUEST_TEMPLATE.txt new file mode 100644 index 0000000..0e736fb --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.txt @@ -0,0 +1,9 @@ +Please **do not** send pull-requests or open new issues on Github. + +Besides, the current erofs-utils repo is: +git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git + +Github is not _the place_ for EROFS development, and some mirrors +are actually unofficial and not frequently monitored. 
+ +* Send bug reports and/or feedback to: linux-erofs@lists.ozlabs.org @@ -1,7 +1,7 @@ EROFS USERSPACE UTILITIES M: Li Guifu <bluce.lee@aliyun.com> M: Gao Xiang <xiang@kernel.org> -M: Huang Jianan <huangjianan@oppo.com> +M: Huang Jianan <jnhuang95@gmail.com> R: Chao Yu <chao@kernel.org> R: Miao Xie <miaoxie@huawei.com> R: Fang Wei <fangwei1@huawei.com> @@ -109,6 +109,9 @@ cc_library { srcs: [ "lib/*.c", ], + exclude_srcs: [ + "lib/compressor_libdeflate.c", + ], export_include_dirs: ["include"], target: { @@ -124,7 +127,6 @@ cc_defaults { defaults: ["erofs-utils_defaults"], srcs: [ - "lib/*.c", "mkfs/*.c", ], static_libs: [ @@ -159,7 +161,6 @@ cc_binary { host_supported: true, recovery_available: true, srcs: [ - "lib/*.c", "dump/*.c", ], static_libs: [ @@ -178,7 +179,6 @@ cc_binary { host_supported: true, recovery_available: true, srcs: [ - "lib/*.c", "fsck/*.c", ], static_libs: [ @@ -1,3 +1,19 @@ +erofs-utils 1.7 + + * This release includes the following updates: + - support arbitrary valid block sizes in addition to page size; + - (mkfs.erofs) arrange on-disk meta with Breadth-First Traversal instead; + - support long xattr name prefixes (Jingbo Xu); + - support UUID functionality without libuuid (Norbert Lange); + - (mkfs.erofs, experimental) add DEFLATE algorithm support; + - (mkfs.erofs, experimental) support building images directly from tarballs; + - (dump.erofs) print more superblock fields (Guo Xuenan); + - (mkfs.erofs, experimental) introduce preliminary rebuild mode (Jingbo Xu); + - various bugfixes and cleanups (Sandeep Dhavale, Guo Xuenan, Yue Hu, + Weizhao Ouyang, Kelvin Zhang, Noboru Asai, Yifan Zhao and Li Yiyan); + + -- Gao Xiang <xiang@kernel.org> Thu, 21 Sep 2023 00:00:00 +0800 + erofs-utils 1.6 * This release includes the following updates: @@ -1,2 +1,2 @@ -1.6 -2023-03-12 +1.7 +2023-09-21 diff --git a/configure.ac b/configure.ac index 4dbe86f..13ee616 100644 --- a/configure.ac +++ b/configure.ac @@ -29,6 +29,41 @@ else AC_MSG_ERROR([pkg-config 
is required. See pkg-config.freedesktop.org]) fi +dnl Check if the flag is supported by compiler +dnl CC_CHECK_CFLAGS_SILENT([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) +AC_DEFUN([CC_CHECK_CFLAGS_SILENT], [ + AC_CACHE_VAL(AS_TR_SH([cc_cv_cflags_$1]), + [ac_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $1" + AC_LINK_IFELSE([AC_LANG_SOURCE([int main() { return 0; }])], + [eval "AS_TR_SH([cc_cv_cflags_$1])='yes'"], + [eval "AS_TR_SH([cc_cv_cflags_$1])='no'"]) + CFLAGS="$ac_save_CFLAGS" + ]) + + AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes], + [$2], [$3]) +]) + +dnl Check if the flag is supported by compiler (cacheable) +dnl CC_CHECK_CFLAG([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) +AC_DEFUN([CC_CHECK_CFLAG], [ + AC_CACHE_CHECK([if $CC supports $1 flag], + AS_TR_SH([cc_cv_cflags_$1]), + CC_CHECK_CFLAGS_SILENT([$1]) dnl Don't execute actions here! + ) + + AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes], + [$2], [$3]) +]) + +dnl CC_CHECK_CFLAGS([FLAG1 FLAG2], [action-if-found], [action-if-not]) +AC_DEFUN([CC_CHECK_CFLAGS], [ + for flag in $1; do + CC_CHECK_CFLAG($flag, [$2], [$3]) + done +]) + dnl EROFS_UTILS_PARSE_DIRECTORY dnl Input: $1 = a string to a relative or absolute directory dnl Output: $2 = the variable to set with the absolute directory @@ -59,6 +94,8 @@ AC_DEFUN([EROFS_UTILS_PARSE_DIRECTORY], fi ]) +AC_ARG_VAR([MAX_BLOCK_SIZE], [The maximum block size which erofs-utils supports]) + AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], [enable debugging mode @<:@default=no@:>@])], @@ -71,6 +108,12 @@ AC_ARG_ENABLE([werror], [enable_werror="$enableval"], [enable_werror="no"]) +AC_ARG_ENABLE([fuzzing], + [AS_HELP_STRING([--enable-fuzzing], + [set up fuzzing mode @<:@default=no@:>@])], + [enable_fuzzing="$enableval"], + [enable_fuzzing="no"]) + AC_ARG_ENABLE(lz4, [AS_HELP_STRING([--disable-lz4], [disable LZ4 compression support @<:@default=enabled@:>@])], [enable_lz4="$enableval"], [enable_lz4="yes"]) @@ -79,6 +122,15 @@ 
AC_ARG_ENABLE(lzma, [AS_HELP_STRING([--enable-lzma], [enable LZMA compression support @<:@default=no@:>@])], [enable_lzma="$enableval"], [enable_lzma="no"]) +AC_ARG_WITH(zlib, + [AS_HELP_STRING([--without-zlib], + [Ignore presence of zlib inflate support @<:@default=enabled@:>@])]) + +AC_ARG_WITH(libdeflate, + [AS_HELP_STRING([--with-libdeflate], + [Enable and build with libdeflate inflate support @<:@default=disabled@:>@])], [], + [with_libdeflate="no"]) + AC_ARG_ENABLE(fuse, [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])], [enable_fuse="$enableval"], [enable_fuse="no"]) @@ -124,6 +176,7 @@ AC_CHECK_HEADERS(m4_flatten([ fcntl.h getopt.h inttypes.h + linux/aufs_type.h linux/falloc.h linux/fs.h linux/types.h @@ -135,7 +188,9 @@ AC_CHECK_HEADERS(m4_flatten([ string.h sys/ioctl.h sys/mman.h + sys/random.h sys/stat.h + sys/statfs.h sys/sysmacros.h sys/time.h unistd.h @@ -195,6 +250,8 @@ AC_CHECK_FUNCS(m4_flatten([ ftello64 pread64 pwrite64 + posix_fadvise + fstatfs strdup strerror strrchr @@ -202,6 +259,35 @@ AC_CHECK_FUNCS(m4_flatten([ tmpfile64 utimensat])) +# Detect maximum block size if necessary +AS_IF([test "x$MAX_BLOCK_SIZE" = "x"], [ + AC_CACHE_CHECK([sysconf (_SC_PAGESIZE)], [erofs_cv_max_block_size], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +#include <unistd.h> +#include <stdio.h> +]], +[[ + int result; + FILE *f; + + result = sysconf(_SC_PAGESIZE); + if (result < 0) + return 1; + + f = fopen("conftest.out", "w"); + if (!f) + return 1; + + fprintf(f, "%d", result); + fclose(f); + return 0; +]])], + [erofs_cv_max_block_size=`cat conftest.out`], + [], + [])) +], [erofs_cv_max_block_size=$MAX_BLOCK_SIZE]) + # Configure debug mode AS_IF([test "x$enable_debug" != "xno"], [], [ dnl Turn off all assert checking. 
@@ -322,14 +408,53 @@ if test "x$enable_lzma" = "xyes"; then CPPFLAGS="${saved_CPPFLAGS}" fi +# Configure zlib +AS_IF([test "x$with_zlib" != "xno"], [ + PKG_CHECK_MODULES([zlib], [zlib]) + # Paranoia: don't trust the result reported by pkgconfig before trying out + saved_LIBS="$LIBS" + saved_CPPFLAGS=${CPPFLAGS} + CPPFLAGS="${zlib_CFLAGS} ${CPPFLAGS}" + LIBS="${zlib_LIBS} $LIBS" + AC_CHECK_LIB(z, inflate, [ + have_zlib="yes" ], [ + AC_MSG_ERROR([zlib doesn't work properly])]) + LIBS="${saved_LIBS}" + CPPFLAGS="${saved_CPPFLAGS}"], [have_zlib="no"]) + +# Configure libdeflate +AS_IF([test "x$with_libdeflate" != "xno"], [ + PKG_CHECK_MODULES([libdeflate], [libdeflate]) + # Paranoia: don't trust the result reported by pkgconfig before trying out + saved_LIBS="$LIBS" + saved_CPPFLAGS=${CPPFLAGS} + CPPFLAGS="${libdeflate_CFLAGS} ${CPPFLAGS}" + LIBS="${libdeflate_LIBS} $LIBS" + AC_CHECK_LIB(deflate, libdeflate_deflate_decompress, [ + have_libdeflate="yes" ], [ + AC_MSG_ERROR([libdeflate doesn't work properly])]) + LIBS="${saved_LIBS}" + CPPFLAGS="${saved_CPPFLAGS}"], [have_libdeflate="no"]) + # Enable 64-bit off_t CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" +# Configure fuzzing mode +AS_IF([test "x$enable_fuzzing" != "xyes"], [], [ + CC_CHECK_CFLAGS(["-fsanitize=address,fuzzer-no-link"], [ + CFLAGS="$CFLAGS -g -O1 -fsanitize=address,fuzzer-no-link" + ], [ + AC_MSG_ERROR([Compiler doesn't support `-fsanitize=address,fuzzer-no-link`]) + ]) +]) +AM_CONDITIONAL([ENABLE_FUZZING], [test "x${enable_fuzzing}" = "xyes"]) + # Set up needed symbols, conditionals and compiler/linker flags AM_CONDITIONAL([ENABLE_LZ4], [test "x${have_lz4}" = "xyes"]) AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"]) AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"]) AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"]) +AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"]) if test "x$have_uuid" = "xyes"; then 
AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found]) @@ -367,6 +492,21 @@ if test "x${have_liblzma}" = "xyes"; then AC_SUBST([liblzma_CFLAGS]) fi +if test "x$have_zlib" = "xyes"; then + AC_DEFINE([HAVE_ZLIB], 1, [Define to 1 if zlib is found]) +fi + +if test "x$have_libdeflate" = "xyes"; then + AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define to 1 if libdeflate is found]) +fi + +# Dump maximum block size +AS_IF([test "x$erofs_cv_max_block_size" = "x"], + [$erofs_cv_max_block_size = 4096], []) + +AC_DEFINE_UNQUOTED([EROFS_MAX_BLOCK_SIZE], [$erofs_cv_max_block_size], + [The maximum block size which erofs-utils supports]) + AC_CONFIG_FILES([Makefile man/Makefile lib/Makefile diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md index fdf4f79..5431856 100644 --- a/docs/PERFORMANCE.md +++ b/docs/PERFORMANCE.md @@ -36,7 +36,9 @@ Note that that dataset can be replaced regularly, and the SHA1 of the snapshot " ## Sequential data access +```bash hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf - . | cat > /dev/null" +``` | Filesystem | Cluster size | Time | |------------|--------------|---------------------------------| @@ -49,7 +51,9 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf - . | cat > /d ## Sequential metadata access +```bash hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf /dev/null ." +``` | Filesystem | Cluster size | Time | |------------|--------------|---------------------------------| @@ -64,8 +68,10 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "tar cf /dev/null ." 
## Small random data access (~7%) +```bash find mnt -type f -printf "%p\n" | sort -R | head -n 500 > list.txt hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs cat > /dev/null" +``` | Filesystem | Cluster size | Time | |------------|--------------|---------------------------------| @@ -79,8 +85,10 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs ## Small random metadata access (~7%) +```bash find mnt -type f -printf "%p\n" | sort -R | head -n 500 > list.txt hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs stat" +``` | Filesystem | Cluster size | Time | |------------|--------------|---------------------------------| @@ -93,8 +101,10 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs ## Full random data access (~100%) +```bash find mnt -type f -printf "%p\n" | sort -R > list.txt hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs cat > /dev/null" +``` | Filesystem | Cluster size | Time | |------------|--------------|---------------------------------| @@ -107,8 +117,10 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs ## Full random metadata access (~100%) +```bash find mnt -type f -printf "%p\n" | sort -R > list.txt hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs stat" +``` | Filesystem | Cluster size | Time | |------------|--------------|---------------------------------| @@ -130,7 +142,7 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs |-----------|------------|--------------|-----------------------------------------------------------| | 114339840 | squashfs | 4096 | -b 4096 -comp lz4 -Xhc -no-xattrs | | 104972288 | erofs | 4096 | -zlz4hc,12 -C4096 | -| 98033664 | squashfs | 16384 | -b 4096 -comp lz4 -Xhc -no-xattrs | +| 98033664 | squashfs | 16384 | -b 16384 -comp lz4 -Xhc -no-xattrs | | 89571328 | erofs | 16384 | 
-zlz4hc,12 -C16384 | | 85143552 | squashfs | 65536 | -b 65536 -comp lz4 -Xhc -no-xattrs | | 81211392 | squashfs | 131072 | -b 131072 -comp lz4 -Xhc -no-xattrs | @@ -139,7 +151,9 @@ hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "cat list.txt | xargs ## Sequential I/Os +```bash fio -filename=silesia.tar -bs=4k -rw=read -name=job1 +``` | Filesystem | Cluster size | Bandwidth | |------------|--------------|-----------| @@ -154,7 +168,9 @@ fio -filename=silesia.tar -bs=4k -rw=read -name=job1 ## Full Random I/Os +```bash fio -filename=silesia.tar -bs=4k -rw=randread -name=job1 +``` | Filesystem | Cluster size | Bandwidth | |------------|--------------|-----------| @@ -169,7 +185,9 @@ fio -filename=silesia.tar -bs=4k -rw=randread -name=job1 ## Small Random I/Os (~5%) +```bash fio -filename=silesia.tar -bs=4k -rw=randread --io_size=10m -name=job1 +``` | Filesystem | Cluster size | Bandwidth | |------------|--------------|-----------| diff --git a/dump/Makefile.am b/dump/Makefile.am index c2bef6d..aed20c2 100644 --- a/dump/Makefile.am +++ b/dump/Makefile.am @@ -7,4 +7,4 @@ AM_CPPFLAGS = ${libuuid_CFLAGS} dump_erofs_SOURCES = main.c dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} + ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} diff --git a/dump/main.c b/dump/main.c index bc4e028..5425b7b 100644 --- a/dump/main.c +++ b/dump/main.c @@ -17,10 +17,8 @@ #include "erofs/compress.h" #include "erofs/fragments.h" #include "../lib/liberofs_private.h" +#include "../lib/liberofs_uuid.h" -#ifdef HAVE_LIBUUID -#include <uuid.h> -#endif struct erofsdump_cfg { unsigned int totalshow; @@ -93,12 +91,16 @@ struct erofsdump_feature { static struct erofsdump_feature feature_lists[] = { { true, EROFS_FEATURE_COMPAT_SB_CHKSUM, "sb_csum" }, { true, EROFS_FEATURE_COMPAT_MTIME, "mtime" }, - { false, EROFS_FEATURE_INCOMPAT_LZ4_0PADDING, "0padding" }, + 
{ true, EROFS_FEATURE_COMPAT_XATTR_FILTER, "xattr_filter" }, + { false, EROFS_FEATURE_INCOMPAT_ZERO_PADDING, "0padding" }, + { false, EROFS_FEATURE_INCOMPAT_COMPR_CFGS, "compr_cfgs" }, { false, EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER, "big_pcluster" }, { false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" }, { false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" }, { false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" }, { false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" }, + { false, EROFS_FEATURE_INCOMPAT_DEDUPE, "dedupe" }, + { false, EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES, "xattr_prefixes" }, }; static int erofsdump_readdir(struct erofs_dir_context *ctx); @@ -155,7 +157,7 @@ static int erofsdump_parse_options_cfg(int argc, char **argv) usage(); exit(0); case 3: - err = blob_open_ro(optarg); + err = blob_open_ro(&sbi, optarg); if (err) return err; ++sbi.extra_devices; @@ -200,10 +202,10 @@ static int erofsdump_get_occupied_size(struct erofs_inode *inode, stats.uncompressed_files++; *size = inode->i_size; break; - case EROFS_INODE_FLAT_COMPRESSION_LEGACY: - case EROFS_INODE_FLAT_COMPRESSION: + case EROFS_INODE_COMPRESSED_FULL: + case EROFS_INODE_COMPRESSED_COMPACT: stats.compressed_files++; - *size = inode->u.i_blocks * EROFS_BLKSIZ; + *size = inode->u.i_blocks * erofs_blksiz(inode->sbi); break; default: erofs_err("unknown datalayout"); @@ -271,9 +273,9 @@ static int erofsdump_read_packed_inode(void) { int err; erofs_off_t occupied_size = 0; - struct erofs_inode vi = { .nid = sbi.packed_nid }; + struct erofs_inode vi = { .sbi = &sbi, .nid = sbi.packed_nid }; - if (!erofs_sb_has_fragments()) + if (!(erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0)) return 0; err = erofs_read_inode_from_disk(&vi); @@ -297,7 +299,7 @@ static int erofsdump_readdir(struct erofs_dir_context *ctx) { int err; erofs_off_t occupied_size = 0; - struct erofs_inode vi = { .nid = ctx->de_nid }; + struct erofs_inode vi = { .sbi = &sbi, .nid = ctx->de_nid }; err = 
erofs_read_inode_from_disk(&vi); if (err) { @@ -352,7 +354,7 @@ static void erofsdump_show_fileinfo(bool show_extent) int err, i; erofs_off_t size; u16 access_mode; - struct erofs_inode inode = { .nid = dumpcfg.nid }; + struct erofs_inode inode = { .sbi = &sbi, .nid = dumpcfg.nid }; char path[PATH_MAX]; char access_mode_str[] = "rwxrwxrwx"; char timebuf[128] = {0}; @@ -383,7 +385,7 @@ static void erofsdump_show_fileinfo(bool show_extent) return; } - err = erofs_get_pathname(inode.nid, path, sizeof(path)); + err = erofs_get_pathname(inode.sbi, inode.nid, path, sizeof(path)); if (err < 0) { strncpy(path, "(not found)", sizeof(path) - 1); path[sizeof(path) - 1] = '\0'; @@ -448,7 +450,7 @@ static void erofsdump_show_fileinfo(bool show_extent) .m_deviceid = map.m_deviceid, .m_pa = map.m_pa, }; - err = erofs_map_dev(&sbi, &mdev); + err = erofs_map_dev(inode.sbi, &mdev); if (err) { erofs_err("failed to map device"); return; @@ -589,10 +591,27 @@ static void erofsdump_print_statistic(void) erofsdump_filetype_distribution(file_types, OTHERFILETYPE); } +static void erofsdump_print_supported_compressors(FILE *f, unsigned int mask) +{ + unsigned int i = 0; + bool comma = false; + const char *s; + + while ((s = z_erofs_list_supported_algorithms(i++, &mask)) != NULL) { + if (*s == '\0') + continue; + if (comma) + fputs(", ", f); + fputs(s, f); + comma = true; + } + fputc('\n', f); +} + static void erofsdump_show_superblock(void) { time_t time = sbi.build_time; - char uuid_str[37] = "not available"; + char uuid_str[37]; int i = 0; fprintf(stdout, "Filesystem magic number: 0x%04X\n", @@ -605,9 +624,19 @@ static void erofsdump_show_superblock(void) sbi.xattr_blkaddr); fprintf(stdout, "Filesystem root nid: %llu\n", sbi.root_nid | 0ULL); - if (erofs_sb_has_fragments()) + if (erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0) fprintf(stdout, "Filesystem packed nid: %llu\n", sbi.packed_nid | 0ULL); + if (erofs_sb_has_compr_cfgs(&sbi)) { + fprintf(stdout, "Filesystem compr_algs: "); + 
erofsdump_print_supported_compressors(stdout, + sbi.available_compr_algs); + } else { + fprintf(stdout, "Filesystem lz4_max_distance: %u\n", + sbi.lz4_max_distance | 0U); + } + fprintf(stdout, "Filesystem sb_extslots: %u\n", + sbi.extslots | 0U); fprintf(stdout, "Filesystem inode count: %llu\n", sbi.inos | 0ULL); fprintf(stdout, "Filesystem created: %s", @@ -620,9 +649,7 @@ static void erofsdump_show_superblock(void) if (feat & feature_lists[i].flag) fprintf(stdout, "%s ", feature_lists[i].name); } -#ifdef HAVE_LIBUUID - uuid_unparse_lower(sbi.uuid, uuid_str); -#endif + erofs_uuid_unparse_lower(sbi.uuid, uuid_str); fprintf(stdout, "\nFilesystem UUID: %s\n", uuid_str); } @@ -639,13 +666,13 @@ int main(int argc, char **argv) goto exit; } - err = dev_open_ro(cfg.c_img_path); + err = dev_open_ro(&sbi, cfg.c_img_path); if (err) { erofs_err("failed to open image file"); goto exit; } - err = erofs_read_superblock(); + err = erofs_read_superblock(&sbi); if (err) { erofs_err("failed to read superblock"); goto exit_dev_close; @@ -670,11 +697,11 @@ int main(int argc, char **argv) erofsdump_show_fileinfo(dumpcfg.show_extent); exit_put_super: - erofs_put_super(); + erofs_put_super(&sbi); exit_dev_close: - dev_close(); + dev_close(&sbi); exit: - blob_closeall(); + blob_closeall(&sbi); erofs_exit_configure(); return err; } diff --git a/fsck/Makefile.am b/fsck/Makefile.am index e6a1fb6..d024405 100644 --- a/fsck/Makefile.am +++ b/fsck/Makefile.am @@ -7,4 +7,13 @@ AM_CPPFLAGS = ${libuuid_CFLAGS} fsck_erofs_SOURCES = main.c fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} + ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} + +if ENABLE_FUZZING +noinst_PROGRAMS = fuzz_erofsfsck +fuzz_erofsfsck_SOURCES = main.c +fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING +fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer +fuzz_erofsfsck_LDADD = 
$(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ + ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} +endif diff --git a/fsck/main.c b/fsck/main.c index 6b42252..3f86da4 100644 --- a/fsck/main.c +++ b/fsck/main.c @@ -49,15 +49,27 @@ static struct option long_options[] = { {0, 0, 0, 0}, }; +#define NR_HARDLINK_HASHTABLE 16384 + +struct erofsfsck_hardlink_entry { + struct list_head list; + erofs_nid_t nid; + char *path; +}; + +static struct list_head erofsfsck_link_hashtable[NR_HARDLINK_HASHTABLE]; + static void print_available_decompressors(FILE *f, const char *delim) { - unsigned int i = 0; + int i = 0; + bool comma = false; const char *s; - while ((s = z_erofs_list_available_compressors(i)) != NULL) { - if (i++) + while ((s = z_erofs_list_available_compressors(&i)) != NULL) { + if (comma) fputs(delim, f); fputs(s, f); + comma = true; } fputc('\n', f); } @@ -131,6 +143,11 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv) while (len > 1 && optarg[len - 1] == '/') len--; + if (len >= PATH_MAX) { + erofs_err("target directory name too long!"); + return -ENAMETOOLONG; + } + fsckcfg.extract_path = malloc(PATH_MAX); if (!fsckcfg.extract_path) return -ENOMEM; @@ -143,7 +160,7 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv) } break; case 3: - ret = blob_open_ro(optarg); + ret = blob_open_ro(&sbi, optarg); if (ret) return ret; ++sbi.extra_devices; @@ -258,12 +275,13 @@ static void erofsfsck_set_attributes(struct erofs_inode *inode, char *path) static int erofs_check_sb_chksum(void) { - int ret; - u8 buf[EROFS_BLKSIZ]; +#ifndef FUZZING + u8 buf[EROFS_MAX_BLOCK_SIZE]; u32 crc; struct erofs_super_block *sb; + int ret; - ret = blk_read(0, buf, 0, 1); + ret = blk_read(&sbi, 0, buf, 0, 1); if (ret) { erofs_err("failed to read superblock to check checksum: %d", ret); @@ -273,18 +291,20 @@ static int erofs_check_sb_chksum(void) sb = (struct erofs_super_block *)(buf + EROFS_SUPER_OFFSET); sb->checksum = 0; - crc = erofs_crc32c(~0, (u8 
*)sb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); + crc = erofs_crc32c(~0, (u8 *)sb, erofs_blksiz(&sbi) - EROFS_SUPER_OFFSET); if (crc != sbi.checksum) { erofs_err("superblock chksum doesn't match: saved(%08xh) calculated(%08xh)", sbi.checksum, crc); fsckcfg.corrupted = true; return -1; } +#endif return 0; } static int erofs_verify_xattr(struct erofs_inode *inode) { + struct erofs_sb_info *sbi = inode->sbi; unsigned int xattr_hdr_size = sizeof(struct erofs_xattr_ibody_header); unsigned int xattr_entry_size = sizeof(struct erofs_xattr_entry); erofs_off_t addr; @@ -292,7 +312,7 @@ static int erofs_verify_xattr(struct erofs_inode *inode) struct erofs_xattr_ibody_header *ih; struct erofs_xattr_entry *entry; int i, remaining = inode->xattr_isize, ret = 0; - char buf[EROFS_BLKSIZ]; + char buf[EROFS_MAX_BLOCK_SIZE]; if (inode->xattr_isize == xattr_hdr_size) { erofs_err("xattr_isize %d of nid %llu is not supported yet", @@ -308,8 +328,8 @@ static int erofs_verify_xattr(struct erofs_inode *inode) } } - addr = iloc(inode->nid) + inode->inode_isize; - ret = dev_read(0, buf, addr, xattr_hdr_size); + addr = erofs_iloc(inode) + inode->inode_isize; + ret = dev_read(sbi, 0, buf, addr, xattr_hdr_size); if (ret < 0) { erofs_err("failed to read xattr header @ nid %llu: %d", inode->nid | 0ULL, ret); @@ -318,12 +338,12 @@ static int erofs_verify_xattr(struct erofs_inode *inode) ih = (struct erofs_xattr_ibody_header *)buf; xattr_shared_count = ih->h_shared_count; - ofs = erofs_blkoff(addr) + xattr_hdr_size; + ofs = erofs_blkoff(sbi, addr) + xattr_hdr_size; addr += xattr_hdr_size; remaining -= xattr_hdr_size; for (i = 0; i < xattr_shared_count; ++i) { - if (ofs >= EROFS_BLKSIZ) { - if (ofs != EROFS_BLKSIZ) { + if (ofs >= erofs_blksiz(sbi)) { + if (ofs != erofs_blksiz(sbi)) { erofs_err("unaligned xattr entry in xattr shared area @ nid %llu", inode->nid | 0ULL); ret = -EFSCORRUPTED; @@ -339,7 +359,7 @@ static int erofs_verify_xattr(struct erofs_inode *inode) while (remaining > 0) { unsigned int 
entry_sz; - ret = dev_read(0, buf, addr, xattr_entry_size); + ret = dev_read(sbi, 0, buf, addr, xattr_entry_size); if (ret) { erofs_err("failed to read xattr entry @ nid %llu: %d", inode->nid | 0ULL, ret); @@ -382,8 +402,8 @@ static int erofs_verify_inode_data(struct erofs_inode *inode, int outfd) case EROFS_INODE_CHUNK_BASED: compressed = false; break; - case EROFS_INODE_FLAT_COMPRESSION_LEGACY: - case EROFS_INODE_FLAT_COMPRESSION: + case EROFS_INODE_COMPRESSED_FULL: + case EROFS_INODE_COMPRESSED_COMPACT: compressed = true; break; default: @@ -392,6 +412,8 @@ static int erofs_verify_inode_data(struct erofs_inode *inode, int outfd) } while (pos < inode->i_size) { + unsigned int alloc_rawsize; + map.m_la = pos; if (compressed) ret = z_erofs_map_blocks_iter(inode, &map, @@ -420,10 +442,28 @@ static int erofs_verify_inode_data(struct erofs_inode *inode, int outfd) if (!(map.m_flags & EROFS_MAP_MAPPED) || !fsckcfg.check_decomp) continue; - if (map.m_plen > raw_size) { - raw_size = map.m_plen; - raw = realloc(raw, raw_size); - BUG_ON(!raw); + if (map.m_plen > Z_EROFS_PCLUSTER_MAX_SIZE) { + if (compressed) { + erofs_err("invalid pcluster size %" PRIu64 " @ offset %" PRIu64 " of nid %" PRIu64, + map.m_plen, map.m_la, + inode->nid | 0ULL); + ret = -EFSCORRUPTED; + goto out; + } + alloc_rawsize = Z_EROFS_PCLUSTER_MAX_SIZE; + } else { + alloc_rawsize = map.m_plen; + } + + if (alloc_rawsize > raw_size) { + char *newraw = realloc(raw, alloc_rawsize); + + if (!newraw) { + ret = -ENOMEM; + goto out; + } + raw = newraw; + raw_size = alloc_rawsize; } if (compressed) { @@ -434,25 +474,34 @@ static int erofs_verify_inode_data(struct erofs_inode *inode, int outfd) } ret = z_erofs_read_one_data(inode, &map, raw, buffer, 0, map.m_llen, false); + if (ret) + goto out; + + if (outfd >= 0 && write(outfd, buffer, map.m_llen) < 0) + goto fail_eio; } else { - ret = erofs_read_one_data(&map, raw, 0, map.m_plen); - } - if (ret) - goto out; + u64 p = 0; - if (outfd >= 0 && write(outfd, 
compressed ? buffer : raw, - map.m_llen) < 0) { - erofs_err("I/O error occurred when verifying data chunk @ nid %llu", - inode->nid | 0ULL); - ret = -EIO; - goto out; + do { + u64 count = min_t(u64, alloc_rawsize, + map.m_llen); + + ret = erofs_read_one_data(inode, &map, raw, p, count); + if (ret) + goto out; + + if (outfd >= 0 && write(outfd, raw, count) < 0) + goto fail_eio; + map.m_llen -= count; + p += count; + } while (map.m_llen); } } if (fsckcfg.print_comp_ratio) { if (!erofs_is_packed_inode(inode)) - fsckcfg.logical_blocks += BLK_ROUND_UP(inode->i_size); - fsckcfg.physical_blocks += BLK_ROUND_UP(pchunk_len); + fsckcfg.logical_blocks += BLK_ROUND_UP(inode->sbi, inode->i_size); + fsckcfg.physical_blocks += BLK_ROUND_UP(inode->sbi, pchunk_len); } out: if (raw) @@ -460,6 +509,12 @@ out: if (buffer) free(buffer); return ret < 0 ? ret : 0; + +fail_eio: + erofs_err("I/O error occurred when verifying data chunk @ nid %llu", + inode->nid | 0ULL); + ret = -EIO; + goto out; } static inline int erofs_extract_dir(struct erofs_inode *inode) @@ -508,6 +563,63 @@ static inline int erofs_extract_dir(struct erofs_inode *inode) return 0; } +static char *erofsfsck_hardlink_find(erofs_nid_t nid) +{ + struct list_head *head = + &erofsfsck_link_hashtable[nid % NR_HARDLINK_HASHTABLE]; + struct erofsfsck_hardlink_entry *entry; + + list_for_each_entry(entry, head, list) + if (entry->nid == nid) + return entry->path; + return NULL; +} + +static int erofsfsck_hardlink_insert(erofs_nid_t nid, const char *path) +{ + struct erofsfsck_hardlink_entry *entry; + + entry = malloc(sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->nid = nid; + entry->path = strdup(path); + if (!entry->path) { + free(entry); + return -ENOMEM; + } + + list_add_tail(&entry->list, + &erofsfsck_link_hashtable[nid % NR_HARDLINK_HASHTABLE]); + return 0; +} + +static void erofsfsck_hardlink_init(void) +{ + unsigned int i; + + for (i = 0; i < NR_HARDLINK_HASHTABLE; ++i) + 
init_list_head(&erofsfsck_link_hashtable[i]); +} + +static void erofsfsck_hardlink_exit(void) +{ + struct erofsfsck_hardlink_entry *entry, *n; + struct list_head *head; + unsigned int i; + + for (i = 0; i < NR_HARDLINK_HASHTABLE; ++i) { + head = &erofsfsck_link_hashtable[i]; + + list_for_each_entry_safe(entry, n, head, list) { + if (entry->path) + free(entry->path); + free(entry); + } + } +} + static inline int erofs_extract_file(struct erofs_inode *inode) { bool tryagain = true; @@ -645,28 +757,88 @@ again: static int erofsfsck_dirent_iter(struct erofs_dir_context *ctx) { int ret; - size_t prev_pos = fsckcfg.extract_pos; + size_t prev_pos, curr_pos; if (ctx->dot_dotdot) return 0; - if (fsckcfg.extract_path) { - size_t curr_pos = prev_pos; + prev_pos = fsckcfg.extract_pos; + curr_pos = prev_pos; + if (prev_pos + ctx->de_namelen >= PATH_MAX) { + erofs_err("unable to fsck since the path is too long (%u)", + curr_pos + ctx->de_namelen); + return -EOPNOTSUPP; + } + + if (fsckcfg.extract_path) { fsckcfg.extract_path[curr_pos++] = '/'; strncpy(fsckcfg.extract_path + curr_pos, ctx->dname, ctx->de_namelen); curr_pos += ctx->de_namelen; fsckcfg.extract_path[curr_pos] = '\0'; - fsckcfg.extract_pos = curr_pos; + } else { + curr_pos += ctx->de_namelen; } - + fsckcfg.extract_pos = curr_pos; ret = erofsfsck_check_inode(ctx->dir->nid, ctx->de_nid); - if (fsckcfg.extract_path) { + if (fsckcfg.extract_path) fsckcfg.extract_path[prev_pos] = '\0'; - fsckcfg.extract_pos = prev_pos; + fsckcfg.extract_pos = prev_pos; + return ret; +} + +static int erofsfsck_extract_inode(struct erofs_inode *inode) +{ + int ret; + char *oldpath; + + if (!fsckcfg.extract_path) { +verify: + /* verify data chunk layout */ + return erofs_verify_inode_data(inode, -1); + } + + oldpath = erofsfsck_hardlink_find(inode->nid); + if (oldpath) { + if (link(oldpath, fsckcfg.extract_path) == -1) { + erofs_err("failed to extract hard link: %s (%s)", + fsckcfg.extract_path, strerror(errno)); + return -errno; + } + 
return 0; + } + + switch (inode->i_mode & S_IFMT) { + case S_IFDIR: + ret = erofs_extract_dir(inode); + break; + case S_IFREG: + if (erofs_is_packed_inode(inode)) + goto verify; + ret = erofs_extract_file(inode); + break; + case S_IFLNK: + ret = erofs_extract_symlink(inode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + ret = erofs_extract_special(inode); + break; + default: + /* TODO */ + goto verify; } + if (ret && ret != -ECANCELED) + return ret; + + /* record nid and old path for hardlink */ + if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) + ret = erofsfsck_hardlink_insert(inode->nid, + fsckcfg.extract_path); return ret; } @@ -678,6 +850,7 @@ static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid) erofs_dbg("check inode: nid(%llu)", nid | 0ULL); inode.nid = nid; + inode.sbi = &sbi; ret = erofs_read_inode_from_disk(&inode); if (ret) { if (ret == -EIO) @@ -691,34 +864,7 @@ static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid) if (ret) goto out; - if (fsckcfg.extract_path) { - switch (inode.i_mode & S_IFMT) { - case S_IFDIR: - ret = erofs_extract_dir(&inode); - break; - case S_IFREG: - if (erofs_is_packed_inode(&inode)) - goto verify; - ret = erofs_extract_file(&inode); - break; - case S_IFLNK: - ret = erofs_extract_symlink(&inode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: - ret = erofs_extract_special(&inode); - break; - default: - /* TODO */ - goto verify; - } - } else { -verify: - /* verify data chunk layout */ - ret = erofs_verify_inode_data(&inode, -1); - } + ret = erofsfsck_extract_inode(&inode); if (ret && ret != -ECANCELED) goto out; @@ -745,7 +891,11 @@ out: return ret; } -int main(int argc, char **argv) +#ifdef FUZZING +int erofsfsck_fuzz_one(int argc, char *argv[]) +#else +int main(int argc, char *argv[]) +#endif { int err; @@ -772,28 +922,35 @@ int main(int argc, char **argv) goto exit; } - err = dev_open_ro(cfg.c_img_path); +#ifdef FUZZING + cfg.c_dbg_lvl = 
-1; +#endif + + err = dev_open_ro(&sbi, cfg.c_img_path); if (err) { erofs_err("failed to open image file"); goto exit; } - err = erofs_read_superblock(); + err = erofs_read_superblock(&sbi); if (err) { erofs_err("failed to read superblock"); goto exit_dev_close; } - if (erofs_sb_has_sb_chksum() && erofs_check_sb_chksum()) { + if (erofs_sb_has_sb_chksum(&sbi) && erofs_check_sb_chksum()) { erofs_err("failed to verify superblock checksum"); goto exit_put_super; } - if (erofs_sb_has_fragments()) { + if (fsckcfg.extract_path) + erofsfsck_hardlink_init(); + + if (erofs_sb_has_fragments(&sbi) && sbi.packed_nid > 0) { err = erofsfsck_check_inode(sbi.packed_nid, sbi.packed_nid); if (err) { erofs_err("failed to verify packed file"); - goto exit_put_super; + goto exit_hardlink; } } @@ -819,12 +976,40 @@ int main(int argc, char **argv) } } +exit_hardlink: + if (fsckcfg.extract_path) + erofsfsck_hardlink_exit(); exit_put_super: - erofs_put_super(); + erofs_put_super(&sbi); exit_dev_close: - dev_close(); + dev_close(&sbi); exit: - blob_closeall(); + blob_closeall(&sbi); erofs_exit_configure(); return err ? 1 : 0; } + +#ifdef FUZZING +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) +{ + int fd, ret; + char filename[] = "/tmp/erofsfsck_libfuzzer_XXXXXX"; + char *argv[] = { + "fsck.erofs", + "--extract", + filename, + }; + + fd = mkstemp(filename); + if (fd < 0) + return -errno; + if (write(fd, Data, Size) != Size) { + close(fd); + return -EIO; + } + close(fd); + ret = erofsfsck_fuzz_one(ARRAY_SIZE(argv), argv); + unlink(filename); + return ret ? 
-1 : 0; +} +#endif diff --git a/fuse/Makefile.am b/fuse/Makefile.am index 3179a2b..50be783 100644 --- a/fuse/Makefile.am +++ b/fuse/Makefile.am @@ -7,4 +7,4 @@ erofsfuse_SOURCES = main.c erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS} erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \ - ${libselinux_LIBS} ${liblzma_LIBS} + ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} diff --git a/fuse/main.c b/fuse/main.c index e6af890..821d98c 100644 --- a/fuse/main.c +++ b/fuse/main.c @@ -49,6 +49,7 @@ int erofsfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler, }; erofs_dbg("readdir:%s offset=%llu", path, (long long)offset); + dir.sbi = &sbi; ret = erofs_ilookup(path, &dir); if (ret) return ret; @@ -84,7 +85,7 @@ static int erofsfuse_open(const char *path, struct fuse_file_info *fi) static int erofsfuse_getattr(const char *path, struct stat *stbuf) { - struct erofs_inode vi = {}; + struct erofs_inode vi = { .sbi = &sbi }; int ret; erofs_dbg("getattr(%s)", path); @@ -95,7 +96,7 @@ static int erofsfuse_getattr(const char *path, struct stat *stbuf) stbuf->st_mode = vi.i_mode; stbuf->st_nlink = vi.i_nlink; stbuf->st_size = vi.i_size; - stbuf->st_blocks = roundup(vi.i_size, EROFS_BLKSIZ) >> 9; + stbuf->st_blocks = roundup(vi.i_size, erofs_blksiz(vi.sbi)) >> 9; stbuf->st_uid = vi.i_uid; stbuf->st_gid = vi.i_gid; if (S_ISBLK(vi.i_mode) || S_ISCHR(vi.i_mode)) @@ -115,6 +116,7 @@ static int erofsfuse_read(const char *path, char *buffer, erofs_dbg("path:%s size=%zd offset=%llu", path, size, (long long)offset); + vi.sbi = &sbi; ret = erofs_ilookup(path, &vi); if (ret) return ret; @@ -155,6 +157,7 @@ static int erofsfuse_getxattr(const char *path, const char *name, char *value, erofs_dbg("getxattr(%s): name=%s size=%llu", path, name, size); + vi.sbi = &sbi; ret = erofs_ilookup(path, &vi); if (ret) return ret; @@ -169,6 +172,7 @@ static int 
erofsfuse_listxattr(const char *path, char *list, size_t size) erofs_dbg("listxattr(%s): size=%llu", path, size); + vi.sbi = &sbi; ret = erofs_ilookup(path, &vi); if (ret) return ret; @@ -244,7 +248,7 @@ static int optional_opt_func(void *data, const char *arg, int key, switch (key) { case 1: - ret = blob_open_ro(arg + sizeof("--device=") - 1); + ret = blob_open_ro(&sbi, arg + sizeof("--device=") - 1); if (ret) return -1; ++sbi.extra_devices; @@ -325,13 +329,13 @@ int main(int argc, char *argv[]) cfg.c_offset = fusecfg.offset; erofsfuse_dumpcfg(); - ret = dev_open_ro(fusecfg.disk); + ret = dev_open_ro(&sbi, fusecfg.disk); if (ret) { fprintf(stderr, "failed to open: %s\n", fusecfg.disk); goto err_fuse_free_args; } - ret = erofs_read_superblock(); + ret = erofs_read_superblock(&sbi); if (ret) { fprintf(stderr, "failed to read erofs super block\n"); goto err_dev_close; @@ -339,10 +343,10 @@ int main(int argc, char *argv[]) ret = fuse_main(args.argc, args.argv, &erofs_ops, NULL); - erofs_put_super(); + erofs_put_super(&sbi); err_dev_close: - blob_closeall(); - dev_close(); + blob_closeall(&sbi); + dev_close(&sbi); err_fuse_free_args: fuse_opt_free_args(&args); err: diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h index 49cb7bf..89c8048 100644 --- a/include/erofs/blobchunk.h +++ b/include/erofs/blobchunk.h @@ -14,12 +14,16 @@ extern "C" #include "erofs/internal.h" +struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id, + erofs_blk_t blkaddr, erofs_off_t sourceoffset); int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off); -int erofs_blob_write_chunked_file(struct erofs_inode *inode); -int erofs_blob_remap(void); +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd, + erofs_off_t startoff); +int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset); +int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi); void erofs_blob_exit(void); int erofs_blob_init(const char 
*blobfile_path); -int erofs_generate_devtable(void); +int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices); #ifdef __cplusplus } diff --git a/include/erofs/block_list.h b/include/erofs/block_list.h index 78fab44..9f9975e 100644 --- a/include/erofs/block_list.h +++ b/include/erofs/block_list.h @@ -13,9 +13,12 @@ extern "C" #include "internal.h" +int erofs_blocklist_open(char *filename, bool srcmap); +void erofs_blocklist_close(void); + +void tarerofs_blocklist_write(erofs_blk_t blkaddr, erofs_blk_t nblocks, + erofs_off_t srcoff); #ifdef WITH_ANDROID -int erofs_droid_blocklist_fopen(void); -void erofs_droid_blocklist_fclose(void); void erofs_droid_blocklist_write(struct erofs_inode *inode, erofs_blk_t blk_start, erofs_blk_t nblocks); void erofs_droid_blocklist_write_tail_end(struct erofs_inode *inode, diff --git a/include/erofs/cache.h b/include/erofs/cache.h index de12399..de5584e 100644 --- a/include/erofs/cache.h +++ b/include/erofs/cache.h @@ -22,10 +22,12 @@ struct erofs_buffer_block; #define META 1 /* including inline xattrs, extent */ #define INODE 2 +/* directory data */ +#define DIRA 3 /* shared xattrs */ -#define XATTR 3 +#define XATTR 4 /* device table */ -#define DEVT 4 +#define DEVT 5 struct erofs_bhops { bool (*preflush)(struct erofs_buffer_head *bh); @@ -55,11 +57,14 @@ struct erofs_buffer_block { static inline const int get_alignsize(int type, int *type_ret) { if (type == DATA) - return EROFS_BLKSIZ; + return erofs_blksiz(&sbi); if (type == INODE) { *type_ret = META; return sizeof(struct erofs_inode_compact); + } else if (type == DIRA) { + *type_ret = META; + return erofs_blksiz(&sbi); } else if (type == XATTR) { *type_ret = META; return sizeof(struct erofs_xattr_entry); @@ -75,7 +80,6 @@ static inline const int get_alignsize(int type, int *type_ret) extern const struct erofs_bhops erofs_drop_directly_bhops; extern const struct erofs_bhops erofs_skip_write_bhops; -extern const struct erofs_bhops erofs_buf_write_bhops; static 
inline erofs_off_t erofs_btell(struct erofs_buffer_head *bh, bool end) { @@ -84,7 +88,7 @@ static inline erofs_off_t erofs_btell(struct erofs_buffer_head *bh, bool end) if (bb->blkaddr == NULL_ADDR) return NULL_ADDR_UL; - return blknr_to_addr(bb->blkaddr) + + return erofs_pos(&sbi, bb->blkaddr) + (end ? list_next_entry(bh, list)->off : bh->off); } @@ -108,6 +112,7 @@ erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb); bool erofs_bflush(struct erofs_buffer_block *bb); void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke); +erofs_blk_t erofs_total_metablocks(void); #ifdef __cplusplus } diff --git a/include/erofs/compress.h b/include/erofs/compress.h index 08af9e3..46cff03 100644 --- a/include/erofs/compress.h +++ b/include/erofs/compress.h @@ -19,18 +19,22 @@ extern "C" void z_erofs_drop_inline_pcluster(struct erofs_inode *inode); int erofs_write_compressed_file(struct erofs_inode *inode, int fd); -int z_erofs_compress_init(struct erofs_buffer_head *bh); +int z_erofs_compress_init(struct erofs_sb_info *sbi, + struct erofs_buffer_head *bh); int z_erofs_compress_exit(void); -const char *z_erofs_list_available_compressors(unsigned int i); +const char *z_erofs_list_supported_algorithms(int i, unsigned int *mask); +const char *z_erofs_list_available_compressors(int *i); static inline bool erofs_is_packed_inode(struct erofs_inode *inode) { + erofs_nid_t packed_nid = inode->sbi->packed_nid; + if (inode->nid == EROFS_PACKED_NID_UNALLOCATED) { - DBG_BUGON(sbi.packed_nid != EROFS_PACKED_NID_UNALLOCATED); + DBG_BUGON(packed_nid != EROFS_PACKED_NID_UNALLOCATED); return true; } - return (sbi.packed_nid > 0 && inode->nid == sbi.packed_nid); + return (packed_nid > 0 && inode->nid == packed_nid); } #ifdef __cplusplus diff --git a/include/erofs/compress_hints.h b/include/erofs/compress_hints.h index d836f22..9f0d8ae 100644 --- a/include/erofs/compress_hints.h +++ b/include/erofs/compress_hints.h @@ -25,7 +25,7 @@ struct erofs_compress_hints { bool 
z_erofs_apply_compress_hints(struct erofs_inode *inode); void erofs_cleanup_compress_hints(void); -int erofs_load_compress_hints(void); +int erofs_load_compress_hints(struct erofs_sb_info *sbi); #ifdef __cplusplus } diff --git a/include/erofs/config.h b/include/erofs/config.h index 648a3e8..e342722 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -45,13 +45,16 @@ struct erofs_configure { #endif char c_timeinherit; char c_chunkbits; - bool c_noinline_data; + bool c_inline_data; bool c_ztailpacking; bool c_fragments; bool c_all_fragments; bool c_dedupe; bool c_ignore_mtime; bool c_showprogress; + bool c_extra_ea_name_prefixes; + bool c_xattr_name_filter; + bool c_ovlfs_strip; #ifdef HAVE_LIBSELINUX struct selabel_handle *sehnd; diff --git a/include/erofs/decompress.h b/include/erofs/decompress.h index a9067cb..0d55483 100644 --- a/include/erofs/decompress.h +++ b/include/erofs/decompress.h @@ -14,6 +14,7 @@ extern "C" #include "internal.h" struct z_erofs_decompress_req { + struct erofs_sb_info *sbi; char *in, *out; /* diff --git a/include/erofs/defs.h b/include/erofs/defs.h index e5aa23c..fefa7e7 100644 --- a/include/erofs/defs.h +++ b/include/erofs/defs.h @@ -179,9 +179,29 @@ typedef int64_t s64; #define __maybe_unused __attribute__((__unused__)) #endif -static inline u32 get_unaligned_le32(const u8 *p) +#define __packed __attribute__((__packed__)) + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) +#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) + +static inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpu(__get_unaligned_t(__le32, p)); +} + +static inline void put_unaligned_le32(u32 val, void 
*p) { - return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; + __put_unaligned_t(__le32, cpu_to_le32(val), p); } /** @@ -266,6 +286,11 @@ static inline unsigned int fls_long(unsigned long x) return x ? sizeof(x) * 8 - __builtin_clz(x) : 0; } +static inline unsigned long lowbit(unsigned long n) +{ + return n & -n; +} + /** * __roundup_pow_of_two() - round up to nearest power of two * @n: value to round up @@ -313,11 +338,6 @@ unsigned long __roundup_pow_of_two(unsigned long n) #define ST_MTIM_NSEC(stbuf) 0 #endif -#ifdef __APPLE__ -#define stat64 stat -#define lstat64 lstat -#endif - #ifdef __cplusplus } #endif diff --git a/include/erofs/dir.h b/include/erofs/dir.h index 74bffb5..5460ac4 100644 --- a/include/erofs/dir.h +++ b/include/erofs/dir.h @@ -62,7 +62,8 @@ struct erofs_dir_context { /* Iterate over inodes that are in directory */ int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck); /* Get a full pathname of the inode NID */ -int erofs_get_pathname(erofs_nid_t nid, char *buf, size_t size); +int erofs_get_pathname(struct erofs_sb_info *sbi, erofs_nid_t nid, + char *buf, size_t size); #ifdef __cplusplus } diff --git a/include/erofs/diskbuf.h b/include/erofs/diskbuf.h new file mode 100644 index 0000000..29d9fe2 --- /dev/null +++ b/include/erofs/diskbuf.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */ +#ifndef __EROFS_DISKBUF_H +#define __EROFS_DISKBUF_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "erofs/defs.h" + +struct erofs_diskbuf { + void *sp; /* internal stream pointer */ + u64 offset; /* internal offset */ +}; + +int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *off); + +int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off); +void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len); +void erofs_diskbuf_close(struct erofs_diskbuf *db); + +int erofs_diskbuf_init(unsigned int nstrms); +void erofs_diskbuf_exit(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/include/erofs/err.h b/include/erofs/err.h index 08b0bdb..2ae9e21 100644 --- a/include/erofs/err.h +++ b/include/erofs/err.h @@ -33,6 +33,12 @@ static inline long PTR_ERR(const void *ptr) return (long) ptr; } +static inline void * ERR_CAST(const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} + #ifdef __cplusplus } #endif diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h index 21753ec..4c6f755 100644 --- a/include/erofs/fragments.h +++ b/include/erofs/fragments.h @@ -12,17 +12,20 @@ extern "C" #include "erofs/internal.h" -extern const char *frags_packedname; -#define EROFS_PACKED_INODE frags_packedname +extern const char *erofs_frags_packedname; +#define EROFS_PACKED_INODE erofs_frags_packedname + +FILE *erofs_packedfile_init(void); +void erofs_packedfile_exit(void); +struct erofs_inode *erofs_mkfs_build_packedfile(void); int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc); int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc); int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, unsigned int len, u32 tofcrc); void z_erofs_fragments_commit(struct erofs_inode *inode); -struct erofs_inode *erofs_mkfs_build_fragments(void); -int erofs_fragments_init(void); -void erofs_fragments_exit(void); +int z_erofs_fragments_init(void); +void z_erofs_fragments_exit(void); #ifdef __cplusplus } diff --git a/include/erofs/hashmap.h b/include/erofs/hashmap.h index 3d38578..d25092d 100644 --- a/include/erofs/hashmap.h +++ b/include/erofs/hashmap.h @@ -61,7 +61,7 @@ struct hashmap_iter { /* hashmap functions */ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size); -void hashmap_free(struct hashmap *map, int free_entries); +int hashmap_free(struct hashmap *map); /* hashmap_entry functions */ static inline void hashmap_entry_init(void *entry, unsigned int hash) @@ -75,8 +75,7 @@ static inline void hashmap_entry_init(void *entry, unsigned int hash) void 
*hashmap_get(const struct hashmap *map, const void *key, const void *keydata); void *hashmap_get_next(const struct hashmap *map, const void *entry); void hashmap_add(struct hashmap *map, void *entry); -void *hashmap_put(struct hashmap *map, void *entry); -void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata); +void *hashmap_remove(struct hashmap *map, const void *key); static inline void *hashmap_get_from_hash(const struct hashmap *map, unsigned int hash, diff --git a/include/erofs/inode.h b/include/erofs/inode.h index bf20cd3..bcfd98e 100644 --- a/include/erofs/inode.h +++ b/include/erofs/inode.h @@ -15,13 +15,29 @@ extern "C" #include "erofs/internal.h" +static inline struct erofs_inode *erofs_igrab(struct erofs_inode *inode) +{ + ++inode->i_count; + return inode; +} + +u32 erofs_new_encode_dev(dev_t dev); unsigned char erofs_mode_to_ftype(umode_t mode); unsigned char erofs_ftype_to_dtype(unsigned int filetype); void erofs_inode_manager_init(void); +void erofs_insert_ihash(struct erofs_inode *inode, dev_t dev, ino_t ino); +struct erofs_inode *erofs_iget(dev_t dev, ino_t ino); +struct erofs_inode *erofs_iget_by_nid(erofs_nid_t nid); unsigned int erofs_iput(struct erofs_inode *inode); erofs_nid_t erofs_lookupnid(struct erofs_inode *inode); -struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent, - const char *path); +struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent, + const char *name); +int erofs_rebuild_dump_tree(struct erofs_inode *dir); +int erofs_init_empty_dir(struct erofs_inode *dir); +int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st, + const char *path); +struct erofs_inode *erofs_new_inode(void); +struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path); struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name); #ifdef __cplusplus diff --git a/include/erofs/internal.h b/include/erofs/internal.h index d4ae3b8..d859905 100644 --- 
a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -17,36 +17,20 @@ extern "C" typedef unsigned short umode_t; -#define __packed __attribute__((__packed__)) - #include "erofs_fs.h" #include <fcntl.h> #include <sys/types.h> /* for off_t definition */ +#include <sys/stat.h> /* for S_ISCHR definition */ +#include <stdio.h> #ifndef PATH_MAX #define PATH_MAX 4096 /* # chars in a path name including nul */ #endif -#ifndef PAGE_SHIFT -#define PAGE_SHIFT (12) -#endif - -#ifndef PAGE_SIZE -#define PAGE_SIZE (1U << PAGE_SHIFT) -#endif - -/* no obvious reason to support explicit PAGE_SIZE != 4096 for now */ -#if PAGE_SIZE != 4096 -#warning EROFS may be incompatible on your platform -#endif - -#ifndef PAGE_MASK -#define PAGE_MASK (~(PAGE_SIZE-1)) +#ifndef EROFS_MAX_BLOCK_SIZE +#define EROFS_MAX_BLOCK_SIZE 4096 #endif -#define LOG_BLOCK_SIZE (12) -#define EROFS_BLKSIZ (1U << LOG_BLOCK_SIZE) - #define EROFS_ISLOTBITS 5 #define EROFS_SLOTSIZE (1U << EROFS_ISLOTBITS) @@ -58,23 +42,33 @@ typedef u32 erofs_blk_t; #define NULL_ADDR ((unsigned int)-1) #define NULL_ADDR_UL ((unsigned long)-1) -#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) -#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ) -#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ) +/* global sbi */ +extern struct erofs_sb_info sbi; -#define BLK_ROUND_UP(addr) DIV_ROUND_UP(addr, EROFS_BLKSIZ) +#define erofs_blksiz(sbi) (1u << (sbi)->blkszbits) +#define erofs_blknr(sbi, addr) ((addr) >> (sbi)->blkszbits) +#define erofs_blkoff(sbi, addr) ((addr) & (erofs_blksiz(sbi) - 1)) +#define erofs_pos(sbi, nr) ((erofs_off_t)(nr) << (sbi)->blkszbits) +#define BLK_ROUND_UP(sbi, addr) DIV_ROUND_UP(addr, erofs_blksiz(sbi)) struct erofs_buffer_head; struct erofs_device_info { + u8 tag[64]; u32 blocks; u32 mapped_blkaddr; }; +struct erofs_xattr_prefix_item { + struct erofs_xattr_long_prefix *prefix; + u8 infix_len; +}; + #define EROFS_PACKED_NID_UNALLOCATED -1 struct erofs_sb_info { struct erofs_device_info *devs; + char 
*devname; u64 total_blocks; u64 primarydevice_blocks; @@ -87,7 +81,9 @@ struct erofs_sb_info { u64 build_time; u32 build_time_nsec; + u8 extslots; unsigned char islotbits; + unsigned char blkszbits; /* what we really care is nid, rather than ino.. */ erofs_nid_t root_nid; @@ -107,35 +103,40 @@ struct erofs_sb_info { u16 device_id_mask; /* used for others */ }; erofs_nid_t packed_nid; -}; + u32 xattr_prefix_start; + u8 xattr_prefix_count; + struct erofs_xattr_prefix_item *xattr_prefixes; -/* make sure that any user of the erofs headers has atleast 64bit off_t type */ -extern int erofs_assert_largefile[sizeof(off_t)-8]; + int devfd, devblksz; + u64 devsz; + dev_t dev; + unsigned int nblobs; + unsigned int blobfd[256]; -/* global sbi */ -extern struct erofs_sb_info sbi; + struct list_head list; -static inline erofs_off_t iloc(erofs_nid_t nid) -{ - return blknr_to_addr(sbi.meta_blkaddr) + (nid << sbi.islotbits); -} + u64 saved_by_deduplication; +}; + +/* make sure that any user of the erofs headers has atleast 64bit off_t type */ +extern int erofs_assert_largefile[sizeof(off_t)-8]; #define EROFS_FEATURE_FUNCS(name, compat, feature) \ -static inline bool erofs_sb_has_##name(void) \ +static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ { \ - return sbi.feature_##compat & EROFS_FEATURE_##feature; \ + return sbi->feature_##compat & EROFS_FEATURE_##feature; \ } \ -static inline void erofs_sb_set_##name(void) \ +static inline void erofs_sb_set_##name(struct erofs_sb_info *sbi) \ { \ - sbi.feature_##compat |= EROFS_FEATURE_##feature; \ + sbi->feature_##compat |= EROFS_FEATURE_##feature; \ } \ -static inline void erofs_sb_clear_##name(void) \ +static inline void erofs_sb_clear_##name(struct erofs_sb_info *sbi) \ { \ - sbi.feature_##compat &= ~EROFS_FEATURE_##feature; \ + sbi->feature_##compat &= ~EROFS_FEATURE_##feature; \ } -EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) +EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_ZERO_PADDING) 
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE) @@ -143,23 +144,31 @@ EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) +EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) +EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) #define EROFS_I_EA_INITED (1 << 0) #define EROFS_I_Z_INITED (1 << 1) +struct erofs_diskbuf; + struct erofs_inode { struct list_head i_hash, i_subdirs, i_xattrs; union { /* (erofsfuse) runtime flags */ unsigned int flags; - /* (mkfs.erofs) device ID containing source file */ - u32 dev; + /* (mkfs.erofs) queued sub-directories blocking dump */ + u32 subdirs_queued; }; unsigned int i_count; + struct erofs_sb_info *sbi; struct erofs_inode *i_parent; + /* (mkfs.erofs) device ID containing source file */ + u32 dev; + umode_t i_mode; erofs_off_t i_size; @@ -181,12 +190,20 @@ struct erofs_inode { } u; char *i_srcpath; - + union { + char *i_link; + struct erofs_diskbuf *i_diskbuf; + }; unsigned char datalayout; unsigned char inode_isize; /* inline tail-end packing size */ unsigned short idata_size; bool compressed_idata; + bool lazy_tailblock; + bool with_diskbuf; + bool opaque; + /* OVL: non-merge dir that may contain whiteout entries */ + bool whiteouts; unsigned int xattr_isize; unsigned int extent_isize; @@ -224,6 +241,14 @@ struct erofs_inode { unsigned int fragment_size; }; +static inline erofs_off_t erofs_iloc(struct erofs_inode *inode) +{ + struct erofs_sb_info *sbi = inode->sbi; + + return erofs_pos(sbi, sbi->meta_blkaddr) + + (inode->nid << sbi->islotbits); +} + static inline bool is_inode_layout_compression(struct 
erofs_inode *inode) { return erofs_inode_is_data_compressed(inode->datalayout); @@ -310,7 +335,7 @@ enum { #define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref) struct erofs_map_blocks { - char mpage[EROFS_BLKSIZ]; + char mpage[EROFS_MAX_BLOCK_SIZE]; erofs_off_t m_pa, m_la; u64 m_plen, m_llen; @@ -341,13 +366,12 @@ struct erofs_map_dev { }; /* super.c */ -int erofs_read_superblock(void); -void erofs_put_super(void); +int erofs_read_superblock(struct erofs_sb_info *sbi); +void erofs_put_super(struct erofs_sb_info *sbi); /* namei.c */ int erofs_read_inode_from_disk(struct erofs_inode *vi); int erofs_ilookup(const char *path, struct erofs_inode *vi); -int erofs_read_inode_from_disk(struct erofs_inode *vi); /* data.c */ int erofs_pread(struct erofs_inode *inode, char *buf, @@ -355,11 +379,13 @@ int erofs_pread(struct erofs_inode *inode, char *buf, int erofs_map_blocks(struct erofs_inode *inode, struct erofs_map_blocks *map, int flags); int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map); -int erofs_read_one_data(struct erofs_map_blocks *map, char *buffer, u64 offset, - size_t len); +int erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map, + char *buffer, u64 offset, size_t len); int z_erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map, char *raw, char *buffer, erofs_off_t skip, erofs_off_t length, bool trimmed); +void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid, + erofs_off_t *offset, int *lengthp); static inline int erofs_get_occupied_size(const struct erofs_inode *inode, erofs_off_t *size) @@ -371,12 +397,12 @@ static inline int erofs_get_occupied_size(const struct erofs_inode *inode, case EROFS_INODE_CHUNK_BASED: *size = inode->i_size; break; - case EROFS_INODE_FLAT_COMPRESSION_LEGACY: - case EROFS_INODE_FLAT_COMPRESSION: - *size = inode->u.i_blocks * EROFS_BLKSIZ; + case EROFS_INODE_COMPRESSED_FULL: + case EROFS_INODE_COMPRESSED_COMPACT: + *size = inode->u.i_blocks * 
erofs_blksiz(inode->sbi); break; default: - return -ENOTSUP; + return -EOPNOTSUPP; } return 0; } @@ -410,6 +436,12 @@ static inline u32 erofs_crc32c(u32 crc, const u8 *in, size_t len) return crc; } +#define EROFS_WHITEOUT_DEV 0 +static inline bool erofs_inode_is_whiteout(struct erofs_inode *inode) +{ + return S_ISCHR(inode->i_mode) && inode->u.i_rdev == EROFS_WHITEOUT_DEV; +} + #ifdef __cplusplus } #endif diff --git a/include/erofs/io.h b/include/erofs/io.h index 0f58c70..4db5716 100644 --- a/include/erofs/io.h +++ b/include/erofs/io.h @@ -22,36 +22,36 @@ extern "C" #define O_BINARY 0 #endif -void blob_closeall(void); -int blob_open_ro(const char *dev); -int dev_open(const char *devname); -int dev_open_ro(const char *dev); -void dev_close(void); -int dev_write(const void *buf, u64 offset, size_t len); -int dev_read(int device_id, void *buf, u64 offset, size_t len); -int dev_fillzero(u64 offset, size_t len, bool padding); -int dev_fsync(void); -int dev_resize(erofs_blk_t nblocks); -u64 dev_length(void); - -extern int erofs_devfd; +void blob_closeall(struct erofs_sb_info *sbi); +int blob_open_ro(struct erofs_sb_info *sbi, const char *dev); +int dev_open(struct erofs_sb_info *sbi, const char *devname); +int dev_open_ro(struct erofs_sb_info *sbi, const char *dev); +void dev_close(struct erofs_sb_info *sbi); +int dev_write(struct erofs_sb_info *sbi, const void *buf, + u64 offset, size_t len); +int dev_read(struct erofs_sb_info *sbi, int device_id, + void *buf, u64 offset, size_t len); +int dev_fillzero(struct erofs_sb_info *sbi, u64 offset, + size_t len, bool padding); +int dev_fsync(struct erofs_sb_info *sbi); +int dev_resize(struct erofs_sb_info *sbi, erofs_blk_t nblocks); ssize_t erofs_copy_file_range(int fd_in, erofs_off_t *off_in, int fd_out, erofs_off_t *off_out, size_t length); -static inline int blk_write(const void *buf, erofs_blk_t blkaddr, - u32 nblocks) +static inline int blk_write(struct erofs_sb_info *sbi, const void *buf, + erofs_blk_t blkaddr, u32 
nblocks) { - return dev_write(buf, blknr_to_addr(blkaddr), - blknr_to_addr(nblocks)); + return dev_write(sbi, buf, erofs_pos(sbi, blkaddr), + erofs_pos(sbi, nblocks)); } -static inline int blk_read(int device_id, void *buf, +static inline int blk_read(struct erofs_sb_info *sbi, int device_id, void *buf, erofs_blk_t start, u32 nblocks) { - return dev_read(device_id, buf, blknr_to_addr(start), - blknr_to_addr(nblocks)); + return dev_read(sbi, device_id, buf, erofs_pos(sbi, start), + erofs_pos(sbi, nblocks)); } #ifdef __cplusplus diff --git a/include/erofs/list.h b/include/erofs/list.h index 3f5da1a..d7a9fee 100644 --- a/include/erofs/list.h +++ b/include/erofs/list.h @@ -70,6 +70,26 @@ static inline int list_empty(struct list_head *head) return head->next == head; } +static inline void __list_splice(struct list_head *list, + struct list_head *prev, struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +static inline void list_splice_tail(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head->prev, head); +} + #define list_entry(ptr, type, member) container_of(ptr, type, member) #define list_first_entry(ptr, type, member) \ diff --git a/include/erofs/rebuild.h b/include/erofs/rebuild.h new file mode 100644 index 0000000..e99ce74 --- /dev/null +++ b/include/erofs/rebuild.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */ +#ifndef __EROFS_REBUILD_H +#define __EROFS_REBUILD_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "internal.h" + +struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, + char *path, bool aufs, bool *whout, bool *opq, bool to_head); + +int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/erofs/tar.h 
b/include/erofs/tar.h new file mode 100644 index 0000000..a76f740 --- /dev/null +++ b/include/erofs/tar.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */ +#ifndef __EROFS_TAR_H +#define __EROFS_TAR_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if defined(HAVE_ZLIB) +#include <zlib.h> +#endif +#include <sys/stat.h> + +#include "internal.h" + +struct erofs_pax_header { + struct stat st; + struct list_head xattrs; + bool use_mtime; + bool use_size; + bool use_uid; + bool use_gid; + char *path, *link; +}; + +#define EROFS_IOS_DECODER_NONE 0 +#define EROFS_IOS_DECODER_GZIP 1 + +struct erofs_iostream { + union { + int fd; /* original fd */ + void *handler; + }; + u64 sz; + char *buffer; + unsigned int head, tail, bufsize; + int decoder; + bool feof; +}; + +struct erofs_tarfile { + struct erofs_pax_header global; + struct erofs_iostream ios; + char *mapfile; + + int fd; + u64 offset; + bool index_mode, aufs; +}; + +void erofs_iostream_close(struct erofs_iostream *ios); +int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder); +int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/erofs/xattr.h b/include/erofs/xattr.h index a0528c0..0f76037 100644 --- a/include/erofs/xattr.h +++ b/include/erofs/xattr.h @@ -24,15 +24,17 @@ static inline unsigned int inlinexattr_header_size(struct erofs_inode *vi) sizeof(u32) * vi->xattr_shared_count; } -static inline erofs_blk_t xattrblock_addr(unsigned int xattr_id) +static inline erofs_blk_t xattrblock_addr(struct erofs_inode *vi, + unsigned int xattr_id) { - return sbi.xattr_blkaddr + - xattr_id * sizeof(__u32) / EROFS_BLKSIZ; + return vi->sbi->xattr_blkaddr + + erofs_blknr(vi->sbi, xattr_id * sizeof(__u32)); } -static inline unsigned int xattrblock_offset(unsigned int xattr_id) +static inline unsigned int xattrblock_offset(struct erofs_inode *vi, + unsigned int xattr_id) { - return (xattr_id * 
sizeof(__u32)) % EROFS_BLKSIZ; + return erofs_blkoff(vi->sbi, xattr_id * sizeof(__u32)); } #define EROFS_INODE_XATTR_ICOUNT(_size) ({\ @@ -41,34 +43,23 @@ static inline unsigned int xattrblock_offset(unsigned int xattr_id) (_size - sizeof(struct erofs_xattr_ibody_header)) / \ sizeof(struct erofs_xattr_entry) + 1; }) -#ifndef XATTR_USER_PREFIX -#define XATTR_USER_PREFIX "user." -#endif -#ifndef XATTR_USER_PREFIX_LEN -#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) -#endif -#ifndef XATTR_SECURITY_PREFIX -#define XATTR_SECURITY_PREFIX "security." -#endif -#ifndef XATTR_SECURITY_PREFIX_LEN -#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) -#endif -#ifndef XATTR_TRUSTED_PREFIX -#define XATTR_TRUSTED_PREFIX "trusted." -#endif -#ifndef XATTR_TRUSTED_PREFIX_LEN -#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) -#endif -#ifndef XATTR_NAME_POSIX_ACL_ACCESS -#define XATTR_NAME_POSIX_ACL_ACCESS "system.posix_acl_access" -#endif -#ifndef XATTR_NAME_POSIX_ACL_DEFAULT -#define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default" -#endif - +int erofs_scan_file_xattrs(struct erofs_inode *inode); int erofs_prepare_xattr_ibody(struct erofs_inode *inode); -char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size); -int erofs_build_shared_xattrs_from_path(const char *path); +char *erofs_export_xattr_ibody(struct erofs_inode *inode); +int erofs_build_shared_xattrs_from_path(struct erofs_sb_info *sbi, const char *path); + +int erofs_xattr_insert_name_prefix(const char *prefix); +void erofs_xattr_cleanup_name_prefixes(void); +int erofs_xattr_write_name_prefixes(struct erofs_sb_info *sbi, FILE *f); +void erofs_xattr_prefixes_cleanup(struct erofs_sb_info *sbi); +int erofs_xattr_prefixes_init(struct erofs_sb_info *sbi); + +int erofs_setxattr(struct erofs_inode *inode, char *key, + const void *value, size_t size); +int erofs_set_opaque_xattr(struct erofs_inode *inode); +void erofs_clear_opaque_xattr(struct erofs_inode 
*inode); +int erofs_set_origin_xattr(struct erofs_inode *inode); +int erofs_read_xattrs_from_disk(struct erofs_inode *inode); #ifdef __cplusplus } diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h new file mode 100644 index 0000000..5441209 --- /dev/null +++ b/include/erofs/xxhash.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */ +#ifndef __EROFS_XXHASH_H +#define __EROFS_XXHASH_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stdint.h> + +/** + * xxh32() - calculate the 32-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * Return: The 32-bit hash of the data. + */ +uint32_t xxh32(const void *input, size_t length, uint32_t seed); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/erofs_fs.h b/include/erofs_fs.h index 8835a76..eba6c26 100644 --- a/include/erofs_fs.h +++ b/include/erofs_fs.h @@ -3,7 +3,7 @@ * EROFS (Enhanced ROM File System) on-disk format definition * * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ + * https://www.huawei.com/ * Copyright (C) 2021, Alibaba Cloud */ #ifndef __EROFS_FS_H @@ -12,40 +12,42 @@ #define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 #define EROFS_SUPER_OFFSET 1024 -#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 -#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 +#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 +#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004 /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should * be incompatible with this kernel version. 
*/ -#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 +#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001 #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 +#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 #define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 +#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 #define EROFS_ALL_FEATURE_INCOMPAT \ - (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ + (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ + EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ - EROFS_FEATURE_INCOMPAT_DEDUPE) + EROFS_FEATURE_INCOMPAT_DEDUPE | \ + EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES) #define EROFS_SB_EXTSLOT_SIZE 16 struct erofs_deviceslot { - union { - u8 uuid[16]; /* used for device manager later */ - u8 userdata[64]; /* digest(sha256), etc. */ - } u; - __le32 blocks; /* total fs blocks of this device */ - __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ + u8 tag[64]; /* digest(sha256), etc. 
*/ + __le32 blocks; /* total fs blocks of this device */ + __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ u8 reserved[56]; }; #define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) @@ -55,14 +57,14 @@ struct erofs_super_block { __le32 magic; /* file system magic number */ __le32 checksum; /* crc32c(super_block) */ __le32 feature_compat; - __u8 blkszbits; /* support block_size == PAGE_SIZE only */ + __u8 blkszbits; /* filesystem block size in bit shift */ __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ __le16 root_nid; /* nid of root directory */ __le64 inos; /* total valid ino # (== f_files - f_favail) */ - __le64 build_time; /* inode v1 time derivation */ - __le32 build_time_nsec; /* inode v1 time derivation in nano scale */ + __le64 build_time; /* compact inode time derivation */ + __le32 build_time_nsec; /* compact inode time derivation in ns scale */ __le32 blocks; /* used for statfs */ __le32 meta_blkaddr; /* start block address of metadata area */ __le32 xattr_blkaddr; /* start block address of shared xattr area */ @@ -77,41 +79,39 @@ struct erofs_super_block { } __packed u1; __le16 extra_devices; /* # of devices besides the primary device */ __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ - __u8 reserved[6]; + __u8 dirblkbits; /* directory block size in bit shift */ + __u8 xattr_prefix_count; /* # of long xattr name prefixes */ + __le32 xattr_prefix_start; /* start of long xattr prefixes */ __le64 packed_nid; /* nid of the special packed inode */ - __u8 reserved2[24]; + __u8 xattr_filter_reserved; /* reserved for xattr name filter */ + __u8 reserved2[23]; }; /* - * erofs inode datalayout (i_format in on-disk inode): - * 0 - inode plain without inline data A: - * inode, [xattrs], ... | ... | no-holed data - * 1 - inode VLE compression B (legacy): - * inode, [xattrs], extents ... | ... - * 2 - inode plain with inline data C: - * inode, [xattrs], last_inline_data, ... | ... 
| no-holed data - * 3 - inode compression D: - * inode, [xattrs], map_header, extents ... | ... - * 4 - inode chunk-based E: - * inode, [xattrs], chunk indexes ... | ... + * EROFS inode datalayout (i_format in on-disk inode): + * 0 - uncompressed flat inode without tail-packing inline data: + * 1 - compressed inode with non-compact indexes: + * 2 - uncompressed flat inode with tail-packing inline data: + * 3 - compressed inode with compact indexes: + * 4 - chunk-based inode with (optional) multi-device support: * 5~7 - reserved */ enum { EROFS_INODE_FLAT_PLAIN = 0, - EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, + EROFS_INODE_COMPRESSED_FULL = 1, EROFS_INODE_FLAT_INLINE = 2, - EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_COMPRESSED_COMPACT = 3, EROFS_INODE_CHUNK_BASED = 4, EROFS_INODE_DATALAYOUT_MAX }; static inline bool erofs_inode_is_data_compressed(unsigned int datamode) { - return datamode == EROFS_INODE_FLAT_COMPRESSION || - datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; + return datamode == EROFS_INODE_COMPRESSED_COMPACT || + datamode == EROFS_INODE_COMPRESSED_FULL; } -/* bit definitions of inode i_advise */ +/* bit definitions of inode i_format */ #define EROFS_I_VERSION_BITS 1 #define EROFS_I_DATALAYOUT_BITS 3 @@ -129,11 +129,30 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) #define EROFS_CHUNK_FORMAT_ALL \ (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) +/* 32-byte on-disk inode */ +#define EROFS_INODE_LAYOUT_COMPACT 0 +/* 64-byte on-disk inode */ +#define EROFS_INODE_LAYOUT_EXTENDED 1 + struct erofs_inode_chunk_info { __le16 format; /* chunk blkbits, etc. 
*/ __le16 reserved; }; +union erofs_inode_i_u { + /* total compressed blocks for compressed inodes */ + __le32 compressed_blocks; + + /* block address for uncompressed flat inodes */ + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; +}; + /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { __le16 i_format; /* inode format hints */ @@ -144,28 +163,14 @@ struct erofs_inode_compact { __le16 i_nlink; __le32 i_size; __le32 i_reserved; - union { - /* file total compressed blocks for data mapping 1 */ - __le32 compressed_blocks; - __le32 raw_blkaddr; + union erofs_inode_i_u i_u; - /* for device files, used to indicate old/new device # */ - __le32 rdev; - - /* for chunk-based files, it contains the summary info */ - struct erofs_inode_chunk_info c; - } i_u; - __le32 i_ino; /* only used for 32-bit stat compatibility */ + __le32 i_ino; /* only used for 32-bit stat compatibility */ __le16 i_uid; __le16 i_gid; __le32 i_reserved2; }; -/* 32 bytes on-disk inode */ -#define EROFS_INODE_LAYOUT_COMPACT 0 -/* 64 bytes on-disk inode */ -#define EROFS_INODE_LAYOUT_EXTENDED 1 - /* 64-byte complete form of an ondisk inode */ struct erofs_inode_extended { __le16 i_format; /* inode format hints */ @@ -175,21 +180,9 @@ struct erofs_inode_extended { __le16 i_mode; __le16 i_reserved; __le64 i_size; - union { - /* file total compressed blocks for data mapping 1 */ - __le32 compressed_blocks; - __le32 raw_blkaddr; - - /* for device files, used to indicate old/new device # */ - __le32 rdev; - - /* for chunk-based files, it contains the summary info */ - struct erofs_inode_chunk_info c; - } i_u; - - /* only used for 32-bit stat compatibility */ - __le32 i_ino; + union erofs_inode_i_u i_u; + __le32 i_ino; /* only used for 32-bit stat compatibility */ __le32 i_uid; __le32 i_gid; __le64 i_mtime; @@ -198,10 +191,6 @@ struct 
erofs_inode_extended { __u8 i_reserved2[16]; }; -#define EROFS_MAX_SHARED_XATTRS (128) -/* h_shared_count between 129 ... 255 are special # */ -#define EROFS_SHARED_XATTR_EXTENT (255) - /* * inline xattrs (n == i_xattr_icount): * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes @@ -214,7 +203,7 @@ struct erofs_inode_extended { * for read-only fs, no need to introduce h_refcount */ struct erofs_xattr_ibody_header { - __le32 h_reserved; + __le32 h_name_filter; /* bit value 1 indicates not-present */ __u8 h_shared_count; __u8 h_reserved2[7]; __le32 h_shared_xattrs[0]; /* shared xattr id array */ @@ -228,6 +217,17 @@ struct erofs_xattr_ibody_header { #define EROFS_XATTR_INDEX_LUSTRE 5 #define EROFS_XATTR_INDEX_SECURITY 6 +/* + * bit 7 of e_name_index is set when it refers to a long xattr name prefix, + * while the remained lower bits represent the index of the prefix. + */ +#define EROFS_XATTR_LONG_PREFIX 0x80 +#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f + +#define EROFS_XATTR_FILTER_BITS 32 +#define EROFS_XATTR_FILTER_DEFAULT UINT32_MAX +#define EROFS_XATTR_FILTER_SEED 0x25BBE08F + /* xattr entry (for both inline & shared xattrs) */ struct erofs_xattr_entry { __u8 e_name_len; /* length of name */ @@ -237,6 +237,12 @@ struct erofs_xattr_entry { char e_name[0]; /* attribute name */ }; +/* long xattr name prefix */ +struct erofs_xattr_long_prefix { + __u8 base_index; /* short xattr name prefix index */ + char infix[0]; /* infix apart from short prefix */ +}; + static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) { if (!i_xattr_icount) @@ -267,6 +273,29 @@ struct erofs_inode_chunk_index { __le32 blkaddr; /* start block address of this inode chunk */ }; +/* dirent sorts in alphabet order, thus we can do binary search */ +struct erofs_dirent { + __le64 nid; /* node number */ + __le16 nameoff; /* start offset of file name */ + __u8 file_type; /* file type */ + __u8 reserved; /* reserved */ +} __packed; + +/* file types used in inode_info->flags */ +enum { + 
EROFS_FT_UNKNOWN, + EROFS_FT_REG_FILE, + EROFS_FT_DIR, + EROFS_FT_CHRDEV, + EROFS_FT_BLKDEV, + EROFS_FT_FIFO, + EROFS_FT_SOCK, + EROFS_FT_SYMLINK, + EROFS_FT_MAX +}; + +#define EROFS_NAME_LEN 255 + /* maximum supported size of a physical compression cluster */ #define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) @@ -274,9 +303,10 @@ struct erofs_inode_chunk_index { enum { Z_EROFS_COMPRESSION_LZ4 = 0, Z_EROFS_COMPRESSION_LZMA = 1, + Z_EROFS_COMPRESSION_DEFLATE = 2, Z_EROFS_COMPRESSION_MAX }; -#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1)) +#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1) /* 14 bytes (+ length field = 16 bytes) */ struct z_erofs_lz4_cfgs { @@ -291,8 +321,15 @@ struct z_erofs_lzma_cfgs { __le16 format; u8 reserved[8]; } __packed; + #define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) +/* 6 bytes (+ length field = 8 bytes) */ +struct z_erofs_deflate_cfgs { + u8 windowbits; /* 8..15 for DEFLATE */ + u8 reserved[5]; +} __packed; + /* * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) * e.g. 
for 4k logical cluster size, 4B if compacted 2B is off; @@ -310,15 +347,15 @@ struct z_erofs_lzma_cfgs { #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 #define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 -#define Z_EROFS_FRAGMENT_INODE_BIT 7 +#define Z_EROFS_FRAGMENT_INODE_BIT 7 struct z_erofs_map_header { union { /* fragment data offset in the packed inode */ - __le32 h_fragmentoff; + __le32 h_fragmentoff; struct { __le16 h_reserved1; /* indicates the encoded size of tailpacking data */ - __le16 h_idata_size; + __le16 h_idata_size; }; }; __le16 h_advise; @@ -335,105 +372,81 @@ struct z_erofs_map_header { __u8 h_clusterbits; }; -#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 - /* - * Fixed-sized output compression ondisk Logical Extent cluster type: - * 0 - literal (uncompressed) cluster - * 1 - compressed cluster (for the head logical cluster) - * 2 - compressed cluster (for the other logical clusters) + * On-disk logical cluster type: + * 0 - literal (uncompressed) lcluster + * 1,3 - compressed lcluster (for HEAD lclusters) + * 2 - compressed lcluster (for NONHEAD lclusters) * * In detail, - * 0 - literal (uncompressed) cluster, + * 0 - literal (uncompressed) lcluster, * di_advise = 0 - * di_clusterofs = the literal data offset of the cluster - * di_blkaddr = the blkaddr of the literal cluster + * di_clusterofs = the literal data offset of the lcluster + * di_blkaddr = the blkaddr of the literal pcluster * - * 1 - compressed cluster (for the head logical cluster) - * di_advise = 1 - * di_clusterofs = the decompressed data offset of the cluster - * di_blkaddr = the blkaddr of the compressed cluster + * 1,3 - compressed lcluster (for HEAD lclusters) + * di_advise = 1 or 3 + * di_clusterofs = the decompressed data offset of the lcluster + * di_blkaddr = the blkaddr of the compressed pcluster * - * 2 - compressed cluster (for the other logical clusters) + * 2 - compressed lcluster (for NONHEAD lclusters) * di_advise = 2 * di_clusterofs = - * the decompressed data offset 
in its own head cluster - * di_u.delta[0] = distance to its corresponding head cluster - * di_u.delta[1] = distance to its corresponding tail cluster - * (di_advise could be 0, 1 or 2) + * the decompressed data offset in its own HEAD lcluster + * di_u.delta[0] = distance to this HEAD lcluster + * di_u.delta[1] = distance to the next HEAD lcluster */ enum { - Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, - Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1, - Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, - Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3, - Z_EROFS_VLE_CLUSTER_TYPE_MAX + Z_EROFS_LCLUSTER_TYPE_PLAIN = 0, + Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1, + Z_EROFS_LCLUSTER_TYPE_NONHEAD = 2, + Z_EROFS_LCLUSTER_TYPE_HEAD2 = 3, + Z_EROFS_LCLUSTER_TYPE_MAX }; -#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 -#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 +#define Z_EROFS_LI_LCLUSTER_TYPE_BITS 2 +#define Z_EROFS_LI_LCLUSTER_TYPE_BIT 0 /* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ -#define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15) +#define Z_EROFS_LI_PARTIAL_REF (1 << 15) /* * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the * compressed block count of a compressed extent (in logical clusters, aka. * block count of a pcluster). */ -#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11) +#define Z_EROFS_LI_D0_CBLKCNT (1 << 11) -struct z_erofs_vle_decompressed_index { +struct z_erofs_lcluster_index { __le16 di_advise; - /* where to decompress in the head cluster */ + /* where to decompress in the head lcluster */ __le16 di_clusterofs; union { - /* for the head cluster */ + /* for the HEAD lclusters */ __le32 blkaddr; /* - * for the rest clusters - * eg. 
for 4k page-sized cluster, maximum 4K*64k = 256M) - * [0] - pointing to the head cluster - * [1] - pointing to the tail cluster + * for the NONHEAD lclusters + * [0] - distance to its HEAD lcluster + * [1] - distance to the next HEAD lcluster */ __le16 delta[2]; } di_u; }; -#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \ - (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \ - sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) - -#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \ - sizeof(struct z_erofs_vle_decompressed_index)) - -/* dirent sorts in alphabet order, thus we can do binary search */ -struct erofs_dirent { - __le64 nid; /* node number */ - __le16 nameoff; /* start offset of file name */ - __u8 file_type; /* file type */ - __u8 reserved; /* reserved */ -} __packed; - -/* file types used in inode_info->flags */ -enum { - EROFS_FT_UNKNOWN, - EROFS_FT_REG_FILE, - EROFS_FT_DIR, - EROFS_FT_CHRDEV, - EROFS_FT_BLKDEV, - EROFS_FT_FIFO, - EROFS_FT_SOCK, - EROFS_FT_SYMLINK, - EROFS_FT_MAX -}; - -#define EROFS_NAME_LEN 255 +#define Z_EROFS_FULL_INDEX_ALIGN(end) \ + (round_up(end, 8) + sizeof(struct z_erofs_map_header) + 8) /* check the EROFS on-disk layout strictly at compile time */ static inline void erofs_check_ondisk_layout_definitions(void) { + const union { + struct z_erofs_map_header h; + __le64 v; + } fmh __maybe_unused = { + .h.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT, + }; + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); @@ -442,15 +455,18 @@ static inline void erofs_check_ondisk_layout_definitions(void) BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4); BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); - BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_lcluster_index) 
!= 8); BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); /* keep in sync between 2 index structures for better extendibility */ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != - sizeof(struct z_erofs_vle_decompressed_index)); + sizeof(struct z_erofs_lcluster_index)); BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128); - BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < - Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); + BUILD_BUG_ON(BIT(Z_EROFS_LI_LCLUSTER_TYPE_BITS) < + Z_EROFS_LCLUSTER_TYPE_MAX - 1); + /* exclude old compiler versions like gcc 7.5.0 */ + BUILD_BUG_ON(__builtin_constant_p(fmh.v) ? + fmh.v != cpu_to_le64(1ULL << 63) : 0); } #endif diff --git a/lib/Makefile.am b/lib/Makefile.am index faa7311..483d410 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -9,6 +9,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \ $(top_srcdir)/include/erofs/config.h \ $(top_srcdir)/include/erofs/decompress.h \ $(top_srcdir)/include/erofs/defs.h \ + $(top_srcdir)/include/erofs/diskbuf.h \ $(top_srcdir)/include/erofs/err.h \ $(top_srcdir)/include/erofs/exclude.h \ $(top_srcdir)/include/erofs/flex-array.h \ @@ -19,19 +20,23 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \ $(top_srcdir)/include/erofs/io.h \ $(top_srcdir)/include/erofs/list.h \ $(top_srcdir)/include/erofs/print.h \ + $(top_srcdir)/include/erofs/tar.h \ $(top_srcdir)/include/erofs/trace.h \ $(top_srcdir)/include/erofs/xattr.h \ $(top_srcdir)/include/erofs/compress_hints.h \ $(top_srcdir)/include/erofs/fragments.h \ + $(top_srcdir)/include/erofs/xxhash.h \ + $(top_srcdir)/include/erofs/rebuild.h \ $(top_srcdir)/lib/liberofs_private.h noinst_HEADERS += compressor.h liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \ namei.c data.c compress.c compressor.c zmap.c decompress.c \ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \ - fragments.c rb_tree.c dedupe.c + fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c \ + block_list.c xxhash.c rebuild.c diskbuf.c 
-liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include +liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include if ENABLE_LZ4 liberofs_la_CFLAGS += ${LZ4_CFLAGS} liberofs_la_SOURCES += compressor_lz4.c @@ -43,3 +48,8 @@ if ENABLE_LIBLZMA liberofs_la_CFLAGS += ${liblzma_CFLAGS} liberofs_la_SOURCES += compressor_liblzma.c endif + +liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c +if ENABLE_LIBDEFLATE +liberofs_la_SOURCES += compressor_libdeflate.c +endif diff --git a/lib/blobchunk.c b/lib/blobchunk.c index 3ff0f48..e4d0bad 100644 --- a/lib/blobchunk.c +++ b/lib/blobchunk.c @@ -14,77 +14,98 @@ #include <unistd.h> struct erofs_blobchunk { - struct hashmap_entry ent; + union { + struct hashmap_entry ent; + struct list_head list; + }; char sha256[32]; - erofs_off_t chunksize; + unsigned int device_id; + union { + erofs_off_t chunksize; + erofs_off_t sourceoffset; + }; erofs_blk_t blkaddr; }; static struct hashmap blob_hashmap; static FILE *blobfile; static erofs_blk_t remapped_base; +static erofs_off_t datablob_size; static bool multidev; static struct erofs_buffer_head *bh_devt; struct erofs_blobchunk erofs_holechunk = { .blkaddr = EROFS_NULL_ADDR, }; +static LIST_HEAD(unhashed_blobchunks); -static struct erofs_blobchunk *erofs_blob_getchunk(int fd, - erofs_off_t chunksize) +struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id, + erofs_blk_t blkaddr, erofs_off_t sourceoffset) { - static u8 zeroed[EROFS_BLKSIZ]; - u8 *chunkdata, sha256[32]; - int ret; - unsigned int hash; - erofs_off_t blkpos; struct erofs_blobchunk *chunk; - chunkdata = malloc(chunksize); - if (!chunkdata) + chunk = calloc(1, sizeof(struct erofs_blobchunk)); + if (!chunk) return ERR_PTR(-ENOMEM); - ret = read(fd, chunkdata, chunksize); - if (ret < chunksize) { - chunk = ERR_PTR(-EIO); - goto out; - } - erofs_sha256(chunkdata, chunksize, sha256); + chunk->device_id = device_id; + chunk->blkaddr = blkaddr; + chunk->sourceoffset = sourceoffset; + 
list_add_tail(&chunk->list, &unhashed_blobchunks); + return chunk; +} + +static struct erofs_blobchunk *erofs_blob_getchunk(struct erofs_sb_info *sbi, + u8 *buf, erofs_off_t chunksize) +{ + static u8 zeroed[EROFS_MAX_BLOCK_SIZE]; + struct erofs_blobchunk *chunk; + unsigned int hash, padding; + u8 sha256[32]; + erofs_off_t blkpos; + int ret; + + erofs_sha256(buf, chunksize, sha256); hash = memhash(sha256, sizeof(sha256)); chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256); if (chunk) { DBG_BUGON(chunksize != chunk->chunksize); - goto out; + sbi->saved_by_deduplication += chunksize; + erofs_dbg("Found duplicated chunk at %u", chunk->blkaddr); + return chunk; } + chunk = malloc(sizeof(struct erofs_blobchunk)); - if (!chunk) { - chunk = ERR_PTR(-ENOMEM); - goto out; - } + if (!chunk) + return ERR_PTR(-ENOMEM); chunk->chunksize = chunksize; - blkpos = ftell(blobfile); - DBG_BUGON(erofs_blkoff(blkpos)); - chunk->blkaddr = erofs_blknr(blkpos); memcpy(chunk->sha256, sha256, sizeof(sha256)); - hashmap_entry_init(&chunk->ent, hash); - hashmap_add(&blob_hashmap, chunk); + blkpos = ftell(blobfile); + DBG_BUGON(erofs_blkoff(sbi, blkpos)); + + if (sbi->extra_devices) + chunk->device_id = 1; + else + chunk->device_id = 0; + chunk->blkaddr = erofs_blknr(sbi, blkpos); erofs_dbg("Writing chunk (%u bytes) to %u", chunksize, chunk->blkaddr); - ret = fwrite(chunkdata, chunksize, 1, blobfile); - if (ret == 1 && erofs_blkoff(chunksize)) - ret = fwrite(zeroed, EROFS_BLKSIZ - erofs_blkoff(chunksize), - 1, blobfile); - if (ret < 1) { - struct hashmap_entry key; + ret = fwrite(buf, chunksize, 1, blobfile); + if (ret == 1) { + padding = erofs_blkoff(sbi, chunksize); + if (padding) { + padding = erofs_blksiz(sbi) - padding; + ret = fwrite(zeroed, padding, 1, blobfile); + } + } - hashmap_entry_init(&key, hash); - hashmap_remove(&blob_hashmap, &key, sha256); + if (ret < 1) { free(chunk); - chunk = ERR_PTR(-ENOSPC); - goto out; + return ERR_PTR(-ENOSPC); } -out: - free(chunkdata); + + 
hashmap_entry_init(&chunk->ent, hash); + hashmap_add(&blob_hashmap, chunk); return chunk; } @@ -107,109 +128,151 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, { struct erofs_inode_chunk_index idx = {0}; erofs_blk_t extent_start = EROFS_NULL_ADDR; - erofs_blk_t extent_end, extents_blks; + erofs_blk_t extent_end, chunkblks; + erofs_off_t source_offset; unsigned int dst, src, unit; bool first_extent = true; - erofs_blk_t base_blkaddr = 0; - - if (multidev) { - idx.device_id = 1; - DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)); - } else { - base_blkaddr = remapped_base; - } if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) unit = sizeof(struct erofs_inode_chunk_index); else unit = EROFS_BLOCK_MAP_ENTRY_SIZE; + chunkblks = 1U << (inode->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); for (dst = src = 0; dst < inode->extent_isize; src += sizeof(void *), dst += unit) { struct erofs_blobchunk *chunk; chunk = *(void **)(inode->chunkindexes + src); - if (chunk->blkaddr != EROFS_NULL_ADDR) - idx.blkaddr = base_blkaddr + chunk->blkaddr; - else + if (chunk->blkaddr == EROFS_NULL_ADDR) { idx.blkaddr = EROFS_NULL_ADDR; - - if (extent_start != EROFS_NULL_ADDR && - idx.blkaddr == extent_end + 1) { - extent_end = idx.blkaddr; + } else if (chunk->device_id) { + DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)); + idx.blkaddr = chunk->blkaddr; + extent_start = EROFS_NULL_ADDR; } else { + idx.blkaddr = remapped_base + chunk->blkaddr; + } + + if (extent_start == EROFS_NULL_ADDR || + idx.blkaddr != extent_end) { if (extent_start != EROFS_NULL_ADDR) { + tarerofs_blocklist_write(extent_start, + extent_end - extent_start, + source_offset); erofs_droid_blocklist_write_extent(inode, extent_start, - (extent_end - extent_start) + 1, + extent_end - extent_start, first_extent, false); first_extent = false; } extent_start = idx.blkaddr; - extent_end = idx.blkaddr; + source_offset = chunk->sourceoffset; } + extent_end = idx.blkaddr + 
chunkblks; + idx.device_id = cpu_to_le16(chunk->device_id); + idx.blkaddr = cpu_to_le32(idx.blkaddr); + if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE) memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit); else memcpy(inode->chunkindexes + dst, &idx, sizeof(idx)); } off = roundup(off, unit); + if (extent_start != EROFS_NULL_ADDR) + tarerofs_blocklist_write(extent_start, extent_end - extent_start, + source_offset); + erofs_droid_blocklist_write_extent(inode, extent_start, + extent_start == EROFS_NULL_ADDR ? + 0 : extent_end - extent_start, + first_extent, true); + + return dev_write(inode->sbi, inode->chunkindexes, off, inode->extent_isize); +} + +int erofs_blob_mergechunks(struct erofs_inode *inode, unsigned int chunkbits, + unsigned int new_chunkbits) +{ + struct erofs_sb_info *sbi = inode->sbi; + unsigned int dst, src, unit, count; - if (extent_start == EROFS_NULL_ADDR) - extents_blks = 0; + if (new_chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + new_chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits; + if (chunkbits >= new_chunkbits) /* no need to merge */ + goto out; + + if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) + unit = sizeof(struct erofs_inode_chunk_index); else - extents_blks = (extent_end - extent_start) + 1; - erofs_droid_blocklist_write_extent(inode, extent_start, extents_blks, - first_extent, true); + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; + + count = round_up(inode->i_size, 1ULL << new_chunkbits) >> new_chunkbits; + for (dst = src = 0; dst < count; ++dst) { + *((void **)inode->chunkindexes + dst) = + *((void **)inode->chunkindexes + src); + src += 1U << (new_chunkbits - chunkbits); + } - return dev_write(inode->chunkindexes, off, inode->extent_isize); + DBG_BUGON(count * unit >= inode->extent_isize); + inode->extent_isize = count * unit; + chunkbits = new_chunkbits; +out: + inode->u.chunkformat = (chunkbits - sbi->blkszbits) | + (inode->u.chunkformat & ~EROFS_CHUNK_FORMAT_BLKBITS_MASK); + return 0; } -int 
erofs_blob_write_chunked_file(struct erofs_inode *inode) +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd, + erofs_off_t startoff) { + struct erofs_sb_info *sbi = inode->sbi; unsigned int chunkbits = cfg.c_chunkbits; unsigned int count, unit; + struct erofs_blobchunk *chunk, *lastch; struct erofs_inode_chunk_index *idx; erofs_off_t pos, len, chunksize; - int fd, ret; + erofs_blk_t lb, minextblks; + u8 *chunkdata; + int ret; - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; #ifdef SEEK_DATA /* if the file is fully sparsed, use one big chunk instead */ - if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) { + if (lseek(fd, startoff, SEEK_DATA) < 0 && errno == ENXIO) { chunkbits = ilog2(inode->i_size - 1) + 1; - if (chunkbits < LOG_BLOCK_SIZE) - chunkbits = LOG_BLOCK_SIZE; + if (chunkbits < sbi->blkszbits) + chunkbits = sbi->blkszbits; } #endif - if (chunkbits - LOG_BLOCK_SIZE > EROFS_CHUNK_FORMAT_BLKBITS_MASK) - chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + LOG_BLOCK_SIZE; + if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits; chunksize = 1ULL << chunkbits; count = DIV_ROUND_UP(inode->i_size, chunksize); - inode->u.chunkformat |= chunkbits - LOG_BLOCK_SIZE; - if (multidev) - inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; + if (sbi->extra_devices) + inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) unit = sizeof(struct erofs_inode_chunk_index); else unit = EROFS_BLOCK_MAP_ENTRY_SIZE; - inode->extent_isize = count * unit; - idx = malloc(count * max(sizeof(*idx), sizeof(void *))); - if (!idx) { - close(fd); + chunkdata = malloc(chunksize); + if (!chunkdata) return -ENOMEM; + + inode->extent_isize = count * unit; + inode->chunkindexes = malloc(count * max(sizeof(*idx), sizeof(void *))); + if (!inode->chunkindexes) { + ret = -ENOMEM; + goto err; } - inode->chunkindexes = idx; + idx = 
inode->chunkindexes; + lastch = NULL; + minextblks = BLK_ROUND_UP(sbi, inode->i_size); for (pos = 0; pos < inode->i_size; pos += len) { - struct erofs_blobchunk *chunk; #ifdef SEEK_DATA - off_t offset = lseek(fd, pos, SEEK_DATA); + off_t offset = lseek(fd, pos + startoff, SEEK_DATA); if (offset < 0) { if (errno != ENXIO) @@ -217,7 +280,16 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) else offset = ((pos >> chunkbits) + 1) << chunkbits; } else { - offset &= ~(chunksize - 1); + offset -= startoff; + + if (offset != (offset & ~(chunksize - 1))) { + offset &= ~(chunksize - 1); + if (lseek(fd, offset + startoff, SEEK_SET) != + startoff + offset) { + ret = -EIO; + goto err; + } + } } if (offset > pos) { @@ -227,76 +299,189 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) pos += chunksize; } while (pos < offset); DBG_BUGON(pos != offset); + lastch = NULL; continue; } #endif len = min_t(u64, inode->i_size - pos, chunksize); - chunk = erofs_blob_getchunk(fd, len); + ret = read(fd, chunkdata, len); + if (ret < len) { + ret = -EIO; + goto err; + } + + chunk = erofs_blob_getchunk(sbi, chunkdata, len); if (IS_ERR(chunk)) { ret = PTR_ERR(chunk); goto err; } + + if (lastch && (lastch->device_id != chunk->device_id || + erofs_pos(sbi, lastch->blkaddr) + lastch->chunksize != + erofs_pos(sbi, chunk->blkaddr))) { + lb = lowbit(pos >> sbi->blkszbits); + if (lb && lb < minextblks) + minextblks = lb; + } *(void **)idx++ = chunk; + lastch = chunk; } inode->datalayout = EROFS_INODE_CHUNK_BASED; - close(fd); - return 0; + free(chunkdata); + return erofs_blob_mergechunks(inode, chunkbits, + ilog2(minextblks) + sbi->blkszbits); err: - close(fd); free(inode->chunkindexes); inode->chunkindexes = NULL; + free(chunkdata); return ret; } -int erofs_blob_remap(void) +int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset) +{ + struct erofs_sb_info *sbi = inode->sbi; + unsigned int chunkbits = ilog2(inode->i_size - 1) + 1; + unsigned int count, 
unit, device_id; + erofs_off_t chunksize, len, pos; + erofs_blk_t blkaddr; + struct erofs_inode_chunk_index *idx; + + if (chunkbits < sbi->blkszbits) + chunkbits = sbi->blkszbits; + if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits; + + inode->u.chunkformat |= chunkbits - sbi->blkszbits; + if (sbi->extra_devices) { + device_id = 1; + inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; + unit = sizeof(struct erofs_inode_chunk_index); + DBG_BUGON(erofs_blkoff(sbi, data_offset)); + blkaddr = erofs_blknr(sbi, data_offset); + } else { + device_id = 0; + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; + DBG_BUGON(erofs_blkoff(sbi, datablob_size)); + blkaddr = erofs_blknr(sbi, datablob_size); + datablob_size += round_up(inode->i_size, erofs_blksiz(sbi)); + } + chunksize = 1ULL << chunkbits; + count = DIV_ROUND_UP(inode->i_size, chunksize); + + inode->extent_isize = count * unit; + idx = calloc(count, max(sizeof(*idx), sizeof(void *))); + if (!idx) + return -ENOMEM; + inode->chunkindexes = idx; + + for (pos = 0; pos < inode->i_size; pos += len) { + struct erofs_blobchunk *chunk; + + len = min_t(erofs_off_t, inode->i_size - pos, chunksize); + + chunk = erofs_get_unhashed_chunk(device_id, blkaddr, + data_offset); + if (IS_ERR(chunk)) { + free(inode->chunkindexes); + inode->chunkindexes = NULL; + return PTR_ERR(chunk); + } + + *(void **)idx++ = chunk; + blkaddr += erofs_blknr(sbi, len); + data_offset += len; + } + inode->datalayout = EROFS_INODE_CHUNK_BASED; + return 0; +} + +int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi) { struct erofs_buffer_head *bh; ssize_t length; erofs_off_t pos_in, pos_out; ssize_t ret; - fflush(blobfile); - length = ftell(blobfile); - if (length < 0) - return -errno; - if (multidev) { - struct erofs_deviceslot dis = { - .blocks = erofs_blknr(length), - }; + if (blobfile) { + fflush(blobfile); + length = ftell(blobfile); + if (length < 0) + return -errno; - pos_out = 
erofs_btell(bh_devt, false); - ret = dev_write(&dis, pos_out, sizeof(dis)); - if (ret) - return ret; + if (sbi->extra_devices) + sbi->devs[0].blocks = erofs_blknr(sbi, length); + else + datablob_size = length; + } + if (sbi->extra_devices) { + unsigned int i, ret; + erofs_blk_t nblocks; + + nblocks = erofs_mapbh(NULL); + pos_out = erofs_btell(bh_devt, false); + i = 0; + do { + struct erofs_deviceslot dis = { + .mapped_blkaddr = cpu_to_le32(nblocks), + .blocks = cpu_to_le32(sbi->devs[i].blocks), + }; + + memcpy(dis.tag, sbi->devs[i].tag, sizeof(dis.tag)); + ret = dev_write(sbi, &dis, pos_out, sizeof(dis)); + if (ret) + return ret; + pos_out += sizeof(dis); + nblocks += sbi->devs[i].blocks; + } while (++i < sbi->extra_devices); bh_devt->op = &erofs_drop_directly_bhops; erofs_bdrop(bh_devt, false); return 0; } - if (!length) /* bail out if there is no chunked data */ - return 0; - bh = erofs_balloc(DATA, length, 0, 0); + + bh = erofs_balloc(DATA, blobfile ? datablob_size : 0, 0, 0); if (IS_ERR(bh)) return PTR_ERR(bh); erofs_mapbh(bh->block); + pos_out = erofs_btell(bh, false); - pos_in = 0; - remapped_base = erofs_blknr(pos_out); - ret = erofs_copy_file_range(fileno(blobfile), &pos_in, - erofs_devfd, &pos_out, length); + remapped_base = erofs_blknr(sbi, pos_out); + if (blobfile) { + pos_in = 0; + ret = erofs_copy_file_range(fileno(blobfile), &pos_in, + sbi->devfd, &pos_out, datablob_size); + ret = ret < datablob_size ? -EIO : 0; + } else { + ret = 0; + } bh->op = &erofs_drop_directly_bhops; erofs_bdrop(bh, false); - return ret < length ? 
-EIO : 0; + return ret; } void erofs_blob_exit(void) { + struct hashmap_iter iter; + struct hashmap_entry *e; + struct erofs_blobchunk *bc, *n; + if (blobfile) fclose(blobfile); - hashmap_free(&blob_hashmap, 1); + while ((e = hashmap_iter_first(&blob_hashmap, &iter))) { + bc = container_of((struct hashmap_entry *)e, + struct erofs_blobchunk, ent); + DBG_BUGON(hashmap_remove(&blob_hashmap, e) != e); + free(bc); + } + DBG_BUGON(hashmap_free(&blob_hashmap)); + + list_for_each_entry_safe(bc, n, &unhashed_blobchunks, list) { + list_del(&bc->list); + free(bc); + } } int erofs_blob_init(const char *blobfile_path) @@ -319,22 +504,25 @@ int erofs_blob_init(const char *blobfile_path) return 0; } -int erofs_generate_devtable(void) +int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices) { - struct erofs_deviceslot dis; - - if (!multidev) + if (!devices) return 0; - bh_devt = erofs_balloc(DEVT, sizeof(dis), 0, 0); - if (IS_ERR(bh_devt)) - return PTR_ERR(bh_devt); + sbi->devs = calloc(devices, sizeof(sbi->devs[0])); + if (!sbi->devs) + return -ENOMEM; - dis = (struct erofs_deviceslot) {}; + bh_devt = erofs_balloc(DEVT, + sizeof(struct erofs_deviceslot) * devices, 0, 0); + if (IS_ERR(bh_devt)) { + free(sbi->devs); + return PTR_ERR(bh_devt); + } erofs_mapbh(bh_devt->block); bh_devt->op = &erofs_skip_write_bhops; - sbi.devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE; - sbi.extra_devices = 1; - erofs_sb_set_device_table(); + sbi->devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE; + sbi->extra_devices = devices; + erofs_sb_set_device_table(sbi); return 0; } diff --git a/lib/block_list.c b/lib/block_list.c index 896fb01..f47a746 100644 --- a/lib/block_list.c +++ b/lib/block_list.c @@ -3,7 +3,6 @@ * Copyright (C), 2021, Coolpad Group Limited. 
* Created by Yue Hu <huyue2@yulong.com> */ -#ifdef WITH_ANDROID #include <stdio.h> #include <sys/stat.h> #include "erofs/block_list.h" @@ -12,17 +11,19 @@ #include "erofs/print.h" static FILE *block_list_fp; +bool srcmap_enabled; -int erofs_droid_blocklist_fopen(void) +int erofs_blocklist_open(char *filename, bool srcmap) { - block_list_fp = fopen(cfg.block_list_file, "w"); + block_list_fp = fopen(filename, "w"); if (!block_list_fp) - return -1; + return -errno; + srcmap_enabled = srcmap; return 0; } -void erofs_droid_blocklist_fclose(void) +void erofs_blocklist_close(void) { if (!block_list_fp) return; @@ -31,6 +32,18 @@ void erofs_droid_blocklist_fclose(void) block_list_fp = NULL; } +/* XXX: really need to be cleaned up */ +void tarerofs_blocklist_write(erofs_blk_t blkaddr, erofs_blk_t nblocks, + erofs_off_t srcoff) +{ + if (!block_list_fp || !nblocks || !srcmap_enabled) + return; + + fprintf(block_list_fp, "%08x %8x %08" PRIx64 "\n", + blkaddr, nblocks, srcoff); +} + +#ifdef WITH_ANDROID static void blocklist_write(const char *path, erofs_blk_t blk_start, erofs_blk_t nblocks, bool first_extent, bool last_extent) @@ -95,7 +108,7 @@ void erofs_droid_blocklist_write_tail_end(struct erofs_inode *inode, return; /* XXX: another hack, which means it has been outputed before */ - if (erofs_blknr(inode->i_size)) { + if (erofs_blknr(inode->sbi, inode->i_size)) { if (blkaddr == NULL_ADDR) fprintf(block_list_fp, "\n"); else diff --git a/lib/cache.c b/lib/cache.c index c735363..caca49b 100644 --- a/lib/cache.c +++ b/lib/cache.c @@ -14,10 +14,10 @@ static struct erofs_buffer_block blkh = { .list = LIST_HEAD_INIT(blkh.list), .blkaddr = NULL_ADDR, }; -static erofs_blk_t tail_blkaddr; +static erofs_blk_t tail_blkaddr, erofs_metablkcnt; /* buckets for all mapped buffer blocks to boost up allocation */ -static struct list_head mapped_buckets[META + 1][EROFS_BLKSIZ]; +static struct list_head mapped_buckets[META + 1][EROFS_MAX_BLOCK_SIZE]; /* last mapped buffer block to accelerate 
erofs_mapbh() */ static struct erofs_buffer_block *last_mapped_block = &blkh; @@ -39,29 +39,6 @@ const struct erofs_bhops erofs_skip_write_bhops = { .flush = erofs_bh_flush_skip_write, }; -int erofs_bh_flush_generic_write(struct erofs_buffer_head *bh, void *buf) -{ - struct erofs_buffer_head *nbh = list_next_entry(bh, list); - erofs_off_t offset = erofs_btell(bh, false); - - DBG_BUGON(nbh->off < bh->off); - return dev_write(buf, offset, nbh->off - bh->off); -} - -static bool erofs_bh_flush_buf_write(struct erofs_buffer_head *bh) -{ - int err = erofs_bh_flush_generic_write(bh, bh->fsprivate); - - if (err) - return false; - free(bh->fsprivate); - return erofs_bh_flush_generic_end(bh); -} - -const struct erofs_bhops erofs_buf_write_bhops = { - .flush = erofs_bh_flush_buf_write, -}; - /* return buffer_head of erofs super block (with size 0) */ struct erofs_buffer_head *erofs_buffer_init(void) { @@ -86,7 +63,8 @@ static void erofs_bupdate_mapped(struct erofs_buffer_block *bb) if (bb->blkaddr == NULL_ADDR) return; - bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ; + bkt = mapped_buckets[bb->type] + + (bb->buffers.off & (erofs_blksiz(&sbi) - 1)); list_del(&bb->mapped_list); list_add_tail(&bb->mapped_list, bkt); } @@ -99,10 +77,11 @@ static int __erofs_battach(struct erofs_buffer_block *bb, unsigned int extrasize, bool dryrun) { + const unsigned int blksiz = erofs_blksiz(&sbi); + const unsigned int blkmask = blksiz - 1; const erofs_off_t alignedoffset = roundup(bb->buffers.off, alignsize); - const int oob = cmpsgn(roundup((bb->buffers.off - 1) % EROFS_BLKSIZ + 1, - alignsize) + incr + extrasize, - EROFS_BLKSIZ); + const int oob = cmpsgn(roundup(((bb->buffers.off - 1) & blkmask) + 1, + alignsize) + incr + extrasize, blksiz); bool tailupdate = false; erofs_blk_t blkaddr; @@ -114,7 +93,7 @@ static int __erofs_battach(struct erofs_buffer_block *bb, blkaddr = bb->blkaddr; if (blkaddr != NULL_ADDR) { tailupdate = (tail_blkaddr == blkaddr + - 
BLK_ROUND_UP(bb->buffers.off)); + DIV_ROUND_UP(bb->buffers.off, blksiz)); if (oob && !tailupdate) return -EINVAL; } @@ -129,10 +108,11 @@ static int __erofs_battach(struct erofs_buffer_block *bb, bb->buffers.off = alignedoffset + incr; /* need to update the tail_blkaddr */ if (tailupdate) - tail_blkaddr = blkaddr + BLK_ROUND_UP(bb->buffers.off); + tail_blkaddr = blkaddr + + DIV_ROUND_UP(bb->buffers.off, blksiz); erofs_bupdate_mapped(bb); } - return (alignedoffset + incr - 1) % EROFS_BLKSIZ + 1; + return ((alignedoffset + incr - 1) & blkmask) + 1; } int erofs_bh_balloon(struct erofs_buffer_head *bh, erofs_off_t incr) @@ -152,16 +132,17 @@ static int erofs_bfind_for_attach(int type, erofs_off_t size, unsigned int alignsize, struct erofs_buffer_block **bbp) { + const unsigned int blksiz = erofs_blksiz(&sbi); struct erofs_buffer_block *cur, *bb; unsigned int used0, used_before, usedmax, used; int ret; - used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext; + used0 = ((size + required_ext) & (blksiz - 1)) + inline_ext; /* inline data should be in the same fs block */ - if (used0 > EROFS_BLKSIZ) + if (used0 > blksiz) return -ENOSPC; - if (!used0 || alignsize == EROFS_BLKSIZ) { + if (!used0 || alignsize == blksiz) { *bbp = NULL; return 0; } @@ -170,10 +151,10 @@ static int erofs_bfind_for_attach(int type, erofs_off_t size, bb = NULL; /* try to find a most-fit mapped buffer block first */ - if (size + required_ext + inline_ext >= EROFS_BLKSIZ) + if (size + required_ext + inline_ext >= blksiz) goto skip_mapped; - used_before = rounddown(EROFS_BLKSIZ - + used_before = rounddown(blksiz - (size + required_ext + inline_ext), alignsize); for (; used_before; --used_before) { struct list_head *bt = mapped_buckets[type] + used_before; @@ -191,7 +172,7 @@ static int erofs_bfind_for_attach(int type, erofs_off_t size, DBG_BUGON(cur->type != type); DBG_BUGON(cur->blkaddr == NULL_ADDR); - DBG_BUGON(used_before != cur->buffers.off % EROFS_BLKSIZ); + DBG_BUGON(used_before != 
(cur->buffers.off & (blksiz - 1))); ret = __erofs_battach(cur, NULL, size, alignsize, required_ext + inline_ext, true); @@ -202,7 +183,7 @@ static int erofs_bfind_for_attach(int type, erofs_off_t size, /* should contain all data in the current block */ used = ret + required_ext + inline_ext; - DBG_BUGON(used > EROFS_BLKSIZ); + DBG_BUGON(used > blksiz); bb = cur; usedmax = used; @@ -215,7 +196,7 @@ skip_mapped: if (cur == &blkh) cur = list_next_entry(cur, list); for (; cur != &blkh; cur = list_next_entry(cur, list)) { - used_before = cur->buffers.off % EROFS_BLKSIZ; + used_before = cur->buffers.off & (blksiz - 1); /* skip if buffer block is just full */ if (!used_before) @@ -230,10 +211,10 @@ skip_mapped: if (ret < 0) continue; - used = (ret + required_ext) % EROFS_BLKSIZ + inline_ext; + used = ((ret + required_ext) & (blksiz - 1)) + inline_ext; /* should contain inline data in current block */ - if (used > EROFS_BLKSIZ) + if (used > blksiz) continue; /* @@ -288,7 +269,10 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size, bb->blkaddr = NULL_ADDR; bb->buffers.off = 0; init_list_head(&bb->buffers.list); - list_add_tail(&bb->list, &blkh.list); + if (type == DATA) + list_add(&bb->list, &last_mapped_block->list); + else + list_add_tail(&bb->list, &blkh.list); init_list_head(&bb->mapped_list); bh = malloc(sizeof(struct erofs_buffer_head)); @@ -300,8 +284,10 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size, ret = __erofs_battach(bb, bh, size, alignsize, required_ext + inline_ext, false); - if (ret < 0) + if (ret < 0) { + free(bh); return ERR_PTR(ret); + } return bh; } @@ -343,7 +329,7 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb) erofs_bupdate_mapped(bb); } - blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off); + blkaddr = bb->blkaddr + BLK_ROUND_UP(&sbi, bb->buffers.off); if (blkaddr > tail_blkaddr) tail_blkaddr = blkaddr; @@ -367,8 +353,21 @@ erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb) return 
tail_blkaddr; } +static void erofs_bfree(struct erofs_buffer_block *bb) +{ + DBG_BUGON(!list_empty(&bb->buffers.list)); + + if (bb == last_mapped_block) + last_mapped_block = list_prev_entry(bb, list); + + list_del(&bb->mapped_list); + list_del(&bb->list); + free(bb); +} + bool erofs_bflush(struct erofs_buffer_block *bb) { + const unsigned int blksiz = erofs_blksiz(&sbi); struct erofs_buffer_block *p, *n; erofs_blk_t blkaddr; @@ -396,18 +395,15 @@ bool erofs_bflush(struct erofs_buffer_block *bb) if (skip) continue; - padding = EROFS_BLKSIZ - p->buffers.off % EROFS_BLKSIZ; - if (padding != EROFS_BLKSIZ) - dev_fillzero(blknr_to_addr(blkaddr) - padding, + padding = blksiz - (p->buffers.off & (blksiz - 1)); + if (padding != blksiz) + dev_fillzero(&sbi, erofs_pos(&sbi, blkaddr) - padding, padding, true); - DBG_BUGON(!list_empty(&p->buffers.list)); - + if (p->type != DATA) + erofs_metablkcnt += BLK_ROUND_UP(&sbi, p->buffers.off); erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1); - - list_del(&p->mapped_list); - list_del(&p->list); - free(p); + erofs_bfree(p); } return true; } @@ -420,7 +416,7 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke) /* tail_blkaddr could be rolled back after revoking all bhs */ if (tryrevoke && blkaddr != NULL_ADDR && - tail_blkaddr == blkaddr + BLK_ROUND_UP(bb->buffers.off)) + tail_blkaddr == blkaddr + BLK_ROUND_UP(&sbi, bb->buffers.off)) rollback = true; bh->op = &erofs_drop_directly_bhops; @@ -429,13 +425,14 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke) if (!list_empty(&bb->buffers.list)) return; - if (bb == last_mapped_block) - last_mapped_block = list_prev_entry(bb, list); - - list_del(&bb->mapped_list); - list_del(&bb->list); - free(bb); - + if (!rollback && bb->type != DATA) + erofs_metablkcnt += BLK_ROUND_UP(&sbi, bb->buffers.off); + erofs_bfree(bb); if (rollback) tail_blkaddr = blkaddr; } + +erofs_blk_t erofs_total_metablocks(void) +{ + return erofs_metablkcnt; +} diff --git 
a/lib/compress.c b/lib/compress.c index afa3bf7..f6dc12a 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -47,28 +47,19 @@ struct z_erofs_vle_compress_ctx { bool fragemitted; }; -#define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ - (sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) - -static unsigned int vle_compressmeta_capacity(erofs_off_t filesize) -{ - const unsigned int indexsize = BLK_ROUND_UP(filesize) * - sizeof(struct z_erofs_vle_decompressed_index); - - return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize; -} +#define Z_EROFS_LEGACY_MAP_HEADER_SIZE Z_EROFS_FULL_INDEX_ALIGN(0) static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx) { - const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN; - struct z_erofs_vle_decompressed_index di; + const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN; + struct z_erofs_lcluster_index di; if (!ctx->clusterofs) return; di.di_clusterofs = cpu_to_le16(ctx->clusterofs); di.di_u.blkaddr = 0; - di.di_advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT); + di.di_advise = cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT); memcpy(ctx->metacur, &di, sizeof(di)); ctx->metacur += sizeof(di); @@ -77,10 +68,11 @@ static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx) static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx) { struct erofs_inode *inode = ctx->inode; + struct erofs_sb_info *sbi = inode->sbi; unsigned int clusterofs = ctx->clusterofs; unsigned int count = ctx->e.length; - unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ; - struct z_erofs_vle_decompressed_index di; + unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi); + struct z_erofs_lcluster_index di; unsigned int type, advise; if (!count) @@ -97,12 +89,12 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx) */ DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments); DBG_BUGON(ctx->e.partial); - type = ctx->e.raw ? 
Z_EROFS_VLE_CLUSTER_TYPE_PLAIN : - Z_EROFS_VLE_CLUSTER_TYPE_HEAD; - advise = type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT; + type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN : + Z_EROFS_LCLUSTER_TYPE_HEAD1; + advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT; di.di_advise = cpu_to_le16(advise); - if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY && + if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL && !ctx->e.compressedblks) di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32); else @@ -118,13 +110,13 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx) do { advise = 0; /* XXX: big pcluster feature should be per-inode */ - if (d0 == 1 && erofs_sb_has_big_pcluster()) { - type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD; + if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) { + type = Z_EROFS_LCLUSTER_TYPE_NONHEAD; di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks | - Z_EROFS_VLE_DI_D0_CBLKCNT); + Z_EROFS_LI_D0_CBLKCNT); di.di_u.delta[1] = cpu_to_le16(d1); } else if (d0) { - type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD; + type = Z_EROFS_LCLUSTER_TYPE_NONHEAD; /* * If the |Z_EROFS_VLE_DI_D0_CBLKCNT| bit is set, parser @@ -137,17 +129,17 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx) * To solve this, we replace d0 with * Z_EROFS_VLE_DI_D0_CBLKCNT-1. */ - if (d0 >= Z_EROFS_VLE_DI_D0_CBLKCNT) + if (d0 >= Z_EROFS_LI_D0_CBLKCNT) di.di_u.delta[0] = cpu_to_le16( - Z_EROFS_VLE_DI_D0_CBLKCNT - 1); + Z_EROFS_LI_D0_CBLKCNT - 1); else di.di_u.delta[0] = cpu_to_le16(d0); di.di_u.delta[1] = cpu_to_le16(d1); } else { - type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN : - Z_EROFS_VLE_CLUSTER_TYPE_HEAD; + type = ctx->e.raw ? 
Z_EROFS_LCLUSTER_TYPE_PLAIN : + Z_EROFS_LCLUSTER_TYPE_HEAD1; - if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY && + if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL && !ctx->e.compressedblks) di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32); else @@ -155,21 +147,21 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx) if (ctx->e.partial) { DBG_BUGON(ctx->e.raw); - advise |= Z_EROFS_VLE_DI_PARTIAL_REF; + advise |= Z_EROFS_LI_PARTIAL_REF; } } - advise |= type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT; + advise |= type << Z_EROFS_LI_LCLUSTER_TYPE_BIT; di.di_advise = cpu_to_le16(advise); memcpy(ctx->metacur, &di, sizeof(di)); ctx->metacur += sizeof(di); - count -= EROFS_BLKSIZ - clusterofs; + count -= erofs_blksiz(sbi) - clusterofs; clusterofs = 0; ++d0; --d1; - } while (clusterofs + count >= EROFS_BLKSIZ); + } while (clusterofs + count >= erofs_blksiz(sbi)); ctx->clusterofs = clusterofs + count; } @@ -178,6 +170,8 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx, unsigned int *len) { struct erofs_inode *inode = ctx->inode; + const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1; + struct erofs_sb_info *sbi = inode->sbi; int ret = 0; /* @@ -190,12 +184,12 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx, do { struct z_erofs_dedupe_ctx dctx = { .start = ctx->queue + ctx->head - ({ int rc; - if (ctx->e.length <= EROFS_BLKSIZ) + if (ctx->e.length <= erofs_blksiz(sbi)) rc = 0; - else if (ctx->e.length - EROFS_BLKSIZ >= ctx->head) + else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head) rc = ctx->head; else - rc = ctx->e.length - EROFS_BLKSIZ; + rc = ctx->e.length - erofs_blksiz(sbi); rc; }), .end = ctx->queue + ctx->head + *len, .cur = ctx->queue + ctx->head, @@ -212,22 +206,34 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx, * decompresssion could be done as another try in practice. 
*/ if (dctx.e.compressedblks > 1 && - (ctx->clusterofs + ctx->e.length - delta) % EROFS_BLKSIZ + - dctx.e.length < 2 * EROFS_BLKSIZ) + ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) + + dctx.e.length < 2 * (lclustermask + 1)) break; - /* fall back to noncompact indexes for deduplication */ - inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B; - inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY; - erofs_sb_set_dedupe(); - if (delta) { DBG_BUGON(delta < 0); DBG_BUGON(!ctx->e.length); + + /* + * For big pcluster dedupe, if we decide to shorten the + * previous big pcluster, make sure that the previous + * CBLKCNT is still kept. + */ + if (ctx->e.compressedblks > 1 && + (ctx->clusterofs & lclustermask) + ctx->e.length + - delta < 2 * (lclustermask + 1)) + break; ctx->e.partial = true; ctx->e.length -= delta; } + /* fall back to noncompact indexes for deduplication */ + inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B; + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; + erofs_sb_set_dedupe(sbi); + + sbi->saved_by_deduplication += + dctx.e.compressedblks * erofs_blksiz(sbi); erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks", dctx.e.length, dctx.e.raw ? 
"un" : "", delta, dctx.e.blkaddr, dctx.e.compressedblks); @@ -239,7 +245,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx, if (ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = - round_down(ctx->head, EROFS_BLKSIZ); + round_down(ctx->head, erofs_blksiz(sbi)); const unsigned int qh_after = ctx->head - qh_aligned; memmove(ctx->queue, ctx->queue + qh_aligned, @@ -260,52 +266,57 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx, unsigned int *len, char *dst) { int ret; + struct erofs_sb_info *sbi = ctx->inode->sbi; unsigned int count, interlaced_offset, rightpart; /* reset clusterofs to 0 if permitted */ - if (!erofs_sb_has_lz4_0padding() && ctx->clusterofs && + if (!erofs_sb_has_lz4_0padding(sbi) && ctx->clusterofs && ctx->head >= ctx->clusterofs) { ctx->head -= ctx->clusterofs; *len += ctx->clusterofs; ctx->clusterofs = 0; } - count = min(EROFS_BLKSIZ, *len); + count = min(erofs_blksiz(sbi), *len); /* write interlaced uncompressed data if needed */ if (ctx->inode->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) interlaced_offset = ctx->clusterofs; else interlaced_offset = 0; - rightpart = min(EROFS_BLKSIZ - interlaced_offset, count); + rightpart = min(erofs_blksiz(sbi) - interlaced_offset, count); - memset(dst, 0, EROFS_BLKSIZ); + memset(dst, 0, erofs_blksiz(sbi)); memcpy(dst + interlaced_offset, ctx->queue + ctx->head, rightpart); memcpy(dst, ctx->queue + ctx->head + rightpart, count - rightpart); erofs_dbg("Writing %u uncompressed data to block %u", count, ctx->blkaddr); - ret = blk_write(dst, ctx->blkaddr, 1); + ret = blk_write(sbi, dst, ctx->blkaddr, 1); if (ret) return ret; return count; } -static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode) +static unsigned int z_erofs_get_max_pclustersize(struct erofs_inode *inode) { + unsigned int pclusterblks; + if (erofs_is_packed_inode(inode)) - return cfg.c_pclusterblks_packed; + pclusterblks = cfg.c_pclusterblks_packed; 
#ifndef NDEBUG - if (cfg.c_random_pclusterblks) - return 1 + rand() % cfg.c_pclusterblks_max; + else if (cfg.c_random_pclusterblks) + pclusterblks = 1 + rand() % cfg.c_pclusterblks_max; #endif - if (cfg.c_compress_hints_file) { + else if (cfg.c_compress_hints_file) { z_erofs_apply_compress_hints(inode); DBG_BUGON(!inode->z_physical_clusterblks); - return inode->z_physical_clusterblks; + pclusterblks = inode->z_physical_clusterblks; + } else { + pclusterblks = cfg.c_pclusterblks_def; } - return cfg.c_pclusterblks_def; + return pclusterblks * erofs_blksiz(inode->sbi); } static int z_erofs_fill_inline_data(struct erofs_inode *inode, void *data, @@ -324,23 +335,25 @@ static int z_erofs_fill_inline_data(struct erofs_inode *inode, void *data, return len; } -static void tryrecompress_trailing(struct erofs_compress *ec, +static void tryrecompress_trailing(struct z_erofs_vle_compress_ctx *ctx, + struct erofs_compress *ec, void *in, unsigned int *insize, void *out, int *compressedsize) { + struct erofs_sb_info *sbi = ctx->inode->sbi; static char tmp[Z_EROFS_PCLUSTER_MAX_SIZE]; unsigned int count; int ret = *compressedsize; /* no need to recompress */ - if (!(ret & (EROFS_BLKSIZ - 1))) + if (!(ret & (erofs_blksiz(sbi) - 1))) return; count = *insize; ret = erofs_compress_destsize(ec, in, &count, (void *)tmp, - rounddown(ret, EROFS_BLKSIZ), false); + rounddown(ret, erofs_blksiz(sbi)), false); if (ret <= 0 || ret + (*insize - count) >= - roundup(*compressedsize, EROFS_BLKSIZ)) + roundup(*compressedsize, erofs_blksiz(sbi))) return; /* replace the original compressed data if any gain */ @@ -353,14 +366,16 @@ static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx, unsigned int len) { struct erofs_inode *inode = ctx->inode; + struct erofs_sb_info *sbi = inode->sbi; const unsigned int newsize = ctx->remaining + len; DBG_BUGON(!inode->fragment_size); /* try to fix again if it gets larger (should be rare) */ if (inode->fragment_size < newsize) { - 
ctx->pclustersize = roundup(newsize - inode->fragment_size, - EROFS_BLKSIZ); + ctx->pclustersize = min(z_erofs_get_max_pclustersize(inode), + roundup(newsize - inode->fragment_size, + erofs_blksiz(sbi))); return false; } @@ -379,9 +394,10 @@ static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx, static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx) { - static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ]; + static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_MAX_BLOCK_SIZE]; struct erofs_inode *inode = ctx->inode; - char *const dst = dstbuf + EROFS_BLKSIZ; + struct erofs_sb_info *sbi = inode->sbi; + char *const dst = dstbuf + erofs_blksiz(sbi); struct erofs_compress *const h = &ctx->ccfg->handle; unsigned int len = ctx->tail - ctx->head; bool is_packed_inode = erofs_is_packed_inode(inode); @@ -404,13 +420,13 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx) if (may_packing) { if (inode->fragment_size && !fix_dedupedfrag) { ctx->pclustersize = - roundup(len, EROFS_BLKSIZ); + roundup(len, erofs_blksiz(sbi)); goto fix_dedupedfrag; } ctx->e.length = len; goto frag_packing; } - if (!may_inline && len <= EROFS_BLKSIZ) + if (!may_inline && len <= erofs_blksiz(sbi)) goto nocompression; } @@ -426,7 +442,7 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx) erofs_strerror(ret)); } - if (may_inline && len < EROFS_BLKSIZ) { + if (may_inline && len < erofs_blksiz(sbi)) { ret = z_erofs_fill_inline_data(inode, ctx->queue + ctx->head, len, true); @@ -463,8 +479,8 @@ frag_packing: fix_dedupedfrag = false; /* tailpcluster should be less than 1 block */ } else if (may_inline && len == ctx->e.length && - ret < EROFS_BLKSIZ) { - if (ctx->clusterofs + len <= EROFS_BLKSIZ) { + ret < erofs_blksiz(sbi)) { + if (ctx->clusterofs + len <= erofs_blksiz(sbi)) { inode->eof_tailraw = malloc(len); if (!inode->eof_tailraw) return -ENOMEM; @@ -490,36 +506,37 @@ frag_packing: * Otherwise, just drop it and go to 
packing. */ if (may_packing && len == ctx->e.length && - (ret & (EROFS_BLKSIZ - 1)) && + (ret & (erofs_blksiz(sbi) - 1)) && ctx->tail < sizeof(ctx->queue)) { - ctx->pclustersize = - BLK_ROUND_UP(ret) * EROFS_BLKSIZ; + ctx->pclustersize = BLK_ROUND_UP(sbi, ret) * + erofs_blksiz(sbi); goto fix_dedupedfrag; } if (may_inline && len == ctx->e.length) - tryrecompress_trailing(h, ctx->queue + ctx->head, + tryrecompress_trailing(ctx, h, + ctx->queue + ctx->head, &ctx->e.length, dst, &ret); - tailused = ret & (EROFS_BLKSIZ - 1); + tailused = ret & (erofs_blksiz(sbi) - 1); padding = 0; - ctx->e.compressedblks = BLK_ROUND_UP(ret); - DBG_BUGON(ctx->e.compressedblks * EROFS_BLKSIZ >= + ctx->e.compressedblks = BLK_ROUND_UP(sbi, ret); + DBG_BUGON(ctx->e.compressedblks * erofs_blksiz(sbi) >= ctx->e.length); /* zero out garbage trailing data for non-0padding */ - if (!erofs_sb_has_lz4_0padding()) + if (!erofs_sb_has_lz4_0padding(sbi)) memset(dst + ret, 0, - roundup(ret, EROFS_BLKSIZ) - ret); + roundup(ret, erofs_blksiz(sbi)) - ret); else if (tailused) - padding = EROFS_BLKSIZ - tailused; + padding = erofs_blksiz(sbi) - tailused; /* write compressed data */ erofs_dbg("Writing %u compressed data to %u of %u blocks", ctx->e.length, ctx->blkaddr, ctx->e.compressedblks); - ret = blk_write(dst - padding, ctx->blkaddr, + ret = blk_write(sbi, dst - padding, ctx->blkaddr, ctx->e.compressedblks); if (ret) return ret; @@ -542,7 +559,7 @@ frag_packing: if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = - round_down(ctx->head, EROFS_BLKSIZ); + round_down(ctx->head, erofs_blksiz(sbi)); const unsigned int qh_after = ctx->head - qh_aligned; memmove(ctx->queue, ctx->queue + qh_aligned, @@ -574,18 +591,18 @@ struct z_erofs_compressindex_vec { static void *parse_legacy_indexes(struct z_erofs_compressindex_vec *cv, unsigned int nr, void *metacur) { - struct z_erofs_vle_decompressed_index *const db = metacur; + struct z_erofs_lcluster_index *const db = metacur; 
unsigned int i; for (i = 0; i < nr; ++i, ++cv) { - struct z_erofs_vle_decompressed_index *const di = db + i; + struct z_erofs_lcluster_index *const di = db + i; const unsigned int advise = le16_to_cpu(di->di_advise); - cv->clustertype = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & - ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1); + cv->clustertype = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) & + ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1); cv->clusterofs = le16_to_cpu(di->di_clusterofs); - if (cv->clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + if (cv->clustertype == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { cv->u.delta[0] = le16_to_cpu(di->di_u.delta[0]); cv->u.delta[1] = le16_to_cpu(di->di_u.delta[1]); } else { @@ -600,10 +617,10 @@ static void *write_compacted_indexes(u8 *out, erofs_blk_t *blkaddr_ret, unsigned int destsize, unsigned int logical_clusterbits, - bool final, bool *dummy_head) + bool final, bool *dummy_head, + bool update_blkaddr) { unsigned int vcnt, encodebits, pos, i, cblks; - bool update_blkaddr; erofs_blk_t blkaddr; if (destsize == 4) @@ -614,16 +631,15 @@ static void *write_compacted_indexes(u8 *out, return ERR_PTR(-EINVAL); encodebits = (vcnt * destsize * 8 - 32) / vcnt; blkaddr = *blkaddr_ret; - update_blkaddr = erofs_sb_has_big_pcluster(); pos = 0; for (i = 0; i < vcnt; ++i) { unsigned int offset, v; u8 ch, rem; - if (cv[i].clustertype == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { - if (cv[i].u.delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) { - cblks = cv[i].u.delta[0] & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + if (cv[i].clustertype == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + if (cv[i].u.delta[0] & Z_EROFS_LI_D0_CBLKCNT) { + cblks = cv[i].u.delta[0] & ~Z_EROFS_LI_D0_CBLKCNT; offset = cv[i].u.delta[0]; blkaddr += cblks; *dummy_head = false; @@ -668,25 +684,37 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, unsigned int legacymetasize, void *compressmeta) { - const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize + - inode->xattr_isize) + + 
const unsigned int mpos = roundup(inode->inode_isize + + inode->xattr_isize, 8) + sizeof(struct z_erofs_map_header); const unsigned int totalidx = (legacymetasize - Z_EROFS_LEGACY_MAP_HEADER_SIZE) / - sizeof(struct z_erofs_vle_decompressed_index); + sizeof(struct z_erofs_lcluster_index); const unsigned int logical_clusterbits = inode->z_logical_clusterbits; u8 *out, *in; struct z_erofs_compressindex_vec cv[16]; + struct erofs_sb_info *sbi = inode->sbi; /* # of 8-byte units so that it can be aligned with 32 bytes */ unsigned int compacted_4b_initial, compacted_4b_end; unsigned int compacted_2b; bool dummy_head; + bool big_pcluster = erofs_sb_has_big_pcluster(sbi); - if (logical_clusterbits < LOG_BLOCK_SIZE || LOG_BLOCK_SIZE < 12) + if (logical_clusterbits < sbi->blkszbits || sbi->blkszbits < 12) return -EINVAL; - if (logical_clusterbits > 14) /* currently not supported */ - return -ENOTSUP; - if (logical_clusterbits == 12) { + if (logical_clusterbits > 14) { + erofs_err("compact format is unsupported for lcluster size %u", + 1 << logical_clusterbits); + return -EOPNOTSUPP; + } + + if (inode->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) { + if (logical_clusterbits != 12) { + erofs_err("compact 2B is unsupported for lcluster size %u", + 1 << logical_clusterbits); + return -EINVAL; + } + compacted_4b_initial = (32 - mpos % 32) / 4; if (compacted_4b_initial == 32 / 4) compacted_4b_initial = 0; @@ -712,7 +740,7 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, dummy_head = false; /* prior to bigpcluster, blkaddr was bumped up once coming into HEAD */ - if (!erofs_sb_has_big_pcluster()) { + if (!big_pcluster) { --blkaddr; dummy_head = true; } @@ -722,7 +750,7 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, in = parse_legacy_indexes(cv, 2, in); out = write_compacted_indexes(out, cv, &blkaddr, 4, logical_clusterbits, false, - &dummy_head); + &dummy_head, big_pcluster); compacted_4b_initial -= 2; } DBG_BUGON(compacted_4b_initial); @@ 
-732,7 +760,7 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, in = parse_legacy_indexes(cv, 16, in); out = write_compacted_indexes(out, cv, &blkaddr, 2, logical_clusterbits, false, - &dummy_head); + &dummy_head, big_pcluster); compacted_2b -= 16; } DBG_BUGON(compacted_2b); @@ -742,7 +770,7 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, in = parse_legacy_indexes(cv, 2, in); out = write_compacted_indexes(out, cv, &blkaddr, 4, logical_clusterbits, false, - &dummy_head); + &dummy_head, big_pcluster); compacted_4b_end -= 2; } @@ -752,7 +780,7 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, in = parse_legacy_indexes(cv, 1, in); out = write_compacted_indexes(out, cv, &blkaddr, 4, logical_clusterbits, true, - &dummy_head); + &dummy_head, big_pcluster); } inode->extent_isize = out - (u8 *)compressmeta; return 0; @@ -761,12 +789,13 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode, static void z_erofs_write_mapheader(struct erofs_inode *inode, void *compressmeta) { + struct erofs_sb_info *sbi = inode->sbi; struct z_erofs_map_header h = { .h_advise = cpu_to_le16(inode->z_advise), .h_algorithmtype = inode->z_algorithmtype[1] << 4 | inode->z_algorithmtype[0], /* lclustersize */ - .h_clusterbits = inode->z_logical_clusterbits - 12, + .h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits, }; if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) @@ -781,7 +810,8 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode, void z_erofs_drop_inline_pcluster(struct erofs_inode *inode) { - const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN; + struct erofs_sb_info *sbi = inode->sbi; + const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN; struct z_erofs_map_header *h = inode->compressmeta; h->h_advise = cpu_to_le16(le16_to_cpu(h->h_advise) & @@ -792,26 +822,26 @@ void z_erofs_drop_inline_pcluster(struct erofs_inode *inode) DBG_BUGON(inode->compressed_idata != true); /* patch the 
EOF lcluster to uncompressed type first */ - if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { - struct z_erofs_vle_decompressed_index *di = + if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL) { + struct z_erofs_lcluster_index *di = (inode->compressmeta + inode->extent_isize) - - sizeof(struct z_erofs_vle_decompressed_index); + sizeof(struct z_erofs_lcluster_index); __le16 advise = - cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT); + cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT); di->di_advise = advise; - } else if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION) { + } else if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) { /* handle the last compacted 4B pack */ unsigned int eofs, base, pos, v, lo; u8 *out; eofs = inode->extent_isize - - (4 << (BLK_ROUND_UP(inode->i_size) & 1)); + (4 << (BLK_ROUND_UP(sbi, inode->i_size) & 1)); base = round_down(eofs, 8); pos = 16 /* encodebits */ * ((eofs - base) / 4); out = inode->compressmeta + base; - lo = get_unaligned_le32(out + pos / 8) & (EROFS_BLKSIZ - 1); - v = (type << LOG_BLOCK_SIZE) | lo; + lo = erofs_blkoff(sbi, get_unaligned_le32(out + pos / 8)); + v = (type << sbi->blkszbits) | lo; out[pos / 8] = v & 0xff; out[pos / 8 + 1] = v >> 8; } else { @@ -833,7 +863,10 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) erofs_blk_t blkaddr, compressed_blocks; unsigned int legacymetasize; int ret; - u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size)); + struct erofs_sb_info *sbi = inode->sbi; + u8 *compressmeta = malloc(BLK_ROUND_UP(sbi, inode->i_size) * + sizeof(struct z_erofs_lcluster_index) + + Z_EROFS_LEGACY_MAP_HEADER_SIZE); if (!compressmeta) return -ENOMEM; @@ -847,16 +880,18 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) /* initialize per-file compression setting */ inode->z_advise = 0; - if (!cfg.c_legacy_compress) { - inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B; - inode->datalayout = EROFS_INODE_FLAT_COMPRESSION; + 
inode->z_logical_clusterbits = sbi->blkszbits; + if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) { + if (inode->z_logical_clusterbits <= 12) + inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B; + inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT; } else { - inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY; + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; } - if (erofs_sb_has_big_pcluster()) { + if (erofs_sb_has_big_pcluster(sbi)) { inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1; - if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION) + if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2; } if (cfg.c_fragments && !cfg.c_dedupe) @@ -875,7 +910,6 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) ctx.ccfg = &erofs_ccfg[inode->z_algorithmtype[0]]; inode->z_algorithmtype[0] = ctx.ccfg[0].algorithmtype; inode->z_algorithmtype[1] = 0; - inode->z_logical_clusterbits = LOG_BLOCK_SIZE; inode->idata_size = 0; inode->fragment_size = 0; @@ -892,7 +926,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) blkaddr = erofs_mapbh(bh->block); /* start_blkaddr */ ctx.inode = inode; - ctx.pclustersize = z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ; + ctx.pclustersize = z_erofs_get_max_pclustersize(inode); ctx.blkaddr = blkaddr; ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE; ctx.head = ctx.tail = 0; @@ -904,6 +938,8 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) if (cfg.c_all_fragments && !erofs_is_packed_inode(inode) && !inode->fragment_size) { ret = z_erofs_pack_file_from_fd(inode, fd, ctx.tof_chksum); + if (ret) + goto err_free_idata; } else { while (ctx.remaining) { const u64 rx = min_t(u64, ctx.remaining, @@ -945,7 +981,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) legacymetasize = ctx.metacur - compressmeta; /* estimate if data compression saves space or not */ if (!inode->fragment_size && - 
compressed_blocks * EROFS_BLKSIZ + inode->idata_size + + compressed_blocks * erofs_blksiz(sbi) + inode->idata_size + legacymetasize >= inode->i_size) { z_erofs_dedupe_commit(true); ret = -ENOSPC; @@ -954,18 +990,21 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) z_erofs_dedupe_commit(false); z_erofs_write_mapheader(inode, compressmeta); + if (!ctx.fragemitted) + sbi->saved_by_deduplication += inode->fragment_size; + /* if the entire file is a fragment, a simplified form is used. */ if (inode->i_size == inode->fragment_size) { DBG_BUGON(inode->fragmentoff >> 63); *(__le64 *)compressmeta = cpu_to_le64(inode->fragmentoff | 1ULL << 63); - inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY; + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; legacymetasize = Z_EROFS_LEGACY_MAP_HEADER_SIZE; } if (compressed_blocks) { - ret = erofs_bh_balloon(bh, blknr_to_addr(compressed_blocks)); - DBG_BUGON(ret != EROFS_BLKSIZ); + ret = erofs_bh_balloon(bh, erofs_pos(sbi, compressed_blocks)); + DBG_BUGON(ret != erofs_blksiz(sbi)); } else { if (!cfg.c_fragments && !cfg.c_dedupe) DBG_BUGON(!inode->idata_size); @@ -984,7 +1023,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) inode->u.i_blocks = compressed_blocks; - if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { + if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL) { inode->extent_isize = legacymetasize; } else { ret = z_erofs_convert_to_compacted_format(inode, blkaddr, @@ -1009,21 +1048,13 @@ err_free_meta: return ret; } -static int erofs_get_compress_algorithm_id(const char *name) -{ - if (!strcmp(name, "lz4") || !strcmp(name, "lz4hc")) - return Z_EROFS_COMPRESSION_LZ4; - if (!strcmp(name, "lzma")) - return Z_EROFS_COMPRESSION_LZMA; - return -ENOTSUP; -} - -int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh) +static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi, + struct erofs_buffer_head *sb_bh) { struct erofs_buffer_head *bh = sb_bh; int ret = 0; - if 
(sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) { + if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) { struct { __le16 size; struct z_erofs_lz4_cfgs lz4; @@ -1031,7 +1062,7 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh) .size = cpu_to_le16(sizeof(struct z_erofs_lz4_cfgs)), .lz4 = { .max_distance = - cpu_to_le16(sbi.lz4_max_distance), + cpu_to_le16(sbi->lz4_max_distance), .max_pclusterblks = cfg.c_pclusterblks_max, } }; @@ -1042,12 +1073,12 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh) return PTR_ERR(bh); } erofs_mapbh(bh->block); - ret = dev_write(&lz4alg, erofs_btell(bh, false), + ret = dev_write(sbi, &lz4alg, erofs_btell(bh, false), sizeof(lz4alg)); bh->op = &erofs_drop_directly_bhops; } #ifdef HAVE_LIBLZMA - if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) { + if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) { struct { __le16 size; struct z_erofs_lzma_cfgs lzma; @@ -1064,37 +1095,57 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh) return PTR_ERR(bh); } erofs_mapbh(bh->block); - ret = dev_write(&lzmaalg, erofs_btell(bh, false), + ret = dev_write(sbi, &lzmaalg, erofs_btell(bh, false), sizeof(lzmaalg)); bh->op = &erofs_drop_directly_bhops; } #endif + if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) { + struct { + __le16 size; + struct z_erofs_deflate_cfgs z; + } __packed zalg = { + .size = cpu_to_le16(sizeof(struct z_erofs_deflate_cfgs)), + .z = { + .windowbits = + cpu_to_le32(ilog2(cfg.c_dict_size)), + } + }; + + bh = erofs_battach(bh, META, sizeof(zalg)); + if (IS_ERR(bh)) { + DBG_BUGON(1); + return PTR_ERR(bh); + } + erofs_mapbh(bh->block); + ret = dev_write(sbi, &zalg, erofs_btell(bh, false), + sizeof(zalg)); + bh->op = &erofs_drop_directly_bhops; + } return ret; } -int z_erofs_compress_init(struct erofs_buffer_head *sb_bh) +int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh) { int i, ret; for (i = 
0; cfg.c_compr_alg[i]; ++i) { - ret = erofs_compressor_init(&erofs_ccfg[i].handle, - cfg.c_compr_alg[i]); + struct erofs_compress *c = &erofs_ccfg[i].handle; + + ret = erofs_compressor_init(sbi, c, cfg.c_compr_alg[i]); if (ret) return ret; - ret = erofs_compressor_setlevel(&erofs_ccfg[i].handle, - cfg.c_compr_level[i]); + ret = erofs_compressor_setlevel(c, cfg.c_compr_level[i]); if (ret) return ret; - ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg[i]); - if (ret < 0) - return ret; - erofs_ccfg[i].algorithmtype = ret; + erofs_ccfg[i].algorithmtype = + z_erofs_get_compress_algorithm_id(c); erofs_ccfg[i].enable = true; - sbi.available_compr_algs |= 1 << ret; - if (ret != Z_EROFS_COMPRESSION_LZ4) - erofs_sb_set_compr_cfgs(); + sbi->available_compr_algs |= 1 << erofs_ccfg[i].algorithmtype; + if (erofs_ccfg[i].algorithmtype != Z_EROFS_COMPRESSION_LZ4) + erofs_sb_set_compr_cfgs(sbi); } /* @@ -1103,7 +1154,7 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh) */ if (!cfg.c_compr_alg[0] || (cfg.c_legacy_compress && !strncmp(cfg.c_compr_alg[0], "lz4", 3))) - erofs_sb_clear_lz4_0padding(); + erofs_sb_clear_lz4_0padding(sbi); if (!cfg.c_compr_alg[0]) return 0; @@ -1114,22 +1165,20 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh) */ if (cfg.c_pclusterblks_max > 1) { if (cfg.c_pclusterblks_max > - Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) { + Z_EROFS_PCLUSTER_MAX_SIZE / erofs_blksiz(sbi)) { erofs_err("unsupported clusterblks %u (too large)", cfg.c_pclusterblks_max); return -EINVAL; } - erofs_sb_set_big_pcluster(); + erofs_sb_set_big_pcluster(sbi); } if (cfg.c_pclusterblks_packed > cfg.c_pclusterblks_max) { erofs_err("invalid physical cluster size for the packed file"); return -EINVAL; } - if (erofs_sb_has_compr_cfgs()) { - sbi.available_compr_algs |= 1 << ret; - return z_erofs_build_compr_cfgs(sb_bh); - } + if (erofs_sb_has_compr_cfgs(sbi)) + return z_erofs_build_compr_cfgs(sbi, sb_bh); return 0; } diff --git a/lib/compress_hints.c 
b/lib/compress_hints.c index 1e9e05d..afc9f8f 100644 --- a/lib/compress_hints.c +++ b/lib/compress_hints.c @@ -86,7 +86,7 @@ void erofs_cleanup_compress_hints(void) } } -int erofs_load_compress_hints(void) +int erofs_load_compress_hints(struct erofs_sb_info *sbi) { char buf[PATH_MAX + 100]; FILE *f; @@ -133,21 +133,21 @@ int erofs_load_compress_hints(void) } } - if (pclustersize % EROFS_BLKSIZ) { + if (pclustersize % erofs_blksiz(sbi)) { erofs_warn("invalid physical clustersize %u, " "use default pclusterblks %u", pclustersize, cfg.c_pclusterblks_def); continue; } erofs_insert_compress_hints(pattern, - pclustersize / EROFS_BLKSIZ, ccfg); + pclustersize / erofs_blksiz(sbi), ccfg); if (pclustersize > max_pclustersize) max_pclustersize = pclustersize; } - if (cfg.c_pclusterblks_max * EROFS_BLKSIZ < max_pclustersize) { - cfg.c_pclusterblks_max = max_pclustersize / EROFS_BLKSIZ; + if (cfg.c_pclusterblks_max * erofs_blksiz(sbi) < max_pclustersize) { + cfg.c_pclusterblks_max = max_pclustersize / erofs_blksiz(sbi); erofs_warn("update max pclusterblks to %u", cfg.c_pclusterblks_max); } out: diff --git a/lib/compressor.c b/lib/compressor.c index a46bc39..93f5617 100644 --- a/lib/compressor.c +++ b/lib/compressor.c @@ -10,18 +10,71 @@ #define EROFS_CONFIG_COMPR_DEF_BOUNDARY (128) -static const struct erofs_compressor *compressors[] = { +static const struct erofs_algorithm { + char *name; + const struct erofs_compressor *c; + unsigned int id; + + /* its name won't be shown as a supported algorithm */ + bool optimisor; +} erofs_algs[] = { + { "lz4", #if LZ4_ENABLED -#if LZ4HC_ENABLED - &erofs_compressor_lz4hc, -#endif &erofs_compressor_lz4, +#else + NULL, #endif + Z_EROFS_COMPRESSION_LZ4, false }, + +#if LZ4HC_ENABLED + { "lz4hc", &erofs_compressor_lz4hc, + Z_EROFS_COMPRESSION_LZ4, true }, +#endif + + { "lzma", #if HAVE_LIBLZMA &erofs_compressor_lzma, +#else + NULL, +#endif + Z_EROFS_COMPRESSION_LZMA, false }, + + { "deflate", &erofs_compressor_deflate, + 
Z_EROFS_COMPRESSION_DEFLATE, false }, + +#if HAVE_LIBDEFLATE + { "libdeflate", &erofs_compressor_libdeflate, + Z_EROFS_COMPRESSION_DEFLATE, true }, #endif }; +int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c) +{ + DBG_BUGON(!c->alg); + return c->alg->id; +} + +const char *z_erofs_list_supported_algorithms(int i, unsigned int *mask) +{ + if (i >= ARRAY_SIZE(erofs_algs)) + return NULL; + if (!erofs_algs[i].optimisor && (*mask & (1 << erofs_algs[i].id))) { + *mask ^= 1 << erofs_algs[i].id; + return erofs_algs[i].name; + } + return ""; +} + +const char *z_erofs_list_available_compressors(int *i) +{ + for (;*i < ARRAY_SIZE(erofs_algs); ++*i) { + if (!erofs_algs[*i].c) + continue; + return erofs_algs[(*i)++].name; + } + return NULL; +} + int erofs_compress_destsize(const struct erofs_compress *c, const void *src, unsigned int *srcsize, void *dst, unsigned int dstsize, bool inblocks) @@ -30,18 +83,18 @@ int erofs_compress_destsize(const struct erofs_compress *c, int ret; DBG_BUGON(!c->alg); - if (!c->alg->compress_destsize) + if (!c->alg->c->compress_destsize) return -ENOTSUP; uncompressed_capacity = *srcsize; - ret = c->alg->compress_destsize(c, src, srcsize, dst, dstsize); + ret = c->alg->c->compress_destsize(c, src, srcsize, dst, dstsize); if (ret < 0) return ret; - /* XXX: ret >= EROFS_BLKSIZ is a temporary hack for ztailpacking */ - if (inblocks || ret >= EROFS_BLKSIZ || + /* XXX: ret >= destsize_alignsize is a temporary hack for ztailpacking */ + if (inblocks || ret >= c->destsize_alignsize || uncompressed_capacity != *srcsize) - compressed_size = roundup(ret, EROFS_BLKSIZ); + compressed_size = roundup(ret, c->destsize_alignsize); else compressed_size = ret; DBG_BUGON(c->compress_threshold < 100); @@ -51,16 +104,11 @@ int erofs_compress_destsize(const struct erofs_compress *c, return ret; } -const char *z_erofs_list_available_compressors(unsigned int i) -{ - return i >= ARRAY_SIZE(compressors) ? 
NULL : compressors[i]->name; -} - int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level) { DBG_BUGON(!c->alg); - if (c->alg->setlevel) - return c->alg->setlevel(c, compression_level); + if (c->alg->c->setlevel) + return c->alg->c->setlevel(c, compression_level); if (compression_level >= 0) return -EINVAL; @@ -68,16 +116,19 @@ int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level) return 0; } -int erofs_compressor_init(struct erofs_compress *c, char *alg_name) +int erofs_compressor_init(struct erofs_sb_info *sbi, + struct erofs_compress *c, char *alg_name) { int ret, i; + c->sbi = sbi; + /* should be written in "minimum compression ratio * 100" */ c->compress_threshold = 100; /* optimize for 4k size page */ - c->destsize_alignsize = EROFS_BLKSIZ; - c->destsize_redzone_begin = EROFS_BLKSIZ - 16; + c->destsize_alignsize = erofs_blksiz(sbi); + c->destsize_redzone_begin = erofs_blksiz(sbi) - 16; c->destsize_redzone_end = EROFS_CONFIG_COMPR_DEF_BOUNDARY; if (!alg_name) { @@ -86,13 +137,16 @@ int erofs_compressor_init(struct erofs_compress *c, char *alg_name) } ret = -EINVAL; - for (i = 0; i < ARRAY_SIZE(compressors); ++i) { - if (alg_name && strcmp(alg_name, compressors[i]->name)) + for (i = 0; i < ARRAY_SIZE(erofs_algs); ++i) { + if (alg_name && strcmp(alg_name, erofs_algs[i].name)) + continue; + + if (!erofs_algs[i].c) continue; - ret = compressors[i]->init(c); + ret = erofs_algs[i].c->init(c); if (!ret) { - DBG_BUGON(!c->alg); + c->alg = &erofs_algs[i]; return 0; } } @@ -102,7 +156,7 @@ int erofs_compressor_init(struct erofs_compress *c, char *alg_name) int erofs_compressor_exit(struct erofs_compress *c) { - if (c->alg && c->alg->exit) - return c->alg->exit(c); + if (c->alg && c->alg->c->exit) + return c->alg->c->exit(c); return 0; } diff --git a/lib/compressor.h b/lib/compressor.h index cf063f1..9fa01d1 100644 --- a/lib/compressor.h +++ b/lib/compressor.h @@ -12,8 +12,6 @@ struct erofs_compress; struct erofs_compressor 
{ - const char *name; - int default_level; int best_level; @@ -26,8 +24,11 @@ struct erofs_compressor { void *dst, unsigned int dstsize); }; +struct erofs_algorithm; + struct erofs_compress { - const struct erofs_compressor *alg; + struct erofs_sb_info *sbi; + const struct erofs_algorithm *alg; unsigned int compress_threshold; unsigned int compression_level; @@ -44,13 +45,17 @@ struct erofs_compress { extern const struct erofs_compressor erofs_compressor_lz4; extern const struct erofs_compressor erofs_compressor_lz4hc; extern const struct erofs_compressor erofs_compressor_lzma; +extern const struct erofs_compressor erofs_compressor_deflate; +extern const struct erofs_compressor erofs_compressor_libdeflate; +int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c); int erofs_compress_destsize(const struct erofs_compress *c, const void *src, unsigned int *srcsize, void *dst, unsigned int dstsize, bool inblocks); int erofs_compressor_setlevel(struct erofs_compress *c, int compression_level); -int erofs_compressor_init(struct erofs_compress *c, char *alg_name); +int erofs_compressor_init(struct erofs_sb_info *sbi, + struct erofs_compress *c, char *alg_name); int erofs_compressor_exit(struct erofs_compress *c); #endif diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c new file mode 100644 index 0000000..4e5902e --- /dev/null +++ b/lib/compressor_deflate.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +/* + * Copyright (C) 2023, Alibaba Cloud + * Copyright (C) 2023, Gao Xiang <xiang@kernel.org> + */ +#include "erofs/internal.h" +#include "erofs/print.h" +#include "erofs/config.h" +#include "compressor.h" + +void *kite_deflate_init(int level, unsigned int dict_size); +void kite_deflate_end(void *s); +int kite_deflate_destsize(void *s, const u8 *in, u8 *out, + unsigned int *srcsize, unsigned int target_dstsize); + +static int deflate_compress_destsize(const struct erofs_compress *c, + const void *src, unsigned int *srcsize, 
+ void *dst, unsigned int dstsize) +{ + int rc = kite_deflate_destsize(c->private_data, src, dst, + srcsize, dstsize); + + if (rc <= 0) + return -EFAULT; + return rc; +} + +static int compressor_deflate_exit(struct erofs_compress *c) +{ + if (!c->private_data) + return -EINVAL; + + kite_deflate_end(c->private_data); + return 0; +} + +static int compressor_deflate_init(struct erofs_compress *c) +{ + c->private_data = NULL; + + erofs_warn("EXPERIMENTAL DEFLATE algorithm in use. Use at your own risk!"); + erofs_warn("*Carefully* check filesystem data correctness to avoid corruption!"); + erofs_warn("Please send a report to <linux-erofs@lists.ozlabs.org> if something is wrong."); + return 0; +} + +static int erofs_compressor_deflate_setlevel(struct erofs_compress *c, + int compression_level) +{ + void *s; + + if (c->private_data) { + kite_deflate_end(c->private_data); + c->private_data = NULL; + } + + if (compression_level < 0) + compression_level = erofs_compressor_deflate.default_level; + + s = kite_deflate_init(compression_level, cfg.c_dict_size); + if (IS_ERR(s)) + return PTR_ERR(s); + + c->private_data = s; + c->compression_level = compression_level; + return 0; +} + +const struct erofs_compressor erofs_compressor_deflate = { + .default_level = 1, + .best_level = 9, + .init = compressor_deflate_init, + .exit = compressor_deflate_exit, + .setlevel = erofs_compressor_deflate_setlevel, + .compress_destsize = deflate_compress_destsize, +}; diff --git a/lib/compressor_libdeflate.c b/lib/compressor_libdeflate.c new file mode 100644 index 0000000..c0b019a --- /dev/null +++ b/lib/compressor_libdeflate.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#include "erofs/internal.h" +#include "erofs/print.h" +#include "erofs/config.h" +#include <libdeflate.h> +#include "compressor.h" + +static int libdeflate_compress_destsize(const struct erofs_compress *c, + const void *src, unsigned int *srcsize, + void *dst, unsigned int dstsize) +{ + static size_t 
last_uncompressed_size = 0; + size_t l = 0; /* largest input that fits so far */ + size_t l_csize = 0; + size_t r = *srcsize + 1; /* smallest input that doesn't fit so far */ + size_t m; + u8 tmpbuf[dstsize + 9]; + + if (last_uncompressed_size) + m = last_uncompressed_size * 15 / 16; + else + m = dstsize * 4; + for (;;) { + size_t csize; + + m = max(m, l + 1); + m = min(m, r - 1); + + csize = libdeflate_deflate_compress(c->private_data, src, m, + tmpbuf, dstsize + 9); + /*printf("Tried %zu => %zu\n", m, csize);*/ + if (csize > 0 && csize <= dstsize) { + /* Fits */ + memcpy(dst, tmpbuf, csize); + l = m; + l_csize = csize; + if (r <= l + 1 || csize + + (22 - 2*(int)c->compression_level) >= dstsize) + break; + /* + * Estimate needed input prefix size based on current + * compression ratio. + */ + m = (dstsize * m) / csize; + } else { + /* Doesn't fit */ + r = m; + if (r <= l + 1) + break; + m = (l + r) / 2; + } + } + + /* + * Since generic EROFS on-disk compressed data will be filled with + * leading 0s (but no more than one block, 4KB for example, even the + * whole pcluster is 128KB) if not filled, it will be used to identify + * the actual compressed length as well without taking more reserved + * compressed bytes or some extra metadata to record this. + * + * DEFLATE streams can also be used in this way, if it starts from a + * non-last stored block, flag an unused bit instead to avoid the zero + * byte. It's still a valid one according to the DEFLATE specification. 
+ */ + if (l_csize && !((u8 *)dst)[0]) + ((u8 *)dst)[0] = 1 << (2 + 1); + + /*printf("Choosing %zu => %zu\n", l, l_csize);*/ + *srcsize = l; + last_uncompressed_size = l; + return l_csize; +} + +static int compressor_libdeflate_exit(struct erofs_compress *c) +{ + if (!c->private_data) + return -EINVAL; + + libdeflate_free_compressor(c->private_data); + return 0; +} + +static int compressor_libdeflate_init(struct erofs_compress *c) +{ + c->private_data = NULL; + + erofs_warn("EXPERIMENTAL libdeflate compressor in use. Use at your own risk!"); + return 0; +} + +static int erofs_compressor_libdeflate_setlevel(struct erofs_compress *c, + int compression_level) +{ + if (compression_level < 0) + compression_level = erofs_compressor_deflate.default_level; + + libdeflate_free_compressor(c->private_data); + c->private_data = libdeflate_alloc_compressor(compression_level); + if (!c->private_data) + return -ENOMEM; + c->compression_level = compression_level; + return 0; +} + +const struct erofs_compressor erofs_compressor_libdeflate = { + .default_level = 1, + .best_level = 12, + .init = compressor_libdeflate_init, + .exit = compressor_libdeflate_exit, + .setlevel = erofs_compressor_libdeflate_setlevel, + .compress_destsize = libdeflate_compress_destsize, +}; diff --git a/lib/compressor_liblzma.c b/lib/compressor_liblzma.c index f274dce..0ed6f23 100644 --- a/lib/compressor_liblzma.c +++ b/lib/compressor_liblzma.c @@ -88,7 +88,6 @@ static int erofs_compressor_liblzma_init(struct erofs_compress *c) { struct erofs_liblzma_context *ctx; - c->alg = &erofs_compressor_lzma; ctx = malloc(sizeof(*ctx)); if (!ctx) return -ENOMEM; @@ -100,7 +99,6 @@ static int erofs_compressor_liblzma_init(struct erofs_compress *c) } const struct erofs_compressor erofs_compressor_lzma = { - .name = "lzma", .default_level = LZMA_PRESET_DEFAULT, .best_level = 109, .init = erofs_compressor_liblzma_init, diff --git a/lib/compressor_lz4.c b/lib/compressor_lz4.c index b6f6e7e..6677693 100644 --- 
a/lib/compressor_lz4.c +++ b/lib/compressor_lz4.c @@ -32,13 +32,11 @@ static int compressor_lz4_exit(struct erofs_compress *c) static int compressor_lz4_init(struct erofs_compress *c) { - c->alg = &erofs_compressor_lz4; - sbi.lz4_max_distance = LZ4_DISTANCE_MAX; + c->sbi->lz4_max_distance = LZ4_DISTANCE_MAX; return 0; } const struct erofs_compressor erofs_compressor_lz4 = { - .name = "lz4", .default_level = 0, .best_level = 0, .init = compressor_lz4_init, diff --git a/lib/compressor_lz4hc.c b/lib/compressor_lz4hc.c index eec1c84..b410e15 100644 --- a/lib/compressor_lz4hc.c +++ b/lib/compressor_lz4hc.c @@ -38,13 +38,11 @@ static int compressor_lz4hc_exit(struct erofs_compress *c) static int compressor_lz4hc_init(struct erofs_compress *c) { - c->alg = &erofs_compressor_lz4hc; - c->private_data = LZ4_createStreamHC(); if (!c->private_data) return -ENOMEM; - sbi.lz4_max_distance = LZ4_DISTANCE_MAX; + c->sbi->lz4_max_distance = LZ4_DISTANCE_MAX; return 0; } @@ -60,7 +58,6 @@ static int compressor_lz4hc_setlevel(struct erofs_compress *c, } const struct erofs_compressor erofs_compressor_lz4hc = { - .name = "lz4hc", .default_level = LZ4HC_CLEVEL_DEFAULT, .best_level = LZ4HC_CLEVEL_MAX, .init = compressor_lz4hc_init, @@ -18,27 +18,29 @@ static int erofs_map_blocks_flatmode(struct erofs_inode *inode, erofs_blk_t nblocks, lastblk; u64 offset = map->m_la; struct erofs_inode *vi = inode; + struct erofs_sb_info *sbi = inode->sbi; bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); trace_erofs_map_blocks_flatmode_enter(inode, map, flags); - nblocks = BLK_ROUND_UP(inode->i_size); + nblocks = BLK_ROUND_UP(sbi, inode->i_size); lastblk = nblocks - tailendpacking; /* there is no hole in flatmode */ map->m_flags = EROFS_MAP_MAPPED; - if (offset < blknr_to_addr(lastblk)) { - map->m_pa = blknr_to_addr(vi->u.i_blkaddr) + map->m_la; - map->m_plen = blknr_to_addr(lastblk) - offset; + if (offset < erofs_pos(sbi, lastblk)) { + map->m_pa = erofs_pos(sbi, vi->u.i_blkaddr) + 
map->m_la; + map->m_plen = erofs_pos(sbi, lastblk) - offset; } else if (tailendpacking) { /* 2 - inode inline B: inode, [xattrs], inline last blk... */ - map->m_pa = iloc(vi->nid) + vi->inode_isize + - vi->xattr_isize + erofs_blkoff(map->m_la); + map->m_pa = erofs_iloc(vi) + vi->inode_isize + + vi->xattr_isize + erofs_blkoff(sbi, map->m_la); map->m_plen = inode->i_size - offset; /* inline data should be located in the same meta block */ - if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) { + if (erofs_blkoff(sbi, map->m_pa) + map->m_plen > + erofs_blksiz(sbi)) { erofs_err("inline data cross block boundary @ nid %" PRIu64, vi->nid); DBG_BUGON(1); @@ -65,8 +67,9 @@ int erofs_map_blocks(struct erofs_inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *vi = inode; + struct erofs_sb_info *sbi = inode->sbi; struct erofs_inode_chunk_index *idx; - u8 buf[EROFS_BLKSIZ]; + u8 buf[EROFS_MAX_BLOCK_SIZE]; u64 chunknr; unsigned int unit; erofs_off_t pos; @@ -89,39 +92,39 @@ int erofs_map_blocks(struct erofs_inode *inode, unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */ chunknr = map->m_la >> vi->u.chunkbits; - pos = roundup(iloc(vi->nid) + vi->inode_isize + + pos = roundup(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; - err = blk_read(0, buf, erofs_blknr(pos), 1); + err = blk_read(sbi, 0, buf, erofs_blknr(sbi, pos), 1); if (err < 0) return -EIO; map->m_la = chunknr << vi->u.chunkbits; map->m_plen = min_t(erofs_off_t, 1UL << vi->u.chunkbits, - roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); + roundup(inode->i_size - map->m_la, erofs_blksiz(sbi))); /* handle block map */ if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { - __le32 *blkaddr = (void *)buf + erofs_blkoff(pos); + __le32 *blkaddr = (void *)buf + erofs_blkoff(sbi, pos); if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { map->m_flags = 0; } else { - map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr)); + map->m_pa = erofs_pos(sbi, le32_to_cpu(*blkaddr)); 
map->m_flags = EROFS_MAP_MAPPED; } goto out; } /* parse chunk indexes */ - idx = (void *)buf + erofs_blkoff(pos); + idx = (void *)buf + erofs_blkoff(sbi, pos); switch (le32_to_cpu(idx->blkaddr)) { case EROFS_NULL_ADDR: map->m_flags = 0; break; default: map->m_deviceid = le16_to_cpu(idx->device_id) & - sbi.device_id_mask; - map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr)); + sbi->device_id_mask; + map->m_pa = erofs_pos(sbi, le32_to_cpu(idx->blkaddr)); map->m_flags = EROFS_MAP_MAPPED; break; } @@ -145,8 +148,8 @@ int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map) dif = sbi->devs + id; if (!dif->mapped_blkaddr) continue; - startoff = blknr_to_addr(dif->mapped_blkaddr); - length = blknr_to_addr(dif->blocks); + startoff = erofs_pos(sbi, dif->mapped_blkaddr); + length = erofs_pos(sbi, dif->blocks); if (map->m_pa >= startoff && map->m_pa < startoff + length) { @@ -158,9 +161,10 @@ int erofs_map_dev(struct erofs_sb_info *sbi, struct erofs_map_dev *map) return 0; } -int erofs_read_one_data(struct erofs_map_blocks *map, char *buffer, u64 offset, - size_t len) +int erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map, + char *buffer, u64 offset, size_t len) { + struct erofs_sb_info *sbi = inode->sbi; struct erofs_map_dev mdev; int ret; @@ -168,11 +172,11 @@ int erofs_read_one_data(struct erofs_map_blocks *map, char *buffer, u64 offset, .m_deviceid = map->m_deviceid, .m_pa = map->m_pa, }; - ret = erofs_map_dev(&sbi, &mdev); + ret = erofs_map_dev(sbi, &mdev); if (ret) return ret; - ret = dev_read(mdev.m_deviceid, buffer, mdev.m_pa + offset, len); + ret = dev_read(sbi, mdev.m_deviceid, buffer, mdev.m_pa + offset, len); if (ret < 0) return -EIO; return 0; @@ -219,7 +223,8 @@ static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer, map.m_la = ptr; } - ret = erofs_read_one_data(&map, estart, moff, eend - map.m_la); + ret = erofs_read_one_data(inode, &map, estart, moff, + eend - map.m_la); if (ret) return ret; ptr = 
eend; @@ -231,12 +236,14 @@ int z_erofs_read_one_data(struct erofs_inode *inode, struct erofs_map_blocks *map, char *raw, char *buffer, erofs_off_t skip, erofs_off_t length, bool trimmed) { + struct erofs_sb_info *sbi = inode->sbi; struct erofs_map_dev mdev; int ret = 0; if (map->m_flags & EROFS_MAP_FRAGMENT) { struct erofs_inode packed_inode = { - .nid = sbi.packed_nid, + .sbi = sbi, + .nid = sbi->packed_nid, }; ret = erofs_read_inode_from_disk(&packed_inode); @@ -253,23 +260,24 @@ int z_erofs_read_one_data(struct erofs_inode *inode, mdev = (struct erofs_map_dev) { .m_pa = map->m_pa, }; - ret = erofs_map_dev(&sbi, &mdev); + ret = erofs_map_dev(sbi, &mdev); if (ret) { DBG_BUGON(1); return ret; } - ret = dev_read(mdev.m_deviceid, raw, mdev.m_pa, map->m_plen); + ret = dev_read(sbi, mdev.m_deviceid, raw, mdev.m_pa, map->m_plen); if (ret < 0) return ret; ret = z_erofs_decompress(&(struct z_erofs_decompress_req) { + .sbi = sbi, .in = raw, .out = buffer, .decodedskip = skip, .interlaced_offset = map->m_algorithmformat == Z_EROFS_COMPRESSION_INTERLACED ? 
- erofs_blkoff(map->m_la) : 0, + erofs_blkoff(sbi, map->m_la) : 0, .inputsize = map->m_plen, .decodedlength = length, .alg = map->m_algorithmformat, @@ -324,7 +332,7 @@ static int z_erofs_read_data(struct erofs_inode *inode, char *buffer, } if (!(map.m_flags & EROFS_MAP_MAPPED)) { - memset(buffer + end - offset, 0, length); + memset(buffer + end - offset, 0, length - skip); end = map.m_la; continue; } @@ -356,11 +364,95 @@ int erofs_pread(struct erofs_inode *inode, char *buf, case EROFS_INODE_FLAT_INLINE: case EROFS_INODE_CHUNK_BASED: return erofs_read_raw_data(inode, buf, count, offset); - case EROFS_INODE_FLAT_COMPRESSION_LEGACY: - case EROFS_INODE_FLAT_COMPRESSION: + case EROFS_INODE_COMPRESSED_FULL: + case EROFS_INODE_COMPRESSED_COMPACT: return z_erofs_read_data(inode, buf, count, offset); default: break; } return -EINVAL; } + +static void *erofs_read_metadata_nid(struct erofs_sb_info *sbi, erofs_nid_t nid, + erofs_off_t *offset, int *lengthp) +{ + struct erofs_inode vi = { .sbi = sbi, .nid = nid }; + __le16 __len; + int ret, len; + char *buffer; + + ret = erofs_read_inode_from_disk(&vi); + if (ret) + return ERR_PTR(ret); + + *offset = round_up(*offset, 4); + ret = erofs_pread(&vi, (void *)&__len, sizeof(__le16), *offset); + if (ret) + return ERR_PTR(ret); + + len = le16_to_cpu(__len); + if (!len) + return ERR_PTR(-EFSCORRUPTED); + + buffer = malloc(len); + if (!buffer) + return ERR_PTR(-ENOMEM); + *offset += sizeof(__le16); + *lengthp = len; + + ret = erofs_pread(&vi, buffer, len, *offset); + if (ret) { + free(buffer); + return ERR_PTR(ret); + } + *offset += len; + return buffer; +} + +static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi, + erofs_off_t *offset, int *lengthp) +{ + int ret, len, i, cnt; + void *buffer; + u8 data[EROFS_MAX_BLOCK_SIZE]; + + *offset = round_up(*offset, 4); + ret = blk_read(sbi, 0, data, erofs_blknr(sbi, *offset), 1); + if (ret) + return ERR_PTR(ret); + len = le16_to_cpu(*(__le16 *)&data[erofs_blkoff(sbi, *offset)]); + if 
(!len) + return ERR_PTR(-EFSCORRUPTED); + + buffer = malloc(len); + if (!buffer) + return ERR_PTR(-ENOMEM); + *offset += sizeof(__le16); + *lengthp = len; + + for (i = 0; i < len; i += cnt) { + cnt = min_t(int, erofs_blksiz(sbi) - erofs_blkoff(sbi, *offset), + len - i); + ret = blk_read(sbi, 0, data, erofs_blknr(sbi, *offset), 1); + if (ret) { + free(buffer); + return ERR_PTR(ret); + } + memcpy(buffer + i, data + erofs_blkoff(sbi, *offset), cnt); + *offset += cnt; + } + return buffer; +} + +/* + * read variable-sized metadata, offset will be aligned by 4-byte + * + * @nid is 0 if metadata is in meta inode + */ +void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid, + erofs_off_t *offset, int *lengthp) +{ + if (nid) + return erofs_read_metadata_nid(sbi, nid, offset, lengthp); + return erofs_read_metadata_bdi(sbi, offset, lengthp); +} diff --git a/lib/decompress.c b/lib/decompress.c index 36ddd9e..fe8a40c 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -9,12 +9,160 @@ #include "erofs/err.h" #include "erofs/print.h" +#ifdef HAVE_LIBDEFLATE +/* if libdeflate is available, use libdeflate instead. 
*/ +#include <libdeflate.h> + +static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq) +{ + struct erofs_sb_info *sbi = rq->sbi; + u8 *dest = (u8 *)rq->out; + u8 *src = (u8 *)rq->in; + u8 *buff = NULL; + size_t actual_out; + unsigned int inputmargin = 0; + struct libdeflate_decompressor *inf; + enum libdeflate_result ret; + + while (!src[inputmargin & (erofs_blksiz(sbi) - 1)]) + if (!(++inputmargin & (erofs_blksiz(sbi) - 1))) + break; + + if (inputmargin >= rq->inputsize) + return -EFSCORRUPTED; + + if (rq->decodedskip) { + buff = malloc(rq->decodedlength); + if (!buff) + return -ENOMEM; + dest = buff; + } + + inf = libdeflate_alloc_decompressor(); + if (!inf) + return -ENOMEM; + + if (rq->partial_decoding) { + ret = libdeflate_deflate_decompress(inf, src + inputmargin, + rq->inputsize - inputmargin, dest, + rq->decodedlength, &actual_out); + if (ret && ret != LIBDEFLATE_INSUFFICIENT_SPACE) { + ret = -EIO; + goto out_inflate_end; + } + + if (actual_out != rq->decodedlength) { + ret = -EIO; + goto out_inflate_end; + } + } else { + ret = libdeflate_deflate_decompress(inf, src + inputmargin, + rq->inputsize - inputmargin, dest, + rq->decodedlength, NULL); + if (ret) { + ret = -EIO; + goto out_inflate_end; + } + } + + if (rq->decodedskip) + memcpy(rq->out, dest + rq->decodedskip, + rq->decodedlength - rq->decodedskip); + +out_inflate_end: + libdeflate_free_decompressor(inf); + if (buff) + free(buff); + return ret; +} +#elif defined(HAVE_ZLIB) +#include <zlib.h> + +/* report a zlib or i/o error */ +static int zerr(int ret) +{ + switch (ret) { + case Z_STREAM_ERROR: + return -EINVAL; + case Z_DATA_ERROR: + return -EIO; + case Z_MEM_ERROR: + return -ENOMEM; + case Z_ERRNO: + case Z_VERSION_ERROR: + default: + return -EFAULT; + } +} + +static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq) +{ + struct erofs_sb_info *sbi = rq->sbi; + u8 *dest = (u8 *)rq->out; + u8 *src = (u8 *)rq->in; + u8 *buff = NULL; + unsigned int inputmargin = 0; + 
z_stream strm; + int ret; + + while (!src[inputmargin & (erofs_blksiz(sbi) - 1)]) + if (!(++inputmargin & (erofs_blksiz(sbi) - 1))) + break; + + if (inputmargin >= rq->inputsize) + return -EFSCORRUPTED; + + if (rq->decodedskip) { + buff = malloc(rq->decodedlength); + if (!buff) + return -ENOMEM; + dest = buff; + } + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (ret != Z_OK) { + free(buff); + return zerr(ret); + } + + strm.next_in = src + inputmargin; + strm.avail_in = rq->inputsize - inputmargin; + strm.next_out = dest; + strm.avail_out = rq->decodedlength; + + ret = inflate(&strm, rq->partial_decoding ? Z_SYNC_FLUSH : Z_FINISH); + if (ret != Z_STREAM_END || strm.total_out != rq->decodedlength) { + if (ret != Z_OK || !rq->partial_decoding) { + ret = zerr(ret); + goto out_inflate_end; + } + } + + if (rq->decodedskip) + memcpy(rq->out, dest + rq->decodedskip, + rq->decodedlength - rq->decodedskip); + +out_inflate_end: + inflateEnd(&strm); + if (buff) + free(buff); + return ret; +} +#endif + #ifdef HAVE_LIBLZMA #include <lzma.h> static int z_erofs_decompress_lzma(struct z_erofs_decompress_req *rq) { int ret = 0; + struct erofs_sb_info *sbi = rq->sbi; u8 *dest = (u8 *)rq->out; u8 *src = (u8 *)rq->in; u8 *buff = NULL; @@ -22,8 +170,8 @@ static int z_erofs_decompress_lzma(struct z_erofs_decompress_req *rq) lzma_stream strm; lzma_ret ret2; - while (!src[inputmargin & ~PAGE_MASK]) - if (!(++inputmargin & ~PAGE_MASK)) + while (!src[inputmargin & (erofs_blksiz(sbi) - 1)]) + if (!(++inputmargin & (erofs_blksiz(sbi) - 1))) break; if (inputmargin >= rq->inputsize) @@ -81,12 +229,13 @@ static int z_erofs_decompress_lz4(struct z_erofs_decompress_req *rq) char *buff = NULL; bool support_0padding = false; unsigned int inputmargin = 0; + struct erofs_sb_info *sbi = rq->sbi; - if (erofs_sb_has_lz4_0padding()) { + if 
(erofs_sb_has_lz4_0padding(sbi)) { support_0padding = true; - while (!src[inputmargin & ~PAGE_MASK]) - if (!(++inputmargin & ~PAGE_MASK)) + while (!src[inputmargin & (erofs_blksiz(sbi) - 1)]) + if (!(++inputmargin & (erofs_blksiz(sbi) - 1))) break; if (inputmargin >= rq->inputsize) @@ -131,18 +280,24 @@ out: int z_erofs_decompress(struct z_erofs_decompress_req *rq) { + struct erofs_sb_info *sbi = rq->sbi; + if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) { unsigned int count, rightpart, skip; - /* XXX: should support inputsize >= EROFS_BLKSIZ later */ - if (rq->inputsize > EROFS_BLKSIZ) + /* XXX: should support inputsize >= erofs_blksiz(sbi) later */ + if (rq->inputsize > erofs_blksiz(sbi)) + return -EFSCORRUPTED; + + if (rq->decodedlength > erofs_blksiz(sbi)) + return -EFSCORRUPTED; + + if (rq->decodedlength < rq->decodedskip) return -EFSCORRUPTED; - DBG_BUGON(rq->decodedlength > EROFS_BLKSIZ); - DBG_BUGON(rq->decodedlength < rq->decodedskip); count = rq->decodedlength - rq->decodedskip; - skip = erofs_blkoff(rq->interlaced_offset + rq->decodedskip); - rightpart = min(EROFS_BLKSIZ - skip, count); + skip = erofs_blkoff(sbi, rq->interlaced_offset + rq->decodedskip); + rightpart = min(erofs_blksiz(sbi) - skip, count); memcpy(rq->out, rq->in + skip, rightpart); memcpy(rq->out + rightpart, rq->in, count - rightpart); return 0; @@ -164,5 +319,9 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq) if (rq->alg == Z_EROFS_COMPRESSION_LZMA) return z_erofs_decompress_lzma(rq); #endif +#if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE) + if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE) + return z_erofs_decompress_deflate(rq); +#endif return -EOPNOTSUPP; } diff --git a/lib/dedupe.c b/lib/dedupe.c index 0a69b8f..17da452 100644 --- a/lib/dedupe.c +++ b/lib/dedupe.c @@ -11,12 +11,14 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2, unsigned long sz) { + const unsigned long *a1, *a2; unsigned long n = sz; - if (sz >= sizeof(long) && ((long)s1 & (sizeof(long) - 1)) == - 
((long)s2 & (sizeof(long) - 1))) { - const unsigned long *a1, *a2; + if (sz < sizeof(long)) + goto out_bytes; + if (((long)s1 & (sizeof(long) - 1)) == + ((long)s2 & (sizeof(long) - 1))) { while ((long)s1 & (sizeof(long) - 1)) { if (*s1 != *s2) break; @@ -34,9 +36,20 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2, ++a2; sz -= sizeof(long); } - s1 = (const u8 *)a1; - s2 = (const u8 *)a2; + } else { + a1 = (const unsigned long *)s1; + a2 = (const unsigned long *)s2; + do { + if (get_unaligned(a1) != get_unaligned(a2)) + break; + ++a1; + ++a2; + sz -= sizeof(long); + } while (sz >= sizeof(long)); } + s1 = (const u8 *)a1; + s2 = (const u8 *)a2; +out_bytes: while (sz) { if (*s1 != *s2) break; @@ -4,11 +4,25 @@ #include "erofs/print.h" #include "erofs/dir.h" +/* filename should not have a '/' in the name string */ +static bool erofs_validate_filename(const char *dname, int size) +{ + char *name = (char *)dname; + + while (name - dname < size && *name != '\0') { + if (*name == '/') + return false; + ++name; + } + return true; +} + static int traverse_dirents(struct erofs_dir_context *ctx, void *dentry_blk, unsigned int lblk, unsigned int next_nameoff, unsigned int maxsize, bool fsck) { + struct erofs_sb_info *sbi = ctx->dir->sbi; struct erofs_dirent *de = dentry_blk; const struct erofs_dirent *end = dentry_blk + next_nameoff; const char *prev_name = NULL; @@ -41,7 +55,7 @@ static int traverse_dirents(struct erofs_dir_context *ctx, break; } - if (nameoff + de_namelen > maxsize || + if (nameoff + de_namelen > maxsize || !de_namelen || de_namelen > EROFS_NAME_LEN) { errmsg = "bogus dirent namelen"; break; @@ -76,8 +90,8 @@ static int traverse_dirents(struct erofs_dir_context *ctx, goto out; } ctx->flags |= EROFS_READDIR_DOTDOT_FOUND; - if (sbi.root_nid == ctx->dir->nid) { - ctx->pnid = sbi.root_nid; + if (sbi->root_nid == ctx->dir->nid) { + ctx->pnid = sbi->root_nid; ctx->flags |= EROFS_READDIR_VALID_PNID; } if (fsck && @@ -101,6 +115,10 @@ static int 
traverse_dirents(struct erofs_dir_context *ctx, } break; } + } else if (fsck && + !erofs_validate_filename(de_name, de_namelen)) { + errmsg = "corrupted dirent with illegal filename"; + goto out; } ret = ctx->cb(ctx); if (ret) { @@ -123,9 +141,10 @@ out: int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck) { struct erofs_inode *dir = ctx->dir; + struct erofs_sb_info *sbi = dir->sbi; int err = 0; erofs_off_t pos; - char buf[EROFS_BLKSIZ]; + char buf[EROFS_MAX_BLOCK_SIZE]; if (!S_ISDIR(dir->i_mode)) return -ENOTDIR; @@ -133,9 +152,9 @@ int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck) ctx->flags &= ~EROFS_READDIR_ALL_SPECIAL_FOUND; pos = 0; while (pos < dir->i_size) { - erofs_blk_t lblk = erofs_blknr(pos); + erofs_blk_t lblk = erofs_blknr(sbi, pos); erofs_off_t maxsize = min_t(erofs_off_t, - dir->i_size - pos, EROFS_BLKSIZ); + dir->i_size - pos, erofs_blksiz(sbi)); const struct erofs_dirent *de = (const void *)buf; unsigned int nameoff; @@ -148,7 +167,7 @@ int erofs_iterate_dir(struct erofs_dir_context *ctx, bool fsck) nameoff = le16_to_cpu(de->nameoff); if (nameoff < sizeof(struct erofs_dirent) || - nameoff >= EROFS_BLKSIZ) { + nameoff >= erofs_blksiz(sbi)) { erofs_err("invalid de[0].nameoff %u @ nid %llu, lblk %u", nameoff, dir->nid | 0ULL, lblk); return -EFSCORRUPTED; @@ -203,7 +222,10 @@ static int erofs_get_pathname_iter(struct erofs_dir_context *ctx) } if (ctx->de_ftype == EROFS_FT_DIR || ctx->de_ftype == EROFS_FT_UNKNOWN) { - struct erofs_inode dir = { .nid = ctx->de_nid }; + struct erofs_inode dir = { + .sbi = ctx->dir->sbi, + .nid = ctx->de_nid + }; ret = erofs_read_inode_from_disk(&dir); if (ret) { @@ -212,10 +234,16 @@ static int erofs_get_pathname_iter(struct erofs_dir_context *ctx) } if (S_ISDIR(dir.i_mode)) { - ctx->dir = &dir; - pathctx->pos = pos + len + 1; - ret = erofs_iterate_dir(ctx, false); - pathctx->pos = pos; + struct erofs_get_pathname_context nctx = { + .ctx.flags = 0, + .ctx.dir = &dir, + .ctx.cb = 
erofs_get_pathname_iter, + .target_nid = pathctx->target_nid, + .buf = pathctx->buf, + .size = pathctx->size, + .pos = pos + len + 1, + }; + ret = erofs_iterate_dir(&nctx.ctx, false); if (ret == EROFS_PATHNAME_FOUND) { pathctx->buf[pos++] = '/'; strncpy(pathctx->buf + pos, dname, len); @@ -229,10 +257,14 @@ static int erofs_get_pathname_iter(struct erofs_dir_context *ctx) return 0; } -int erofs_get_pathname(erofs_nid_t nid, char *buf, size_t size) +int erofs_get_pathname(struct erofs_sb_info *sbi, erofs_nid_t nid, + char *buf, size_t size) { int ret; - struct erofs_inode root = { .nid = sbi.root_nid }; + struct erofs_inode root = { + .sbi = sbi, + .nid = sbi->root_nid, + }; struct erofs_get_pathname_context pathctx = { .ctx.flags = 0, .ctx.dir = &root, diff --git a/lib/diskbuf.c b/lib/diskbuf.c new file mode 100644 index 0000000..8205ba5 --- /dev/null +++ b/lib/diskbuf.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#include "erofs/diskbuf.h" +#include "erofs/internal.h" +#include "erofs/print.h" +#include <stdio.h> +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> + +/* A simple approach to avoid creating too many temporary files */ +static struct erofs_diskbufstrm { + u64 count; + u64 tailoffset, devpos; + int fd; + unsigned int alignsize; + bool locked; +} *dbufstrm; + +int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *fpos) +{ + const struct erofs_diskbufstrm *strm = db->sp; + u64 offset; + + if (!strm) + return -1; + offset = db->offset + strm->devpos; + if (lseek(strm->fd, offset, SEEK_SET) != offset) + return -E2BIG; + if (fpos) + *fpos = offset; + return strm->fd; +} + +int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off) +{ + struct erofs_diskbufstrm *strm = dbufstrm + sid; + + if (strm->tailoffset & (strm->alignsize - 1)) { + strm->tailoffset = round_up(strm->tailoffset, strm->alignsize); + if (lseek(strm->fd, strm->tailoffset + strm->devpos, + SEEK_SET) != strm->tailoffset 
+ strm->devpos) + return -EIO; + } + db->offset = strm->tailoffset; + if (off) + *off = db->offset + strm->devpos; + db->sp = strm; + ++strm->count; + strm->locked = true; /* TODO: need a real lock for MT */ + return strm->fd; +} + +void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len) +{ + struct erofs_diskbufstrm *strm = db->sp; + + DBG_BUGON(!strm); + DBG_BUGON(!strm->locked); + DBG_BUGON(strm->tailoffset != db->offset); + strm->tailoffset += len; +} + +void erofs_diskbuf_close(struct erofs_diskbuf *db) +{ + struct erofs_diskbufstrm *strm = db->sp; + + DBG_BUGON(!strm); + DBG_BUGON(strm->count <= 1); + --strm->count; + db->sp = NULL; +} + +int erofs_tmpfile(void) +{ +#define TRAILER "tmp.XXXXXXXXXX" + char buf[PATH_MAX]; + int fd; + umode_t u; + + (void)snprintf(buf, sizeof(buf), "%s/" TRAILER, + getenv("TMPDIR") ?: "/tmp"); + + fd = mkstemp(buf); + if (fd < 0) + return -errno; + + unlink(buf); + u = umask(0); + (void)umask(u); + (void)fchmod(fd, 0666 & ~u); + return fd; +} + +int erofs_diskbuf_init(unsigned int nstrms) +{ + struct erofs_diskbufstrm *strm; + + strm = calloc(nstrms + 1, sizeof(*strm)); + if (!strm) + return -ENOMEM; + strm[nstrms].fd = -1; + dbufstrm = strm; + + for (; strm < dbufstrm + nstrms; ++strm) { + struct stat st; + + /* try to use the devfd for regfiles on stream 0 */ + if (strm == dbufstrm && sbi.devsz == INT64_MAX) { + strm->devpos = 1ULL << 40; + if (!ftruncate(sbi.devfd, strm->devpos << 1)) { + strm->fd = dup(sbi.devfd); + if (lseek(strm->fd, strm->devpos, + SEEK_SET) != strm->devpos) + return -EIO; + goto setupone; + } + } + strm->devpos = 0; + strm->fd = erofs_tmpfile(); + if (strm->fd < 0) + return -ENOSPC; +setupone: + strm->tailoffset = 0; + strm->count = 1; + if (fstat(strm->fd, &st)) + return -errno; + strm->alignsize = max_t(u32, st.st_blksize, getpagesize()); + } + return 0; +} + +void erofs_diskbuf_exit(void) +{ + struct erofs_diskbufstrm *strm; + + if (!dbufstrm) + return; + + for (strm = dbufstrm; strm->fd >= 0; 
++strm) { + DBG_BUGON(strm->count != 1); + + close(strm->fd); + strm->fd = -1; + } +} diff --git a/lib/fragments.c b/lib/fragments.c index 0366c82..d4f6be1 100644 --- a/lib/fragments.c +++ b/lib/fragments.c @@ -38,9 +38,8 @@ struct erofs_fragment_dedupe_item { #define FRAGMENT_HASH(c) ((c) & (FRAGMENT_HASHSIZE - 1)) static struct list_head dupli_frags[FRAGMENT_HASHSIZE]; - static FILE *packedfile; -const char *frags_packedname = "packed_file"; +const char *erofs_frags_packedname = "packed_file"; #ifndef HAVE_LSEEK64 #define erofs_lseek64 lseek @@ -195,15 +194,16 @@ static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len, return 0; } -static void z_erofs_fragments_dedupe_init(void) +int z_erofs_fragments_init(void) { unsigned int i; for (i = 0; i < FRAGMENT_HASHSIZE; ++i) init_list_head(&dupli_frags[i]); + return 0; } -static void z_erofs_fragments_dedupe_exit(void) +void z_erofs_fragments_exit(void) { struct erofs_fragment_dedupe_item *di, *n; struct list_head *head; @@ -226,10 +226,10 @@ void z_erofs_fragments_commit(struct erofs_inode *inode) * will be recorded by switching to the noncompact layout anyway. 
*/ if (inode->fragmentoff >> 32) - inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY; + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; - erofs_sb_set_fragments(); + erofs_sb_set_fragments(inode->sbi); } int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, @@ -324,23 +324,21 @@ int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, return len; } -struct erofs_inode *erofs_mkfs_build_fragments(void) +struct erofs_inode *erofs_mkfs_build_packedfile(void) { fflush(packedfile); return erofs_mkfs_build_special_from_fd(fileno(packedfile), - frags_packedname); + EROFS_PACKED_INODE); } -void erofs_fragments_exit(void) +void erofs_packedfile_exit(void) { if (packedfile) fclose(packedfile); - - z_erofs_fragments_dedupe_exit(); } -int erofs_fragments_init(void) +FILE *erofs_packedfile_init(void) { #ifdef HAVE_TMPFILE64 packedfile = tmpfile64(); @@ -348,8 +346,6 @@ int erofs_fragments_init(void) packedfile = tmpfile(); #endif if (!packedfile) - return -ENOMEM; - - z_erofs_fragments_dedupe_init(); - return 0; + return ERR_PTR(-ENOMEM); + return packedfile; } diff --git a/lib/hashmap.c b/lib/hashmap.c index e11bd8d..45916ae 100644 --- a/lib/hashmap.c +++ b/lib/hashmap.c @@ -149,20 +149,21 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, alloc_table(map, size); } -void hashmap_free(struct hashmap *map, int free_entries) +int hashmap_free(struct hashmap *map) { - if (!map || !map->table) - return; - if (free_entries) { + if (map && map->table) { struct hashmap_iter iter; struct hashmap_entry *e; hashmap_iter_init(map, &iter); - while ((e = hashmap_iter_next(&iter))) - free(e); + e = hashmap_iter_next(&iter); + if (e) + return -EBUSY; + + free(map->table); + memset(map, 0, sizeof(*map)); } - free(map->table); - memset(map, 0, sizeof(*map)); + return 0; } void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata) @@ -194,10 +195,13 @@ void 
hashmap_add(struct hashmap *map, void *entry) rehash(map, map->tablesize << HASHMAP_RESIZE_BITS); } -void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata) +void *hashmap_remove(struct hashmap *map, const void *entry) { struct hashmap_entry *old; - struct hashmap_entry **e = find_entry_ptr(map, key, keydata); + struct hashmap_entry **e = &map->table[bucket(map, entry)]; + + while (*e && *e != entry) + e = &(*e)->next; if (!*e) return NULL; @@ -214,14 +218,6 @@ void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata) return old; } -void *hashmap_put(struct hashmap *map, void *entry) -{ - struct hashmap_entry *old = hashmap_remove(map, entry, NULL); - - hashmap_add(map, entry); - return old; -} - void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter) { iter->map = map; diff --git a/lib/inode.c b/lib/inode.c index bcb0986..fb062a1 100644 --- a/lib/inode.c +++ b/lib/inode.c @@ -16,6 +16,7 @@ #endif #include <dirent.h> #include "erofs/print.h" +#include "erofs/diskbuf.h" #include "erofs/inode.h" #include "erofs/cache.h" #include "erofs/io.h" @@ -75,10 +76,10 @@ void erofs_inode_manager_init(void) init_list_head(&inode_hashtable[i]); } -static struct erofs_inode *erofs_igrab(struct erofs_inode *inode) +void erofs_insert_ihash(struct erofs_inode *inode, dev_t dev, ino_t ino) { - ++inode->i_count; - return inode; + list_add(&inode->i_hash, + &inode_hashtable[(ino ^ dev) % NR_INODE_HASHTABLE]); } /* get the inode from the (source) inode # */ @@ -121,6 +122,12 @@ unsigned int erofs_iput(struct erofs_inode *inode) list_del(&inode->i_hash); if (inode->i_srcpath) free(inode->i_srcpath); + if (inode->with_diskbuf) { + erofs_diskbuf_close(inode->i_diskbuf); + free(inode->i_diskbuf); + } else if (inode->i_link) { + free(inode->i_link); + } free(inode); return 0; } @@ -142,7 +149,8 @@ struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent, /* allocate main data for a inode */ static int 
__allocate_inode_bh_data(struct erofs_inode *inode, - unsigned long nblocks) + unsigned long nblocks, + int type) { struct erofs_buffer_head *bh; int ret; @@ -154,7 +162,7 @@ static int __allocate_inode_bh_data(struct erofs_inode *inode, } /* allocate main data buffer */ - bh = erofs_balloc(DATA, blknr_to_addr(nblocks), 0, 0); + bh = erofs_balloc(type, erofs_pos(inode->sbi, nblocks), 0, 0); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -179,28 +187,14 @@ static int comp_subdir(const void *a, const void *b) return strcmp(da->name, db->name); } -int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) +static int erofs_prepare_dir_layout(struct erofs_inode *dir, + unsigned int nr_subdirs) { + struct erofs_sb_info *sbi = dir->sbi; struct erofs_dentry *d, *n, **sorted_d; - unsigned int d_size, i_nlink, i; - int ret; - - /* dot is pointed to the current dir inode */ - d = erofs_d_alloc(dir, "."); - if (IS_ERR(d)) - return PTR_ERR(d); - d->inode = erofs_igrab(dir); - d->type = EROFS_FT_DIR; - - /* dotdot is pointed to the parent dir */ - d = erofs_d_alloc(dir, ".."); - if (IS_ERR(d)) - return PTR_ERR(d); - d->inode = erofs_igrab(dir->i_parent); - d->type = EROFS_FT_DIR; + unsigned int i; + unsigned int d_size = 0; - /* sort subdirs */ - nr_subdirs += 2; sorted_d = malloc(nr_subdirs * sizeof(d)); if (!sorted_d) return -ENOMEM; @@ -215,40 +209,57 @@ int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs); free(sorted_d); - /* let's calculate dir size and update i_nlink */ - d_size = 0; - i_nlink = 0; + /* let's calculate dir size */ list_for_each_entry(d, &dir->i_subdirs, d_child) { int len = strlen(d->name) + sizeof(struct erofs_dirent); - if (d_size % EROFS_BLKSIZ + len > EROFS_BLKSIZ) - d_size = round_up(d_size, EROFS_BLKSIZ); + if (erofs_blkoff(sbi, d_size) + len > erofs_blksiz(sbi)) + d_size = round_up(d_size, erofs_blksiz(sbi)); d_size += len; - - i_nlink += (d->type == 
EROFS_FT_DIR); } dir->i_size = d_size; - /* - * if there're too many subdirs as compact form, set nlink=1 - * rather than upgrade to use extented form instead. - */ - if (i_nlink > USHRT_MAX && - dir->inode_isize == sizeof(struct erofs_inode_compact)) - dir->i_nlink = 1; - else - dir->i_nlink = i_nlink; /* no compression for all dirs */ dir->datalayout = EROFS_INODE_FLAT_INLINE; - /* allocate dir main data */ - ret = __allocate_inode_bh_data(dir, erofs_blknr(d_size)); + /* it will be used in erofs_prepare_inode_buffer */ + dir->idata_size = d_size % erofs_blksiz(sbi); + return 0; +} + +int erofs_init_empty_dir(struct erofs_inode *dir) +{ + struct erofs_dentry *d; + + /* dot is pointed to the current dir inode */ + d = erofs_d_alloc(dir, "."); + if (IS_ERR(d)) + return PTR_ERR(d); + d->inode = erofs_igrab(dir); + d->type = EROFS_FT_DIR; + + /* dotdot is pointed to the parent dir */ + d = erofs_d_alloc(dir, ".."); + if (IS_ERR(d)) + return PTR_ERR(d); + d->inode = erofs_igrab(dir->i_parent); + d->type = EROFS_FT_DIR; + + dir->i_nlink = 2; + return 0; +} + +int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) +{ + int ret; + + ret = erofs_init_empty_dir(dir); if (ret) return ret; - /* it will be used in erofs_prepare_inode_buffer */ - dir->idata_size = d_size % EROFS_BLKSIZ; - return 0; + /* sort subdirs */ + nr_subdirs += 2; + return erofs_prepare_dir_layout(dir, nr_subdirs); } static void fill_dirblock(char *buf, unsigned int size, unsigned int q, @@ -275,13 +286,42 @@ static void fill_dirblock(char *buf, unsigned int size, unsigned int q, memset(buf + q, 0, size - q); } -static int write_dirblock(unsigned int q, struct erofs_dentry *head, +static int write_dirblock(struct erofs_sb_info *sbi, + unsigned int q, struct erofs_dentry *head, struct erofs_dentry *end, erofs_blk_t blkaddr) { - char buf[EROFS_BLKSIZ]; + char buf[EROFS_MAX_BLOCK_SIZE]; - fill_dirblock(buf, EROFS_BLKSIZ, q, head, end); - return blk_write(buf, blkaddr, 1); + 
fill_dirblock(buf, erofs_blksiz(sbi), q, head, end); + return blk_write(sbi, buf, blkaddr, 1); +} + +erofs_nid_t erofs_lookupnid(struct erofs_inode *inode) +{ + struct erofs_buffer_head *const bh = inode->bh; + struct erofs_sb_info *sbi = inode->sbi; + erofs_off_t off, meta_offset; + + if (!bh || (long long)inode->nid > 0) + return inode->nid; + + erofs_mapbh(bh->block); + off = erofs_btell(bh, false); + + meta_offset = erofs_pos(sbi, sbi->meta_blkaddr); + DBG_BUGON(off < meta_offset); + inode->nid = (off - meta_offset) >> EROFS_ISLOTBITS; + erofs_dbg("Assign nid %llu to file %s (mode %05o)", + inode->nid, inode->i_srcpath, inode->i_mode); + return inode->nid; +} + +static void erofs_d_invalidate(struct erofs_dentry *d) +{ + struct erofs_inode *const inode = d->inode; + + d->nid = erofs_lookupnid(inode); + erofs_iput(inode); } static int erofs_write_dir_file(struct erofs_inode *dir) @@ -289,18 +329,25 @@ static int erofs_write_dir_file(struct erofs_inode *dir) struct erofs_dentry *head = list_first_entry(&dir->i_subdirs, struct erofs_dentry, d_child); + struct erofs_sb_info *sbi = dir->sbi; struct erofs_dentry *d; int ret; unsigned int q, used, blkno; q = used = blkno = 0; + /* allocate dir main data */ + ret = __allocate_inode_bh_data(dir, erofs_blknr(sbi, dir->i_size), DIRA); + if (ret) + return ret; + list_for_each_entry(d, &dir->i_subdirs, d_child) { const unsigned int len = strlen(d->name) + sizeof(struct erofs_dirent); - if (used + len > EROFS_BLKSIZ) { - ret = write_dirblock(q, head, d, + erofs_d_invalidate(d); + if (used + len > erofs_blksiz(sbi)) { + ret = write_dirblock(sbi, q, head, d, dir->u.i_blkaddr + blkno); if (ret) return ret; @@ -313,13 +360,13 @@ static int erofs_write_dir_file(struct erofs_inode *dir) q += sizeof(struct erofs_dirent); } - DBG_BUGON(used > EROFS_BLKSIZ); - if (used == EROFS_BLKSIZ) { - DBG_BUGON(dir->i_size % EROFS_BLKSIZ); + DBG_BUGON(used > erofs_blksiz(sbi)); + if (used == erofs_blksiz(sbi)) { + DBG_BUGON(dir->i_size % 
erofs_blksiz(sbi)); DBG_BUGON(dir->idata_size); - return write_dirblock(q, head, d, dir->u.i_blkaddr + blkno); + return write_dirblock(sbi, q, head, d, dir->u.i_blkaddr + blkno); } - DBG_BUGON(used != dir->i_size % EROFS_BLKSIZ); + DBG_BUGON(used != dir->i_size % erofs_blksiz(sbi)); if (used) { /* fill tail-end dir block */ dir->idata = malloc(used); @@ -331,25 +378,26 @@ static int erofs_write_dir_file(struct erofs_inode *dir) return 0; } -static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf) +int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf) { - const unsigned int nblocks = erofs_blknr(inode->i_size); + struct erofs_sb_info *sbi = inode->sbi; + const unsigned int nblocks = erofs_blknr(sbi, inode->i_size); int ret; inode->datalayout = EROFS_INODE_FLAT_INLINE; - ret = __allocate_inode_bh_data(inode, nblocks); + ret = __allocate_inode_bh_data(inode, nblocks, DATA); if (ret) return ret; if (nblocks) - blk_write(buf, inode->u.i_blkaddr, nblocks); - inode->idata_size = inode->i_size % EROFS_BLKSIZ; + blk_write(sbi, buf, inode->u.i_blkaddr, nblocks); + inode->idata_size = inode->i_size % erofs_blksiz(sbi); if (inode->idata_size) { inode->idata = malloc(inode->idata_size); if (!inode->idata) return -ENOMEM; - memcpy(inode->idata, buf + blknr_to_addr(nblocks), + memcpy(inode->idata, buf + erofs_pos(sbi, nblocks), inode->idata_size); } return 0; @@ -367,31 +415,32 @@ static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd) { int ret; unsigned int nblocks, i; + struct erofs_sb_info *sbi = inode->sbi; inode->datalayout = EROFS_INODE_FLAT_INLINE; - nblocks = inode->i_size / EROFS_BLKSIZ; + nblocks = inode->i_size / erofs_blksiz(sbi); - ret = __allocate_inode_bh_data(inode, nblocks); + ret = __allocate_inode_bh_data(inode, nblocks, DATA); if (ret) return ret; for (i = 0; i < nblocks; ++i) { - char buf[EROFS_BLKSIZ]; + char buf[EROFS_MAX_BLOCK_SIZE]; - ret = read(fd, buf, EROFS_BLKSIZ); - if (ret != EROFS_BLKSIZ) 
{ + ret = read(fd, buf, erofs_blksiz(sbi)); + if (ret != erofs_blksiz(sbi)) { if (ret < 0) return -errno; return -EAGAIN; } - ret = blk_write(buf, inode->u.i_blkaddr + i, 1); + ret = blk_write(sbi, buf, inode->u.i_blkaddr + i, 1); if (ret) return ret; } /* read the tail-end data */ - inode->idata_size = inode->i_size % EROFS_BLKSIZ; + inode->idata_size = inode->i_size % erofs_blksiz(sbi); if (inode->idata_size) { inode->idata = malloc(inode->idata_size); if (!inode->idata) @@ -408,14 +457,11 @@ static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd) return 0; } -static int erofs_write_file(struct erofs_inode *inode) +int erofs_write_file(struct erofs_inode *inode, int fd, u64 fpos) { - int ret, fd; + int ret; - if (!inode->i_size) { - inode->datalayout = EROFS_INODE_FLAT_PLAIN; - return 0; - } + DBG_BUGON(!inode->i_size); if (cfg.c_chunkbits) { inode->u.chunkbits = cfg.c_chunkbits; @@ -423,33 +469,27 @@ static int erofs_write_file(struct erofs_inode *inode) inode->u.chunkformat = 0; if (cfg.c_force_chunkformat == FORCE_INODE_CHUNK_INDEXES) inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES; - return erofs_blob_write_chunked_file(inode); + return erofs_blob_write_chunked_file(inode, fd, fpos); } if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) { - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; ret = erofs_write_compressed_file(inode, fd); - close(fd); - if (!ret || ret != -ENOSPC) return ret; + + ret = lseek(fd, fpos, SEEK_SET); + if (ret < 0) + return -errno; } /* fallback to all data uncompressed */ - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; - - ret = write_uncompressed_file_from_fd(inode, fd); - close(fd); - return ret; + return write_uncompressed_file_from_fd(inode, fd); } static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh) { struct erofs_inode *const inode = bh->fsprivate; + struct erofs_sb_info *sbi = inode->sbi; const u16 icount = 
EROFS_INODE_XATTR_ICOUNT(inode->xattr_isize); erofs_off_t off = erofs_btell(bh, false); union { @@ -536,18 +576,18 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh) BUG_ON(1); } - ret = dev_write(&u, off, inode->inode_isize); + ret = dev_write(sbi, &u, off, inode->inode_isize); if (ret) return false; off += inode->inode_isize; if (inode->xattr_isize) { - char *xattrs = erofs_export_xattr_ibody(&inode->i_xattrs, - inode->xattr_isize); + char *xattrs = erofs_export_xattr_ibody(inode); + if (IS_ERR(xattrs)) return false; - ret = dev_write(xattrs, off, inode->xattr_isize); + ret = dev_write(sbi, xattrs, off, inode->xattr_isize); free(xattrs); if (ret) return false; @@ -562,8 +602,8 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh) return false; } else { /* write compression metadata */ - off = Z_EROFS_VLE_EXTENT_ALIGN(off); - ret = dev_write(inode->compressmeta, off, + off = roundup(off, 8); + ret = dev_write(sbi, inode->compressmeta, off, inode->extent_isize); if (ret) return false; @@ -582,6 +622,7 @@ static struct erofs_bhops erofs_write_inode_bhops = { static int erofs_prepare_tail_block(struct erofs_inode *inode) { + struct erofs_sb_info *sbi = inode->sbi; struct erofs_buffer_head *bh; int ret; @@ -589,23 +630,16 @@ static int erofs_prepare_tail_block(struct erofs_inode *inode) return 0; bh = inode->bh_data; - if (!bh) { - bh = erofs_balloc(DATA, EROFS_BLKSIZ, 0, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - bh->op = &erofs_skip_write_bhops; - - /* get blkaddr of bh */ - ret = erofs_mapbh(bh->block); - DBG_BUGON(ret < 0); - inode->u.i_blkaddr = bh->block->blkaddr; - - inode->bh_data = bh; - return 0; + if (bh) { + /* expend a block as the tail block (should be successful) */ + ret = erofs_bh_balloon(bh, erofs_blksiz(sbi)); + if (ret != erofs_blksiz(sbi)) { + DBG_BUGON(1); + return -EIO; + } + } else { + inode->lazy_tailblock = true; } - /* expend a block as the tail block (should be successful) */ - ret = 
erofs_bh_balloon(bh, EROFS_BLKSIZ); - DBG_BUGON(ret != EROFS_BLKSIZ); return 0; } @@ -618,15 +652,14 @@ static int erofs_prepare_inode_buffer(struct erofs_inode *inode) inodesize = inode->inode_isize + inode->xattr_isize; if (inode->extent_isize) - inodesize = Z_EROFS_VLE_EXTENT_ALIGN(inodesize) + - inode->extent_isize; + inodesize = roundup(inodesize, 8) + inode->extent_isize; /* TODO: tailpacking inline of chunk-based format isn't finalized */ if (inode->datalayout == EROFS_INODE_CHUNK_BASED) goto noinline; if (!is_inode_layout_compression(inode)) { - if (cfg.c_noinline_data && S_ISREG(inode->i_mode)) { + if (!cfg.c_inline_data && S_ISREG(inode->i_mode)) { inode->datalayout = EROFS_INODE_FLAT_PLAIN; goto noinline; } @@ -663,7 +696,7 @@ noinline: erofs_dbg("Inline %scompressed data (%u bytes) to %s", inode->compressed_idata ? "" : "un", inode->idata_size, inode->i_srcpath); - erofs_sb_set_ztailpacking(); + erofs_sb_set_ztailpacking(inode->sbi); } else { inode->datalayout = EROFS_INODE_FLAT_INLINE; erofs_dbg("Inline tail-end data (%u bytes) to %s", @@ -691,7 +724,7 @@ static bool erofs_bh_flush_write_inline(struct erofs_buffer_head *bh) const erofs_off_t off = erofs_btell(bh, false); int ret; - ret = dev_write(inode->idata, off, inode->idata_size); + ret = dev_write(inode->sbi, inode->idata, off, inode->idata_size); if (ret) return false; @@ -709,6 +742,7 @@ static struct erofs_bhops erofs_write_inline_bhops = { static int erofs_write_tail_end(struct erofs_inode *inode) { + struct erofs_sb_info *sbi = inode->sbi; struct erofs_buffer_head *bh, *ibh; bh = inode->bh_data; @@ -728,25 +762,47 @@ static int erofs_write_tail_end(struct erofs_inode *inode) int ret; erofs_off_t pos, zero_pos; - erofs_mapbh(bh->block); - pos = erofs_btell(bh, true) - EROFS_BLKSIZ; + if (!bh) { + bh = erofs_balloc(DATA, erofs_blksiz(sbi), 0, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); + bh->op = &erofs_skip_write_bhops; + + /* get blkaddr of bh */ + ret = erofs_mapbh(bh->block); + 
inode->u.i_blkaddr = bh->block->blkaddr; + inode->bh_data = bh; + } else { + if (inode->lazy_tailblock) { + /* expend a tail block (should be successful) */ + ret = erofs_bh_balloon(bh, erofs_blksiz(sbi)); + if (ret != erofs_blksiz(sbi)) { + DBG_BUGON(1); + return -EIO; + } + inode->lazy_tailblock = false; + } + ret = erofs_mapbh(bh->block); + } + DBG_BUGON(ret < 0); + pos = erofs_btell(bh, true) - erofs_blksiz(sbi); /* 0'ed data should be padded at head for 0padding conversion */ - if (erofs_sb_has_lz4_0padding() && inode->compressed_idata) { + if (erofs_sb_has_lz4_0padding(sbi) && inode->compressed_idata) { zero_pos = pos; - pos += EROFS_BLKSIZ - inode->idata_size; + pos += erofs_blksiz(sbi) - inode->idata_size; } else { /* pad 0'ed data for the other cases */ zero_pos = pos + inode->idata_size; } - ret = dev_write(inode->idata, pos, inode->idata_size); + ret = dev_write(sbi, inode->idata, pos, inode->idata_size); if (ret) return ret; - DBG_BUGON(inode->idata_size > EROFS_BLKSIZ); - if (inode->idata_size < EROFS_BLKSIZ) { - ret = dev_fillzero(zero_pos, - EROFS_BLKSIZ - inode->idata_size, + DBG_BUGON(inode->idata_size > erofs_blksiz(sbi)); + if (inode->idata_size < erofs_blksiz(sbi)) { + ret = dev_fillzero(sbi, zero_pos, + erofs_blksiz(sbi) - inode->idata_size, false); if (ret) return ret; @@ -755,7 +811,7 @@ static int erofs_write_tail_end(struct erofs_inode *inode) free(inode->idata); inode->idata = NULL; - erofs_droid_blocklist_write_tail_end(inode, erofs_blknr(pos)); + erofs_droid_blocklist_write_tail_end(inode, erofs_blknr(sbi, pos)); } out: /* now bh_data can drop directly */ @@ -784,14 +840,14 @@ static bool erofs_should_use_inode_extended(struct erofs_inode *inode) return true; if (inode->i_nlink > USHRT_MAX) return true; - if ((inode->i_mtime != sbi.build_time || - inode->i_mtime_nsec != sbi.build_time_nsec) && + if ((inode->i_mtime != inode->sbi->build_time || + inode->i_mtime_nsec != inode->sbi->build_time_nsec) && !cfg.c_ignore_mtime) return true; 
return false; } -static u32 erofs_new_encode_dev(dev_t dev) +u32 erofs_new_encode_dev(dev_t dev) { const unsigned int major = major(dev); const unsigned int minor = minor(dev); @@ -856,14 +912,15 @@ static int erofs_droid_inode_fsconfig(struct erofs_inode *inode, } #endif -static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, - const char *path) +int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st, + const char *path) { int err = erofs_droid_inode_fsconfig(inode, st, path); + struct erofs_sb_info *sbi = inode->sbi; if (err) return err; - inode->i_mode = st->st_mode; + inode->i_uid = cfg.c_uid == -1 ? st->st_uid : cfg.c_uid; inode->i_gid = cfg.c_gid == -1 ? st->st_gid : cfg.c_gid; @@ -880,14 +937,27 @@ static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, switch (cfg.c_timeinherit) { case TIMESTAMP_CLAMPING: - if (inode->i_mtime < sbi.build_time) + if (inode->i_mtime < sbi->build_time) break; case TIMESTAMP_FIXED: - inode->i_mtime = sbi.build_time; - inode->i_mtime_nsec = sbi.build_time_nsec; + inode->i_mtime = sbi->build_time; + inode->i_mtime_nsec = sbi->build_time_nsec; default: break; } + + return 0; +} + +static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, + const char *path) +{ + int err = __erofs_fill_inode(inode, st, path); + + if (err) + return err; + + inode->i_mode = st->st_mode; inode->i_nlink = 1; /* fix up later if needed */ switch (inode->i_mode & S_IFMT) { @@ -911,8 +981,10 @@ static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, if (!inode->i_srcpath) return -ENOMEM; - inode->dev = st->st_dev; - inode->i_ino[1] = st->st_ino; + if (!S_ISDIR(inode->i_mode)) { + inode->dev = st->st_dev; + inode->i_ino[1] = st->st_ino; + } if (erofs_should_use_inode_extended(inode)) { if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) { @@ -925,13 +997,11 @@ static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, inode->inode_isize = sizeof(struct 
erofs_inode_compact); } - list_add(&inode->i_hash, - &inode_hashtable[(st->st_ino ^ st->st_dev) % - NR_INODE_HASHTABLE]); + erofs_insert_ihash(inode, st->st_dev, st->st_ino); return 0; } -static struct erofs_inode *erofs_new_inode(void) +struct erofs_inode *erofs_new_inode(void) { struct erofs_inode *inode; @@ -939,9 +1009,12 @@ static struct erofs_inode *erofs_new_inode(void) if (!inode) return ERR_PTR(-ENOMEM); + inode->sbi = &sbi; inode->i_ino[0] = sbi.inos++; /* inode serial number */ inode->i_count = 1; + inode->datalayout = EROFS_INODE_FLAT_PLAIN; + init_list_head(&inode->i_hash); init_list_head(&inode->i_subdirs); init_list_head(&inode->i_xattrs); return inode; @@ -990,87 +1063,72 @@ static void erofs_fixup_meta_blkaddr(struct erofs_inode *rootdir) { const erofs_off_t rootnid_maxoffset = 0xffff << EROFS_ISLOTBITS; struct erofs_buffer_head *const bh = rootdir->bh; + struct erofs_sb_info *sbi = rootdir->sbi; erofs_off_t off, meta_offset; erofs_mapbh(bh->block); off = erofs_btell(bh, false); if (off > rootnid_maxoffset) - meta_offset = round_up(off - rootnid_maxoffset, EROFS_BLKSIZ); + meta_offset = round_up(off - rootnid_maxoffset, erofs_blksiz(sbi)); else meta_offset = 0; - sbi.meta_blkaddr = erofs_blknr(meta_offset); + sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset); rootdir->nid = (off - meta_offset) >> EROFS_ISLOTBITS; } -erofs_nid_t erofs_lookupnid(struct erofs_inode *inode) -{ - struct erofs_buffer_head *const bh = inode->bh; - erofs_off_t off, meta_offset; - - if (!bh) - return inode->nid; - - erofs_mapbh(bh->block); - off = erofs_btell(bh, false); - - meta_offset = blknr_to_addr(sbi.meta_blkaddr); - DBG_BUGON(off < meta_offset); - return inode->nid = (off - meta_offset) >> EROFS_ISLOTBITS; -} - -static void erofs_d_invalidate(struct erofs_dentry *d) -{ - struct erofs_inode *const inode = d->inode; - - d->nid = erofs_lookupnid(inode); - erofs_iput(inode); -} - -static struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir) +static int 
erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs) { int ret; DIR *_dir; struct dirent *dp; struct erofs_dentry *d; - unsigned int nr_subdirs; + unsigned int nr_subdirs, i_nlink; + + ret = erofs_scan_file_xattrs(dir); + if (ret < 0) + return ret; ret = erofs_prepare_xattr_ibody(dir); if (ret < 0) - return ERR_PTR(ret); + return ret; if (!S_ISDIR(dir->i_mode)) { if (S_ISLNK(dir->i_mode)) { char *const symlink = malloc(dir->i_size); if (!symlink) - return ERR_PTR(-ENOMEM); + return -ENOMEM; ret = readlink(dir->i_srcpath, symlink, dir->i_size); if (ret < 0) { free(symlink); - return ERR_PTR(-errno); + return -errno; } - ret = erofs_write_file_from_buffer(dir, symlink); free(symlink); - if (ret) - return ERR_PTR(ret); + } else if (dir->i_size) { + int fd = open(dir->i_srcpath, O_RDONLY | O_BINARY); + if (fd < 0) + return -errno; + + ret = erofs_write_file(dir, fd, 0); + close(fd); } else { - ret = erofs_write_file(dir); - if (ret) - return ERR_PTR(ret); + ret = 0; } + if (ret) + return ret; erofs_prepare_inode_buffer(dir); erofs_write_tail_end(dir); - return dir; + return 0; } _dir = opendir(dir->i_srcpath); if (!_dir) { erofs_err("failed to opendir at %s: %s", dir->i_srcpath, erofs_strerror(errno)); - return ERR_PTR(-errno); + return -errno; } nr_subdirs = 0; @@ -1097,10 +1155,6 @@ static struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir) goto err_closedir; } nr_subdirs++; - - /* to count i_nlink for directories */ - d->type = (dp->d_type == DT_DIR ? 
- EROFS_FT_DIR : EROFS_FT_UNKNOWN); } if (errno) { @@ -1111,21 +1165,24 @@ static struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir) ret = erofs_prepare_dir_file(dir, nr_subdirs); if (ret) - goto err; + return ret; ret = erofs_prepare_inode_buffer(dir); if (ret) - goto err; + return ret; + dir->bh->op = &erofs_skip_write_bhops; if (IS_ROOT(dir)) erofs_fixup_meta_blkaddr(dir); + i_nlink = 0; list_for_each_entry(d, &dir->i_subdirs, d_child) { - char buf[PATH_MAX], *trimmed; + char buf[PATH_MAX]; unsigned char ftype; + struct erofs_inode *inode; if (is_dot_dotdot(d->name)) { - erofs_d_invalidate(d); + ++i_nlink; continue; } @@ -1136,59 +1193,98 @@ static struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir) goto fail; } - trimmed = erofs_trim_for_progressinfo(erofs_fspath(buf), - sizeof("Processing ...") - 1); - erofs_update_progressinfo("Processing %s ...", trimmed); - free(trimmed); - d->inode = erofs_mkfs_build_tree_from_path(dir, buf); - if (IS_ERR(d->inode)) { - ret = PTR_ERR(d->inode); + inode = erofs_iget_from_path(buf, true); + + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); fail: d->inode = NULL; d->type = EROFS_FT_UNKNOWN; - goto err; + return ret; } - ftype = erofs_mode_to_ftype(d->inode->i_mode); - DBG_BUGON(ftype == EROFS_FT_DIR && d->type != ftype); + /* a hardlink to the existed inode */ + if (inode->i_parent) { + ++inode->i_nlink; + } else { + inode->i_parent = dir; + erofs_igrab(inode); + list_add_tail(&inode->i_subdirs, dirs); + ++dir->subdirs_queued; + } + ftype = erofs_mode_to_ftype(inode->i_mode); + i_nlink += (ftype == EROFS_FT_DIR); + d->inode = inode; d->type = ftype; - - erofs_d_invalidate(d); - erofs_info("add file %s/%s (nid %llu, type %u)", - dir->i_srcpath, d->name, (unsigned long long)d->nid, - d->type); + erofs_info("file %s/%s dumped (type %u)", + dir->i_srcpath, d->name, d->type); } - erofs_write_dir_file(dir); - erofs_write_tail_end(dir); - return dir; + /* + * if there're too many subdirs as compact form, 
set nlink=1 + * rather than upgrade to use extented form instead. + */ + if (i_nlink > USHRT_MAX && + dir->inode_isize == sizeof(struct erofs_inode_compact)) + dir->i_nlink = 1; + else + dir->i_nlink = i_nlink; + return 0; err_closedir: closedir(_dir); -err: - return ERR_PTR(ret); + return ret; } -struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent, - const char *path) +static void erofs_mkfs_dump_directory(struct erofs_inode *dir) { - struct erofs_inode *const inode = erofs_iget_from_path(path, true); + erofs_write_dir_file(dir); + erofs_write_tail_end(dir); + dir->bh->op = &erofs_write_inode_bhops; +} - if (IS_ERR(inode)) - return inode; +struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path) +{ + LIST_HEAD(dirs); + struct erofs_inode *inode, *root, *parent; - /* a hardlink to the existed inode */ - if (inode->i_parent) { - ++inode->i_nlink; - return inode; - } + root = erofs_iget_from_path(path, true); + if (IS_ERR(root)) + return root; - /* a completely new inode is found */ - if (parent) - inode->i_parent = parent; - else - inode->i_parent = inode; /* rootdir mark */ + (void)erofs_igrab(root); + root->i_parent = root; /* rootdir mark */ + root->subdirs_queued = 1; + list_add(&root->i_subdirs, &dirs); + + do { + int err; + char *trimmed; + + inode = list_first_entry(&dirs, struct erofs_inode, i_subdirs); + list_del(&inode->i_subdirs); + init_list_head(&inode->i_subdirs); + + trimmed = erofs_trim_for_progressinfo( + erofs_fspath(inode->i_srcpath), + sizeof("Processing ...") - 1); + erofs_update_progressinfo("Processing %s ...", trimmed); + free(trimmed); + + err = erofs_mkfs_build_tree(inode, &dirs); + if (err) { + root = ERR_PTR(err); + break; + } + parent = inode->i_parent; - return erofs_mkfs_build_tree(inode); + DBG_BUGON(!parent->subdirs_queued); + if (S_ISDIR(inode->i_mode) && !inode->subdirs_queued) + erofs_mkfs_dump_directory(inode); + if (!--parent->subdirs_queued) + erofs_mkfs_dump_directory(parent); + 
erofs_iput(inode); + } while (!list_empty(&dirs)); + return root; } struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name) @@ -1221,8 +1317,8 @@ struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name) } if (name == EROFS_PACKED_INODE) { - sbi.packed_nid = EROFS_PACKED_NID_UNALLOCATED; - inode->nid = sbi.packed_nid; + inode->sbi->packed_nid = EROFS_PACKED_NID_UNALLOCATED; + inode->nid = inode->sbi->packed_nid; } ret = erofs_write_compressed_file(inode, fd); @@ -1242,3 +1338,102 @@ struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name) erofs_write_tail_end(inode); return inode; } + +int erofs_rebuild_dump_tree(struct erofs_inode *dir) +{ + struct erofs_dentry *d, *n; + unsigned int nr_subdirs; + int ret; + + if (erofs_should_use_inode_extended(dir)) { + if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) { + erofs_err("file %s cannot be in compact form", + dir->i_srcpath); + return -EINVAL; + } + dir->inode_isize = sizeof(struct erofs_inode_extended); + } else { + dir->inode_isize = sizeof(struct erofs_inode_compact); + } + + /* strip all unnecessary overlayfs xattrs when ovlfs_strip is enabled */ + if (cfg.c_ovlfs_strip) + erofs_clear_opaque_xattr(dir); + else if (dir->whiteouts) + erofs_set_origin_xattr(dir); + + ret = erofs_prepare_xattr_ibody(dir); + if (ret < 0) + return ret; + + if (!S_ISDIR(dir->i_mode)) { + if (dir->bh) + return 0; + if (S_ISLNK(dir->i_mode)) { + ret = erofs_write_file_from_buffer(dir, dir->i_link); + free(dir->i_link); + dir->i_link = NULL; + } else if (dir->with_diskbuf) { + u64 fpos; + + ret = erofs_diskbuf_getfd(dir->i_diskbuf, &fpos); + if (ret >= 0) + ret = erofs_write_file(dir, ret, fpos); + erofs_diskbuf_close(dir->i_diskbuf); + free(dir->i_diskbuf); + dir->i_diskbuf = NULL; + dir->with_diskbuf = false; + } else { + ret = 0; + } + if (ret) + return ret; + ret = erofs_prepare_inode_buffer(dir); + if (ret) + return ret; + erofs_write_tail_end(dir); + return 0; + } + 
+ nr_subdirs = 0; + list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) { + if (cfg.c_ovlfs_strip && erofs_inode_is_whiteout(d->inode)) { + erofs_dbg("remove whiteout %s", d->inode->i_srcpath); + list_del(&d->d_child); + erofs_d_invalidate(d); + free(d); + continue; + } + ++nr_subdirs; + } + + ret = erofs_prepare_dir_layout(dir, nr_subdirs); + if (ret) + return ret; + + ret = erofs_prepare_inode_buffer(dir); + if (ret) + return ret; + dir->bh->op = &erofs_skip_write_bhops; + + if (IS_ROOT(dir)) + erofs_fixup_meta_blkaddr(dir); + + list_for_each_entry(d, &dir->i_subdirs, d_child) { + struct erofs_inode *inode; + + if (is_dot_dotdot(d->name)) + continue; + + inode = erofs_igrab(d->inode); + ret = erofs_rebuild_dump_tree(inode); + dir->i_nlink += (erofs_mode_to_ftype(inode->i_mode) == EROFS_FT_DIR); + erofs_iput(inode); + if (ret) + return ret; + } + erofs_write_dir_file(dir); + erofs_write_tail_end(dir); + dir->bh->op = &erofs_write_inode_bhops; + return 0; +} @@ -10,6 +10,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include <stdlib.h> #include <sys/stat.h> #include <sys/ioctl.h> #include "erofs/io.h" @@ -19,16 +20,13 @@ #ifdef HAVE_LINUX_FALLOC_H #include <linux/falloc.h> #endif - +#ifdef HAVE_SYS_STATFS_H +#include <sys/statfs.h> +#endif #define EROFS_MODNAME "erofs_io" #include "erofs/print.h" -static const char *erofs_devname; -int erofs_devfd = -1; -static u64 erofs_devsz; -static unsigned int erofs_nblobs, erofs_blobfd[256]; - -int dev_get_blkdev_size(int fd, u64 *bytes) +static int dev_get_blkdev_size(int fd, u64 *bytes) { errno = ENOTSUP; #ifdef BLKGETSIZE64 @@ -48,19 +46,25 @@ int dev_get_blkdev_size(int fd, u64 *bytes) return -errno; } -void dev_close(void) +void dev_close(struct erofs_sb_info *sbi) { - close(erofs_devfd); - erofs_devname = NULL; - erofs_devfd = -1; - erofs_devsz = 0; + close(sbi->devfd); + free(sbi->devname); + sbi->devname = NULL; + sbi->devfd = -1; + sbi->devsz = 0; } -int dev_open(const char *dev) +int dev_open(struct 
erofs_sb_info *sbi, const char *dev) { struct stat st; int fd, ret; +#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS) + bool again = false; + +repeat: +#endif fd = open(dev, O_RDWR | O_CREAT | O_BINARY, 0644); if (fd < 0) { erofs_err("failed to open(%s).", dev); @@ -76,23 +80,46 @@ int dev_open(const char *dev) switch (st.st_mode & S_IFMT) { case S_IFBLK: - ret = dev_get_blkdev_size(fd, &erofs_devsz); + ret = dev_get_blkdev_size(fd, &sbi->devsz); if (ret) { erofs_err("failed to get block device size(%s).", dev); close(fd); return ret; } - erofs_devsz = round_down(erofs_devsz, EROFS_BLKSIZ); + sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi)); break; case S_IFREG: - ret = ftruncate(fd, 0); - if (ret) { - erofs_err("failed to ftruncate(%s).", dev); - close(fd); - return -errno; + if (st.st_size) { +#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS) + struct statfs stfs; + + if (again) + return -ENOTEMPTY; + + /* + * fses like EXT4 and BTRFS will flush dirty blocks + * after truncate(0) even after the writeback happens + * (see kernel commit 7d8f9f7d150d and ccd2506bd431), + * which is NOT our intention. Let's work around this. 
+ */ + if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 || + stfs.f_type == 0x9123683E)) { + close(fd); + unlink(dev); + again = true; + goto repeat; + } +#endif + ret = ftruncate(fd, 0); + if (ret) { + erofs_err("failed to ftruncate(%s).", dev); + close(fd); + return -errno; + } } /* INT64_MAX is the limit of kernel vfs */ - erofs_devsz = INT64_MAX; + sbi->devsz = INT64_MAX; + sbi->devblksz = st.st_blksize; break; default: erofs_err("bad file type (%s, %o).", dev, st.st_mode); @@ -100,23 +127,27 @@ int dev_open(const char *dev) return -EINVAL; } - erofs_devname = dev; - erofs_devfd = fd; + sbi->devname = strdup(dev); + if (!sbi->devname) { + close(fd); + return -ENOMEM; + } + sbi->devfd = fd; erofs_info("successfully to open %s", dev); return 0; } -void blob_closeall(void) +void blob_closeall(struct erofs_sb_info *sbi) { unsigned int i; - for (i = 0; i < erofs_nblobs; ++i) - close(erofs_blobfd[i]); - erofs_nblobs = 0; + for (i = 0; i < sbi->nblobs; ++i) + close(sbi->blobfd[i]); + sbi->nblobs = 0; } -int blob_open_ro(const char *dev) +int blob_open_ro(struct erofs_sb_info *sbi, const char *dev) { int fd = open(dev, O_RDONLY | O_BINARY); @@ -125,14 +156,14 @@ int blob_open_ro(const char *dev) return -errno; } - erofs_blobfd[erofs_nblobs] = fd; - erofs_info("successfully to open blob%u %s", erofs_nblobs, dev); - ++erofs_nblobs; + sbi->blobfd[sbi->nblobs] = fd; + erofs_info("successfully to open blob%u %s", sbi->nblobs, dev); + ++sbi->nblobs; return 0; } /* XXX: temporary soluation. Disk I/O implementation needs to be refactored. 
*/ -int dev_open_ro(const char *dev) +int dev_open_ro(struct erofs_sb_info *sbi, const char *dev) { int fd = open(dev, O_RDONLY | O_BINARY); @@ -141,18 +172,17 @@ int dev_open_ro(const char *dev) return -errno; } - erofs_devfd = fd; - erofs_devname = dev; - erofs_devsz = INT64_MAX; + sbi->devname = strdup(dev); + if (!sbi->devname) { + close(fd); + return -ENOMEM; + } + sbi->devfd = fd; + sbi->devsz = INT64_MAX; return 0; } -u64 dev_length(void) -{ - return erofs_devsz; -} - -int dev_write(const void *buf, u64 offset, size_t len) +int dev_write(struct erofs_sb_info *sbi, const void *buf, u64 offset, size_t len) { int ret; @@ -164,60 +194,60 @@ int dev_write(const void *buf, u64 offset, size_t len) return -EINVAL; } - if (offset >= erofs_devsz || len > erofs_devsz || - offset > erofs_devsz - len) { + if (offset >= sbi->devsz || len > sbi->devsz || + offset > sbi->devsz - len) { erofs_err("Write posion[%" PRIu64 ", %zd] is too large beyond the end of device(%" PRIu64 ").", - offset, len, erofs_devsz); + offset, len, sbi->devsz); return -EINVAL; } #ifdef HAVE_PWRITE64 - ret = pwrite64(erofs_devfd, buf, len, (off64_t)offset); + ret = pwrite64(sbi->devfd, buf, len, (off64_t)offset); #else - ret = pwrite(erofs_devfd, buf, len, (off_t)offset); + ret = pwrite(sbi->devfd, buf, len, (off_t)offset); #endif if (ret != (int)len) { if (ret < 0) { erofs_err("Failed to write data into device - %s:[%" PRIu64 ", %zd].", - erofs_devname, offset, len); + sbi->devname, offset, len); return -errno; } erofs_err("Writing data into device - %s:[%" PRIu64 ", %zd] - was truncated.", - erofs_devname, offset, len); + sbi->devname, offset, len); return -ERANGE; } return 0; } -int dev_fillzero(u64 offset, size_t len, bool padding) +int dev_fillzero(struct erofs_sb_info *sbi, u64 offset, size_t len, bool padding) { - static const char zero[EROFS_BLKSIZ] = {0}; + static const char zero[EROFS_MAX_BLOCK_SIZE] = {0}; int ret; if (cfg.c_dry_run) return 0; #if defined(HAVE_FALLOCATE) && 
defined(FALLOC_FL_PUNCH_HOLE) - if (!padding && fallocate(erofs_devfd, FALLOC_FL_PUNCH_HOLE | + if (!padding && fallocate(sbi->devfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len) >= 0) return 0; #endif - while (len > EROFS_BLKSIZ) { - ret = dev_write(zero, offset, EROFS_BLKSIZ); + while (len > erofs_blksiz(sbi)) { + ret = dev_write(sbi, zero, offset, erofs_blksiz(sbi)); if (ret) return ret; - len -= EROFS_BLKSIZ; - offset += EROFS_BLKSIZ; + len -= erofs_blksiz(sbi); + offset += erofs_blksiz(sbi); } - return dev_write(zero, offset, len); + return dev_write(sbi, zero, offset, len); } -int dev_fsync(void) +int dev_fsync(struct erofs_sb_info *sbi) { int ret; - ret = fsync(erofs_devfd); + ret = fsync(sbi->devfd); if (ret) { erofs_err("Could not fsync device!!!"); return -EIO; @@ -225,36 +255,37 @@ int dev_fsync(void) return 0; } -int dev_resize(unsigned int blocks) +int dev_resize(struct erofs_sb_info *sbi, unsigned int blocks) { int ret; struct stat st; u64 length; - if (cfg.c_dry_run || erofs_devsz != INT64_MAX) + if (cfg.c_dry_run || sbi->devsz != INT64_MAX) return 0; - ret = fstat(erofs_devfd, &st); + ret = fstat(sbi->devfd, &st); if (ret) { erofs_err("failed to fstat."); return -errno; } - length = (u64)blocks * EROFS_BLKSIZ; + length = (u64)blocks * erofs_blksiz(sbi); if (st.st_size == length) return 0; if (st.st_size > length) - return ftruncate(erofs_devfd, length); + return ftruncate(sbi->devfd, length); length = length - st.st_size; #if defined(HAVE_FALLOCATE) - if (fallocate(erofs_devfd, 0, st.st_size, length) >= 0) + if (fallocate(sbi->devfd, 0, st.st_size, length) >= 0) return 0; #endif - return dev_fillzero(st.st_size, length, true); + return dev_fillzero(sbi, st.st_size, length, true); } -int dev_read(int device_id, void *buf, u64 offset, size_t len) +int dev_read(struct erofs_sb_info *sbi, int device_id, + void *buf, u64 offset, size_t len) { int read_count, fd; @@ -269,13 +300,13 @@ int dev_read(int device_id, void *buf, u64 offset, size_t 
len) } if (!device_id) { - fd = erofs_devfd; + fd = sbi->devfd; } else { - if (device_id > erofs_nblobs) { + if (device_id > sbi->nblobs) { erofs_err("invalid device id %d", device_id); return -ENODEV; } - fd = erofs_blobfd[device_id - 1]; + fd = sbi->blobfd[device_id - 1]; } while (len > 0) { @@ -284,18 +315,18 @@ int dev_read(int device_id, void *buf, u64 offset, size_t len) #else read_count = pread(fd, buf, len, (off_t)offset); #endif - if (read_count == -1 || read_count == 0) { - if (errno) { + if (read_count < 1) { + if (!read_count) { + erofs_info("Reach EOF of device - %s:[%" PRIu64 ", %zd].", + sbi->devname, offset, len); + memset(buf, 0, len); + return 0; + } else if (errno != EINTR) { erofs_err("Failed to read data from device - %s:[%" PRIu64 ", %zd].", - erofs_devname, offset, len); + sbi->devname, offset, len); return -errno; - } else { - erofs_err("Reach EOF of device - %s:[%" PRIu64 ", %zd].", - erofs_devname, offset, len); - return -EINVAL; } } - offset += read_count; len -= read_count; buf += read_count; @@ -387,7 +418,7 @@ ssize_t erofs_copy_file_range(int fd_in, erofs_off_t *off_in, length, 0); if (ret >= 0) goto out; - if (errno != ENOSYS) { + if (errno != ENOSYS && errno != EXDEV) { ret = -errno; out: *off_in = off64_in; diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c new file mode 100644 index 0000000..8667954 --- /dev/null +++ b/lib/kite_deflate.c @@ -0,0 +1,1271 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +/* + * erofs-utils/lib/kite_deflate.c + * + * Copyright (C) 2023, Alibaba Cloud + * Copyright (C) 2023, Gao Xiang <xiang@kernel.org> + */ +#include "erofs/defs.h" +#include "erofs/print.h" +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> + +unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2, + unsigned long sz); + +#ifdef TEST +#define kite_dbg(x, ...) fprintf(stderr, x "\n", ##__VA_ARGS__) +#else +#define kite_dbg(x, ...) 
+#endif + +#define kHistorySize32 (1U << 15) + +#define kNumLenSymbols32 256 +#define kNumLenSymbolsMax kNumLenSymbols32 + +#define kSymbolEndOfBlock 256 +#define kSymbolMatch (kSymbolEndOfBlock + 1) +#define kNumLenSlots 29 +#define kMainTableSize (kSymbolMatch + kNumLenSlots) + +#define kFixedLenTableSize (kSymbolMatch + 31) +#define FixedDistTableSize 32 + +#define kMainTableSize (kSymbolMatch + kNumLenSlots) +#define kDistTableSize32 30 + +#define kNumLitLenCodesMin 257 +#define kNumDistCodesMin 1 + +#define kNumLensCodesMin 4 +#define kLensTableSize 19 + +#define kMatchMinLen 3 +#define kMatchMaxLen32 kNumLenSymbols32 + kMatchMinLen - 1 + +#define kTableDirectLevels 16 +#define kBitLensRepNumber_3_6 kTableDirectLevels +#define kBitLens0Number_3_10 (kBitLensRepNumber_3_6 + 1) +#define kBitLens0Number_11_138 (kBitLens0Number_3_10 + 1) + +static u32 kstaticHuff_mainCodes[kFixedLenTableSize]; +static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = { + [0 ... 143] = 8, [144 ... 255] = 9, + [256 ... 279] = 7, [280 ... 
287] = 8, +}; +static u32 kstaticHuff_distCodes[kFixedLenTableSize]; + +const u8 kLenStart32[kNumLenSlots] = + {0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255}; + +const u8 kLenExtraBits32[kNumLenSlots] = + {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, + 5, 5, 5, 0}; + +/* First normalized distance for each code (0 = distance of 1) */ +const u32 kDistStart[kDistTableSize32] = + {0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768, + 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576}; + +/* extra bits for each distance code */ +const u8 kDistExtraBits[kDistTableSize32] = + {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +const u8 kCodeLengthAlphabetOrder[kLensTableSize] = + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +const u8 kLevelExtraBits[3] = {2, 3, 7}; + +#define kStored 0 +#define kFixedHuffman 1 +#define kDynamicHuffman 2 + +struct kite_deflate_symbol { + u16 len, dist; +}; + +struct kite_deflate_table { + u32 mainCodes[kMainTableSize]; + u8 litLenLevels[kMainTableSize]; + u32 distCodes[kDistTableSize32]; + u8 distLevels[kDistTableSize32]; + u32 levelCodes[kLensTableSize]; + u8 levelLens[kLensTableSize]; + + u8 numdistlens, numblcodes; + u16 numlitlens; +}; + +struct kite_deflate { + struct kite_deflate_table *tab; + const u8 *in; + u8 *out; + + u32 inlen, outlen; + u32 pos_in, pos_out; + u32 inflightbits; + u8 bitpos; + u8 numHuffBits; + u32 symbols; + + u32 costbits, startpos; + u8 encode_mode; + bool freq_changed, lastblock; + + /* Previous match for lazy matching */ + bool prev_valid; + u16 prev_longest; + + u32 mainFreqs[kMainTableSize]; + u32 distFreqs[kDistTableSize32]; + struct kite_deflate_table tables[2]; + + /* don't reset the following fields */ + struct kite_matchfinder *mf; + struct kite_deflate_symbol *sym; + u32 max_symbols; + bool lazy_search; +}; + +#define ZLIB_DISTANCE_TOO_FAR 4096 + +static u8 
g_LenSlots[kNumLenSymbolsMax]; + +#define kNumLogBits 9 // do not change it +static u8 g_FastPos[1 << kNumLogBits]; + +static void writebits(struct kite_deflate *s, unsigned int v, u8 bits) +{ + unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos; + + s->inflightbits |= (v << s->bitpos) & (!rem - 1); + if (bits > rem) { + u8 *out = s->out + s->pos_out; + + out[0] = s->inflightbits & 0xff; + out[1] = (s->inflightbits >> 8) & 0xff; + out[2] = (s->inflightbits >> 16) & 0xff; + out[3] = (s->inflightbits >> 24) & 0xff; + s->pos_out += 4; + DBG_BUGON(s->pos_out > s->outlen); + s->inflightbits = v >> rem; + s->bitpos = bits - rem; + return; + } + s->bitpos += bits; +} + +static void flushbits(struct kite_deflate *s) +{ + u8 *out = s->out + s->pos_out; + + if (!s->bitpos) + return; + out[0] = s->inflightbits & 0xff; + if (s->bitpos >= 8) { + out[1] = (s->inflightbits >> 8) & 0xff; + if (s->bitpos >= 16) { + out[2] = (s->inflightbits >> 16) & 0xff; + if (s->bitpos >= 24) + out[3] = (s->inflightbits >> 24) & 0xff; + } + } + s->pos_out += round_up(s->bitpos, 8) >> 3; + DBG_BUGON(s->pos_out > s->outlen); + s->bitpos = 0; + s->inflightbits = 0; +} + +#define kMaxLen 16 + +static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes, + const u32 *bl_count) +{ + u32 nextCodes[kMaxLen + 1]; /* next code value for each bit length */ + unsigned int code = 0; /* running code value */ + unsigned int bits, k; + + for (bits = 1; bits <= kMaxLen; ++bits) { + code = (code + bl_count[bits - 1]) << 1; + nextCodes[bits] = code; + } + + DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen); + + for (k = 0; k < nr_codes; ++k) + p[k] = nextCodes[lens[k]]++; +} + +static u32 deflate_reversebits_one(u32 code, u8 bits) +{ + unsigned int x = code; + + x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1); + x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2); + x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4); + + return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits); +} + +static 
void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n) +{ + while (n) { + u32 code = *codes; + + *codes++ = deflate_reversebits_one(code, *lens++); + --n; + } +} + +static void kite_deflate_init_once(void) +{ + static const u32 static_bl_count[kMaxLen + 1] = { + [7] = 279 - 256 + 1, + [8] = (143 + 1) + (287 - 280 + 1), + [9] = 255 - 144 + 1, + }; + unsigned int i, c, j, k; + + if (kstaticHuff_distCodes[31]) + return; + deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes, + kFixedLenTableSize, static_bl_count); + Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels, + kFixedLenTableSize); + + for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i) + kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5); + + for (i = 0; i < kNumLenSlots; i++) { + c = kLenStart32[i]; + j = 1 << kLenExtraBits32[i]; + + for (k = 0; k < j; k++, c++) + g_LenSlots[c] = (u8)i; + } + + c = 0; + for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) { + k = 1 << kDistExtraBits[i]; + for (j = 0; j < k; j++) + g_FastPos[c++] = i; + } +} + +static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs) +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = lens[0]; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (!nextlen) + max_count = 138, min_count = 3; + + for (n = 0; n < numlens; n++) { + curlen = nextlen; + nextlen = n + 1 < numlens ? 
lens[n + 1] : -1; + ++count; + + if (count < max_count && curlen == nextlen) + continue; + if (count < min_count) { + freqs[curlen] += count; + } else if (curlen != 0) { + if (curlen != prevlen) + freqs[curlen]++; + freqs[kBitLensRepNumber_3_6]++; + } else if (count <= 10) { + freqs[kBitLens0Number_3_10]++; + } else { + freqs[kBitLens0Number_11_138]++; + } + + count = 0; + prevlen = curlen; + if (!nextlen) + max_count = 138, min_count = 3; + else if (curlen == nextlen) + max_count = 6, min_count = 3; + else + max_count = 7, min_count = 4; + } +} + +static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens, + unsigned int numlens) +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = lens[0]; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + const u8 *bl_lens = s->tab->levelLens; + const u32 *bl_codes = s->tab->levelCodes; + + if (!nextlen) + max_count = 138, min_count = 3; + + for (n = 0; n < numlens; n++) { + curlen = nextlen; + nextlen = n + 1 < numlens ? 
lens[n + 1] : -1; + ++count; + + if (count < max_count && curlen == nextlen) + continue; + if (count < min_count) { + do { + writebits(s, bl_codes[curlen], bl_lens[curlen]); + } while (--count); + } else if (curlen) { + if (curlen != prevlen) { + writebits(s, bl_codes[curlen], bl_lens[curlen]); + count--; + } + writebits(s, bl_codes[kBitLensRepNumber_3_6], + bl_lens[kBitLensRepNumber_3_6]); + writebits(s, count - 3, 2); + } else if (count <= 10) { + writebits(s, bl_codes[kBitLens0Number_3_10], + bl_lens[kBitLens0Number_3_10]); + writebits(s, count - 3, 3); + } else { + writebits(s, bl_codes[kBitLens0Number_11_138], + bl_lens[kBitLens0Number_11_138]); + writebits(s, count - 11, 7); + } + + count = 0; + prevlen = curlen; + if (!nextlen) + max_count = 138, min_count = 3; + else if (curlen == nextlen) + max_count = 6, min_count = 3; + else + max_count = 7, min_count = 4; + } +} + +static void kite_deflate_setfixedtrees(struct kite_deflate *s) +{ + writebits(s, (kFixedHuffman << 1) + s->lastblock, 3); +} + +static void kite_deflate_sendtrees(struct kite_deflate *s) +{ + struct kite_deflate_table *t = s->tab; + unsigned int i; + + writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3); + writebits(s, t->numlitlens - kNumLitLenCodesMin, 5); + writebits(s, t->numdistlens - kNumDistCodesMin, 5); + writebits(s, t->numblcodes - kNumLensCodesMin, 4); + + for (i = 0; i < t->numblcodes; i++) + writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3); + + Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize); + kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens); + kite_deflate_sendtree(s, t->distLevels, t->numdistlens); +} + +static inline unsigned int deflateDistSlot(unsigned int pos) +{ + const unsigned int zz = (kNumLogBits - 1) & + ((((1U << kNumLogBits) - 1) - pos) >> (31 - 3)); + + return g_FastPos[pos >> zz] + (zz * 2); +} + +static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed) +{ + int i; + u32 *mainCodes, *distCodes; + const u8 
*litLenLevels, *distLevels; + + if (!fixed) { + struct kite_deflate_table *t = s->tab; + + mainCodes = t->mainCodes; distCodes = t->distCodes; + litLenLevels = t->litLenLevels; distLevels = t->distLevels; + + Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize); + Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32); + } else { + mainCodes = kstaticHuff_mainCodes; + distCodes = kstaticHuff_distCodes; + + litLenLevels = kstaticHuff_litLenLevels; + distLevels = NULL; + } + + for (i = 0; i < s->symbols; ++i) { + struct kite_deflate_symbol *sym = &s->sym[i]; + + if (sym->len < kMatchMinLen) { /* literal */ + writebits(s, mainCodes[sym->dist], + litLenLevels[sym->dist]); + } else { + unsigned int lenSlot, distSlot; + unsigned int lc = sym->len - kMatchMinLen; + + lenSlot = g_LenSlots[lc]; + writebits(s, mainCodes[kSymbolMatch + lenSlot], + litLenLevels[kSymbolMatch + lenSlot]); + writebits(s, lc - kLenStart32[lenSlot], + kLenExtraBits32[lenSlot]); + + distSlot = deflateDistSlot(sym->dist - 1); + writebits(s, distCodes[distSlot], + fixed ? 
5 : distLevels[distSlot]); + writebits(s, sym->dist - 1 - kDistStart[distSlot], + kDistExtraBits[distSlot]); + } + } + writebits(s, mainCodes[kSymbolEndOfBlock], + litLenLevels[kSymbolEndOfBlock]); +} + +static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num) +{ + u32 price = 0; + + while (num) { + price += (*lens++) * (*freqs++); + --num; + } + return price; +} + +static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num, + const u8 *extraBits, u32 extraBase) +{ + return Huffman_GetPrice(freqs, lens, num) + + Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase); +} + +/* Adapted from C/HuffEnc.c (7zip) for now */ +#define HeapSortDown(p, k, size, temp) \ + { for (;;) { \ + size_t s = (k << 1); \ + if (s > size) break; \ + if (s < size && p[s + 1] > p[s]) s++; \ + if (temp >= p[s]) break; \ + p[k] = p[s]; k = s; \ + } p[k] = temp; } + +static void HeapSort(u32 *p, size_t size) +{ + if (size <= 1) + return; + p--; + { + size_t i = size / 2; + do + { + u32 temp = p[i]; + size_t k = i; + HeapSortDown(p, k, size, temp) + } + while (--i != 0); + } + /* + do + { + size_t k = 1; + UInt32 temp = p[size]; + p[size--] = p[1]; + HeapSortDown(p, k, size, temp) + } + while (size > 1); + */ + while (size > 3) + { + u32 temp = p[size]; + size_t k = (p[3] > p[2]) ? 
3 : 2; + p[size--] = p[1]; + p[1] = p[k]; + HeapSortDown(p, k, size, temp) + } + { + u32 temp = p[size]; + p[size] = p[1]; + if (size > 2 && p[2] < temp) + { + p[1] = p[2]; + p[2] = temp; + } + else + p[1] = temp; + } +} + +#define NUM_BITS 10 +#define MASK (((unsigned)1 << NUM_BITS) - 1) + +static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens, + unsigned int numSymbols, unsigned int maxLen) +{ + u32 num, i; + + num = 0; + /* if (maxLen > 10) maxLen = 10; */ + + for (i = 0; i < numSymbols; i++) { + u32 freq = freqs[i]; + + if (!freq) + lens[i] = 0; + else + p[num++] = i | (freq << NUM_BITS); + } + HeapSort(p, num); + + if (num < 2) { + unsigned int minCode = 0, maxCode = 1; + + if (num == 1) { + maxCode = (unsigned int)p[0] & MASK; + if (!maxCode) + maxCode++; + } + p[minCode] = 0; + p[maxCode] = 1; + lens[minCode] = lens[maxCode] = 1; + return; + } + + { + u32 b, e, i; + + i = b = e = 0; + do { + u32 n, m, freq; + + n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; + freq = (p[n] & ~MASK); + p[n] = (p[n] & MASK) | (e << NUM_BITS); + m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? 
i++ : b++; + freq += (p[m] & ~MASK); + p[m] = (p[m] & MASK) | (e << NUM_BITS); + p[e] = (p[e] & MASK) | freq; + e++; + } while (num - e > 1); + + { + u32 lenCounters[kMaxLen + 1]; + + for (i = 0; i <= kMaxLen; i++) + lenCounters[i] = 0; + + p[--e] &= MASK; + lenCounters[1] = 2; + while (e > 0) { + u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1; + + p[e] = (p[e] & MASK) | (len << NUM_BITS); + if (len >= maxLen) + for (len = maxLen - 1; lenCounters[len] == 0; len--); + lenCounters[len]--; + lenCounters[(size_t)len + 1] += 2; + } + + { + u32 len; + + i = 0; + for (len = maxLen; len != 0; len--) { + u32 k; + for (k = lenCounters[len]; k != 0; k--) + lens[p[i++] & MASK] = (u8)len; + } + } + deflate_genhuffcodes(lens, p, numSymbols, lenCounters); + } + } +} + +static void kite_deflate_fixdynblock(struct kite_deflate *s) +{ + struct kite_deflate_table *t = s->tab; + unsigned int numlitlens, numdistlens, numblcodes; + u32 levelFreqs[kLensTableSize] = {0}; + u32 opt_mainlen; + + if (!s->freq_changed) + return; + + /* in order to match zlib */ + s->numHuffBits = kMaxLen; +// s->numHuffBits = (s->symbols > 18000 ? 12 : +// (s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 
10 : 9))); + + Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels, + kMainTableSize, s->numHuffBits); + Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels, + kDistTableSize32, s->numHuffBits); + + /* code lengths for the literal/length alphabet */ + numlitlens = kMainTableSize; + while (numlitlens > kNumLitLenCodesMin && + !t->litLenLevels[numlitlens - 1]) + --numlitlens; + + /* code lengths for the distance alphabet */ + numdistlens = kDistTableSize32; + while (numdistlens > kNumDistCodesMin && + !t->distLevels[numdistlens - 1]) + --numdistlens; + + kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs); + kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs); + Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens, + kLensTableSize, 7); + numblcodes = kLensTableSize; + while (numblcodes > kNumLensCodesMin && + !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]]) + --numblcodes; + + t->numlitlens = numlitlens; + t->numdistlens = numdistlens; + t->numblcodes = numblcodes; + + opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels, + kMainTableSize, kLenExtraBits32, kSymbolMatch) + + Huffman_GetPriceEx(s->distFreqs, t->distLevels, + kDistTableSize32, kDistExtraBits, 0); + s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes + + Huffman_GetPriceEx(levelFreqs, t->levelLens, + kLensTableSize, kLevelExtraBits, kTableDirectLevels) + + opt_mainlen; + s->freq_changed = false; +} + + +/* + * an array used used by the LZ-based encoder to hold the length-distance pairs + * found by LZ matchfinder. 
/*
 * State of the hash-chain match finder used by the LZ stage.
 *
 * Positions stored in `hash` and `chain` are `base + offset` values rather
 * than plain buffer offsets; kite_mf_reset() bumps `base` for every new
 * input buffer so that entries left over from earlier input fall outside
 * the window and are ignored.
 */
struct kite_matchfinder {
	/* pointer to buffer with data to be compressed */
	const u8 *buffer;

	/* points to the first byte past the valid input data */
	const u8 *end;

	/*
	 * LZ matchfinder hash chain representation:
	 * hash[]  - most recent position seen for each hash value
	 *           (allocated with 0x10000 entries by kite_mf_init());
	 * chain[] - for each of the last `wsiz` positions, the previous
	 *           position that had the same hash value.
	 */
	u32 *hash, *chain;

	/* bias added to `offset` to form the positions kept in hash/chain */
	u32 base;

	/* offset in `buffer` of the next byte to run through the match finder */
	u32 offset;

	/* index into chain[] for the current byte; advances modulo wsiz */
	u32 cyclic_pos;

	/* maximum length of a match that the matchfinder will try to find. */
	u16 nice_len;

	/* the total sliding window size (a power of two, see kite_mf_init()) */
	u16 wsiz;

	/* maximum number of hash chain entries visited per search */
	u16 depth;

	/*
	 * lazy matching only searches again while the previous match is
	 * shorter than this length
	 */
	u16 max_lazy;

	/*
	 * the search depth is halved once the previous match is at least
	 * this long
	 */
	u8 good_len;

	/*
	 * Matches for the current position: points to one of the two rows of
	 * matches_matrix (the lazy matcher alternates between them to keep
	 * the previous position's matches around).  Slot 0 of a row describes
	 * the literal at that position; found matches start at slot 1.
	 */
	struct kite_match *matches;
	struct kite_match matches_matrix[2][4];
};
+ */ +u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = { + 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, + 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, + 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, + 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, + 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, + 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, + 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, + 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, + 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, + 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, + 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, + 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, + 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, + 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, + 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, + 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, + 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, + 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, + 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, + 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, + 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, + 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, + 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, + 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, + 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, + 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, + 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, + 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, + 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, + 0x6b46, 
0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, + 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, + 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 +}; + +int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen) +{ + const u8 *cur = mf->buffer + mf->offset; + const u8 *qbase = mf->buffer - mf->base; + u32 curMatch; + unsigned int v, hv, i, k, p, wsiz; + + if (mf->end - cur < bestlen + 1) + return 0; + + v = get_unaligned((u16 *)cur); + hv = v ^ crc_ccitt_table[cur[2]]; + curMatch = mf->hash[hv]; + p = mf->base + mf->offset; + mf->hash[hv] = p; + mf->chain[mf->cyclic_pos] = curMatch; + wsiz = mf->wsiz; + k = 1; + + if (depth) { + unsigned int wpos = wsiz + mf->cyclic_pos; + + hv = min_t(unsigned int, mf->nice_len, mf->end - cur); + DBG_BUGON(hv > kMatchMaxLen32); + do { + unsigned int diff = p - curMatch; + const u8 *q; + + if (diff >= wsiz) + break; + + q = qbase + curMatch; + curMatch = mf->chain[(wpos - diff) & (wsiz - 1)]; + if (v == get_unaligned((u16 *)q) && (bestlen < 3 || ( + get_unaligned((u16 *)(cur + bestlen - 1)) == + get_unaligned((u16 *)(q + bestlen - 1)) && + !memcmp(cur + 3, q + 3, bestlen - 3)))) { + DBG_BUGON(cur[2] != q[2]); + i = erofs_memcmp2(cur + bestlen + 1, + q + bestlen + 1, hv - bestlen - 1); + bestlen += 1 + i; + + k -= (k >= ARRAY_SIZE(mf->matches_matrix[0])); + mf->matches[k++] = (struct kite_match) { + .len = bestlen, + .dist = diff, + }; + if (bestlen >= hv) + break; + } + } while (--depth); + } + mf->offset++; + mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1); + return k - 1; +} + +/* let's align with zlib */ +static const struct kite_matchfinder_cfg { + u16 good_length; /* reduce lazy search above this match length */ + u16 max_lazy; /* do not perform lazy search above this match length */ + u16 nice_length; /* quit search above this match length */ + u16 depth; + bool lazy_search; +} kite_mfcfg[10] = { +/* good lazy nice depth */ +/* 0 */ {0, 0, 0, 0, false}, /* store 
only [unsupported] */ +/* 1 */ {4, 4, 8, 4, false}, /* maximum speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, false}, +/* 3 */ {4, 6, 32, 32, false}, + +/* 4 */ {4, 4, 16, 16, true}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, true}, +/* 6 */ {8, 16, 128, 128, true}, +/* 7 */ {8, 32, 128, 256, true}, +/* 8 */ {32, 128, 258, 1024, true}, +/* 9 */ {32, 258, 258, 4096, true}, /* maximum compression */ +}; + +static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level) +{ + const struct kite_matchfinder_cfg *cfg; + + if (!level || level >= ARRAY_SIZE(kite_mfcfg)) + return -EINVAL; + cfg = &kite_mfcfg[level]; + + if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz) + return -EINVAL; + + mf->hash = calloc(0x10000, sizeof(mf->hash[0])); + if (!mf->hash) + return -ENOMEM; + + mf->chain = malloc(sizeof(mf->chain[0]) * wsiz); + if (!mf->chain) { + free(mf->hash); + mf->hash = NULL; + return -ENOMEM; + } + mf->wsiz = wsiz; + + mf->good_len = cfg->good_length; + mf->nice_len = cfg->nice_length; + mf->depth = cfg->depth; + mf->max_lazy = cfg->max_lazy; + return cfg->lazy_search; +} + +static void kite_mf_reset(struct kite_matchfinder *mf, + const void *buffer, const void *end) +{ + mf->buffer = buffer; + mf->end = end; + + /* + * Set the initial value as max_distance + 1. This would avoid hash + * zero initialization. + */ + mf->base += mf->offset + kHistorySize32 + 1; + + mf->offset = 0; + mf->cyclic_pos = 0; + + mf->matches = mf->matches_matrix[0]; + mf->matches_matrix[0][0].len = + mf->matches_matrix[1][0].len = kMatchMinLen - 1; +} + +static bool deflate_count_code(struct kite_deflate *s, bool literal, + unsigned int lenSlot, unsigned int distSlot) +{ + struct kite_deflate_table *t = s->tab; + unsigned int lenbase = (literal ? 
0 : kSymbolMatch); + u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos; + bool recalc = false; + unsigned int bits; + + s->freq_changed = true; + ++s->mainFreqs[lenbase + lenSlot]; + if (!literal) + ++s->distFreqs[distSlot]; + + if (s->encode_mode == 1) { + if (literal) { + bits = kstaticHuff_litLenLevels[lenSlot]; + goto out; + } + bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] + + kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot]; + goto out; + } + + /* XXX: more ideas to be done later */ + recalc |= (!literal && !t->distLevels[distSlot]); + recalc |= !t->litLenLevels[lenbase + lenSlot]; + if (recalc) { + kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm', + lenSlot, distSlot); + s->tab = s->tables + (s->tab == s->tables); + kite_deflate_fixdynblock(s); + bits = 0; + goto out; + } + + if (literal) { + bits = t->litLenLevels[lenSlot]; + goto out; + } + + bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] + + t->litLenLevels[kSymbolMatch + lenSlot] + + kLenExtraBits32[lenSlot]; +out: + if (rem < s->costbits + bits) { + --s->mainFreqs[lenbase + lenSlot]; + if (!literal) + --s->distFreqs[distSlot]; + if (recalc) + s->tab = s->tables + (s->tab == s->tables); + return false; + } + s->costbits += bits; + return true; +} + +static bool kite_deflate_tally(struct kite_deflate *s, + struct kite_match *match) +{ + struct kite_deflate_symbol *sym = s->sym + s->symbols; + u32 fixedcost = ~0; + bool hassp; + + *sym = (struct kite_deflate_symbol) { + .len = match->len, + .dist = match->dist, + }; + +retry: + if (sym->len < kMatchMinLen) { + hassp = deflate_count_code(s, true, sym->dist, 0); + } else { + unsigned int lc = sym->len - kMatchMinLen; + unsigned int lenSlot = g_LenSlots[lc]; + unsigned int distSlot = deflateDistSlot(sym->dist - 1); + + hassp = deflate_count_code(s, false, lenSlot, distSlot); + } + + if (!hassp) { + if (s->encode_mode == 1) { + fixedcost = s->costbits; + s->encode_mode = 2; + goto retry; + } + s->lastblock = true; + if 
(fixedcost <= s->costbits) + s->encode_mode = 1; + return true; + } + ++s->symbols; + return false; +} + +static void kite_deflate_writestore(struct kite_deflate *s) +{ + bool fb = !s->startpos && !s->bitpos; + unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos; + + do { + unsigned int len = min_t(unsigned int, totalsiz, 65535); + + totalsiz -= len; + writebits(s, (fb << 3) | (kStored << 1) | + (s->lastblock && !totalsiz), 3 + fb); + flushbits(s); + writebits(s, len, 16); + writebits(s, len ^ 0xffff, 16); + flushbits(s); + memcpy(s->out + s->pos_out, s->in + s->startpos, len); + s->pos_out += len; + s->startpos += len; + } while (totalsiz); +} + +static void kite_deflate_endblock(struct kite_deflate *s) +{ + if (s->encode_mode == 1) { + u32 fixedcost = s->costbits; + unsigned int storelen, storeblocks, storecost; + + kite_deflate_fixdynblock(s); + if (fixedcost > s->costbits) + s->encode_mode = 2; + else + s->costbits = fixedcost; + + storelen = s->pos_in - s->prev_valid - s->startpos; + storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U); + storecost = (8 - s->bitpos) + storeblocks - 1 + + storeblocks * 32 + storelen * 8; + if (s->costbits > storecost) { + s->costbits = storecost; + s->encode_mode = 0; + } + } + + s->lastblock |= (s->costbits + s->bitpos >= + (s->outlen - s->pos_out) * 8); +} + +static void kite_deflate_startblock(struct kite_deflate *s) +{ + memset(s->mainFreqs, 0, sizeof(s->mainFreqs)); + memset(s->distFreqs, 0, sizeof(s->distFreqs)); + memset(s->tables, 0, sizeof(s->tables[0])); + s->symbols = 0; + s->mainFreqs[kSymbolEndOfBlock]++; + s->encode_mode = 1; + s->tab = s->tables; + s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock]; +} + +static bool kite_deflate_commitblock(struct kite_deflate *s) +{ + if (s->encode_mode == 1) { + kite_deflate_setfixedtrees(s); + kite_deflate_writeblock(s, true); + } else if (s->encode_mode == 2) { + kite_deflate_sendtrees(s); + kite_deflate_writeblock(s, false); + } else { + 
kite_deflate_writestore(s); + } + s->startpos = s->pos_in - s->prev_valid; + return s->lastblock; +} + +static bool kite_deflate_fast(struct kite_deflate *s) +{ + struct kite_matchfinder *mf = s->mf; + + kite_deflate_startblock(s); + while (1) { + int matches = kite_mf_getmatches_hc3(mf, mf->depth, + kMatchMinLen - 1); + + if (matches) { + unsigned int len = mf->matches[matches].len; + unsigned int dist = mf->matches[matches].dist; + + if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR) + goto nomatch; + + kite_dbg("%u matches found: longest [%u,%u] of distance %u", + matches, s->pos_in, s->pos_in + len - 1, dist); + + if (kite_deflate_tally(s, mf->matches + matches)) + break; + s->pos_in += len; + /* skip the rest bytes */ + while (--len) + (void)kite_mf_getmatches_hc3(mf, 0, 0); + } else { +nomatch: + mf->matches[0].dist = s->in[s->pos_in]; + if (isprint(s->in[s->pos_in])) + kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in); + else + kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in); + + if (kite_deflate_tally(s, mf->matches)) + break; + ++s->pos_in; + } + + s->lastblock |= (s->pos_in >= s->inlen); + if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) { + kite_deflate_endblock(s); + break; + } + } + return kite_deflate_commitblock(s); +} + +static bool kite_deflate_slow(struct kite_deflate *s) +{ + struct kite_matchfinder *mf = s->mf; + bool flush = false; + + kite_deflate_startblock(s); + while (1) { + struct kite_match *prev_matches = mf->matches; + unsigned int len = kMatchMinLen - 1; + int matches; + unsigned int len0; + + mf->matches = mf->matches_matrix[ + mf->matches == mf->matches_matrix[0]]; + mf->matches[0].dist = s->in[s->pos_in]; + + len0 = prev_matches[s->prev_longest].len; + if (len0 < mf->max_lazy) { + matches = kite_mf_getmatches_hc3(mf, mf->depth >> + (len0 >= mf->good_len), len0); + if (matches) { + len = mf->matches[matches].len; + if (len == kMatchMinLen && + mf->matches[matches].dist > 
ZLIB_DISTANCE_TOO_FAR) { + matches = 0; + len = kMatchMinLen - 1; + } + } + } else { + matches = 0; + (void)kite_mf_getmatches_hc3(mf, 0, 0); + } + + if (len < len0) { + if (kite_deflate_tally(s, + prev_matches + s->prev_longest)) + break; + + s->pos_in += --len0; + /* skip the rest bytes */ + while (--len0) + (void)kite_mf_getmatches_hc3(mf, 0, 0); + s->prev_valid = false; + s->prev_longest = 0; + } else { + if (!s->prev_valid) + s->prev_valid = true; + else if (kite_deflate_tally(s, prev_matches)) + break; + ++s->pos_in; + s->prev_longest = matches; + } + + s->lastblock |= (s->pos_in >= s->inlen); + if (s->pos_in >= s->inlen) { + flush = true; + break; + } + if (s->symbols >= s->max_symbols) { + kite_deflate_endblock(s); + break; + } + } + + if (flush && s->prev_valid) { + (void)kite_deflate_tally(s, mf->matches + s->prev_longest); + s->prev_valid = false; + } + return kite_deflate_commitblock(s); +} + +void kite_deflate_end(struct kite_deflate *s) +{ + if (s->mf) { + if (s->mf->hash) + free(s->mf->hash); + if (s->mf->chain) + free(s->mf->chain); + free(s->mf); + } + if (s->sym) + free(s->sym); + free(s); +} + +struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size) +{ + struct kite_deflate *s; + int err; + + kite_deflate_init_once(); + s = calloc(1, sizeof(*s)); + if (!s) + return ERR_PTR(-ENOMEM); + + s->max_symbols = 16384; + s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols); + if (!s->sym) { + err = -ENOMEM; + goto err_out; + } + + s->mf = malloc(sizeof(*s->mf)); + if (!s->mf) { + err = -ENOMEM; + goto err_out; + } + + if (!dict_size) + dict_size = kHistorySize32; + + err = kite_mf_init(s->mf, dict_size, level); + if (err < 0) + goto err_out; + + s->lazy_search = err; + return s; +err_out: + if (s->mf) + free(s->mf); + if (s->sym) + free(s->sym); + free(s); + return ERR_PTR(err); +} + +int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out, + unsigned int *srcsize, unsigned int target_dstsize) +{ + memset(s, 0, 
offsetof(struct kite_deflate, mainFreqs)); + s->in = in; + s->inlen = *srcsize; + s->out = out; + s->outlen = target_dstsize; + kite_mf_reset(s->mf, in, in + s->inlen); + + if (s->lazy_search) + while (!kite_deflate_slow(s)); + else + while (!kite_deflate_fast(s)); + flushbits(s); + + *srcsize = s->startpos; + return s->pos_out; +} + +#if TEST +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> + +int main(int argc, char *argv[]) +{ + int fd; + u64 filelength; + u8 out[1048576], *buf; + int dstsize = 4096; + unsigned int srcsize, outsize; + struct kite_deflate *s; + + fd = open(argv[1], O_RDONLY); + if (fd < 0) + return -errno; + if (argc > 2) + dstsize = atoi(argv[2]); + filelength = lseek(fd, 0, SEEK_END); + + s = kite_deflate_init(9, 0); + if (IS_ERR(s)) + return PTR_ERR(s); + + filelength = lseek(fd, 0, SEEK_END); + buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) + return -errno; + close(fd); + + srcsize = filelength; + outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize); + fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644); + write(fd, out, outsize); + close(fd); + kite_deflate_end(s); + return 0; +} +#endif diff --git a/lib/liberofs_uuid.h b/lib/liberofs_uuid.h new file mode 100644 index 0000000..63b358a --- /dev/null +++ b/lib/liberofs_uuid.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */ +#ifndef __EROFS_LIB_UUID_H +#define __EROFS_LIB_UUID_H + +void erofs_uuid_generate(unsigned char *out); +void erofs_uuid_unparse_lower(const unsigned char *buf, char *out); +int erofs_uuid_parse(const char *in, unsigned char *uu); + +#endif diff --git a/lib/namei.c b/lib/namei.c index 7b69a59..294d7a3 100644 --- a/lib/namei.c +++ b/lib/namei.c @@ -26,11 +26,15 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) { int ret, ifmt; char buf[sizeof(struct erofs_inode_extended)]; + struct erofs_sb_info *sbi = vi->sbi; struct erofs_inode_compact *dic; struct erofs_inode_extended *die; 
- const erofs_off_t inode_loc = iloc(vi->nid); + erofs_off_t inode_loc; - ret = dev_read(0, buf, inode_loc, sizeof(*dic)); + DBG_BUGON(!sbi); + inode_loc = erofs_iloc(vi); + + ret = dev_read(sbi, 0, buf, inode_loc, sizeof(*dic)); if (ret < 0) return -EIO; @@ -47,7 +51,8 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) case EROFS_INODE_LAYOUT_EXTENDED: vi->inode_isize = sizeof(struct erofs_inode_extended); - ret = dev_read(0, buf + sizeof(*dic), inode_loc + sizeof(*dic), + ret = dev_read(sbi, 0, buf + sizeof(*dic), + inode_loc + sizeof(*dic), sizeof(*die) - sizeof(*dic)); if (ret < 0) return -EIO; @@ -55,6 +60,7 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) die = (struct erofs_inode_extended *)buf; vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); vi->i_mode = le16_to_cpu(die->i_mode); + vi->i_ino[0] = le32_to_cpu(die->i_ino); switch (vi->i_mode & S_IFMT) { case S_IFREG: @@ -90,6 +96,7 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) vi->inode_isize = sizeof(struct erofs_inode_compact); vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); vi->i_mode = le16_to_cpu(dic->i_mode); + vi->i_ino[0] = le32_to_cpu(dic->i_ino); switch (vi->i_mode & S_IFMT) { case S_IFREG: @@ -114,8 +121,8 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) vi->i_gid = le16_to_cpu(dic->i_gid); vi->i_nlink = le16_to_cpu(dic->i_nlink); - vi->i_mtime = sbi.build_time; - vi->i_mtime_nsec = sbi.build_time_nsec; + vi->i_mtime = sbi->build_time; + vi->i_mtime_nsec = sbi->build_time_nsec; vi->i_size = le32_to_cpu(dic->i_size); if (vi->datalayout == EROFS_INODE_CHUNK_BASED) @@ -134,10 +141,11 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) vi->u.chunkformat, vi->nid | 0ULL); return -EOPNOTSUPP; } - vi->u.chunkbits = LOG_BLOCK_SIZE + + vi->u.chunkbits = sbi->blkszbits + (vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); - } else if (erofs_inode_is_data_compressed(vi->datalayout)) + } else if 
(erofs_inode_is_data_compressed(vi->datalayout)) { return z_erofs_fill_inode(vi); + } return 0; bogusimode: erofs_err("bogus i_mode (%o) @ nid %llu", vi->i_mode, vi->nid | 0ULL); @@ -182,17 +190,18 @@ struct erofs_dirent *find_target_dirent(erofs_nid_t pnid, } struct nameidata { + struct erofs_sb_info *sbi; erofs_nid_t nid; unsigned int ftype; }; -int erofs_namei(struct nameidata *nd, - const char *name, unsigned int len) +int erofs_namei(struct nameidata *nd, const char *name, unsigned int len) { erofs_nid_t nid = nd->nid; int ret; - char buf[EROFS_BLKSIZ]; - struct erofs_inode vi = { .nid = nid }; + char buf[EROFS_MAX_BLOCK_SIZE]; + struct erofs_sb_info *sbi = nd->sbi; + struct erofs_inode vi = { .sbi = sbi, .nid = nid }; erofs_off_t offset; ret = erofs_read_inode_from_disk(&vi); @@ -202,7 +211,7 @@ int erofs_namei(struct nameidata *nd, offset = 0; while (offset < vi.i_size) { erofs_off_t maxsize = min_t(erofs_off_t, - vi.i_size - offset, EROFS_BLKSIZ); + vi.i_size - offset, erofs_blksiz(sbi)); struct erofs_dirent *de = (void *)buf; unsigned int nameoff; @@ -212,7 +221,7 @@ int erofs_namei(struct nameidata *nd, nameoff = le16_to_cpu(de->nameoff); if (nameoff < sizeof(struct erofs_dirent) || - nameoff >= EROFS_BLKSIZ) { + nameoff >= erofs_blksiz(sbi)) { erofs_err("invalid de[0].nameoff %u @ nid %llu", nameoff, nid | 0ULL); return -EFSCORRUPTED; @@ -234,7 +243,7 @@ int erofs_namei(struct nameidata *nd, static int link_path_walk(const char *name, struct nameidata *nd) { - nd->nid = sbi.root_nid; + nd->nid = nd->sbi->root_nid; while (*name == '/') name++; @@ -253,7 +262,6 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (ret) return ret; - name = p; /* Skip until no more slashes. 
*/ for (name = p; *name == '/'; ++name) ; @@ -264,7 +272,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) int erofs_ilookup(const char *path, struct erofs_inode *vi) { int ret; - struct nameidata nd; + struct nameidata nd = { .sbi = vi->sbi }; ret = link_path_walk(path, &nd); if (ret) diff --git a/lib/rb_tree.h b/lib/rb_tree.h index 5b35c74..67ec0a7 100644 --- a/lib/rb_tree.h +++ b/lib/rb_tree.h @@ -93,7 +93,7 @@ int rb_tree_remove_with_cb (struct rb_tree *self, void *value, rb_t int rb_tree_test (struct rb_tree *self, struct rb_node *root); struct rb_iter *rb_iter_alloc (); -struct rb_iter *rb_iter_init (); +struct rb_iter *rb_iter_init (struct rb_iter *self); struct rb_iter *rb_iter_create (); void rb_iter_dealloc (struct rb_iter *self); void *rb_iter_first (struct rb_iter *self, struct rb_tree *tree); diff --git a/lib/rebuild.c b/lib/rebuild.c new file mode 100644 index 0000000..5993730 --- /dev/null +++ b/lib/rebuild.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#define _GNU_SOURCE +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <config.h> +#if defined(HAVE_SYS_SYSMACROS_H) +#include <sys/sysmacros.h> +#endif +#include "erofs/print.h" +#include "erofs/inode.h" +#include "erofs/rebuild.h" +#include "erofs/io.h" +#include "erofs/dir.h" +#include "erofs/xattr.h" +#include "erofs/blobchunk.h" +#include "erofs/internal.h" + +#ifdef HAVE_LINUX_AUFS_TYPE_H +#include <linux/aufs_type.h> +#else +#define AUFS_WH_PFX ".wh." 
+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" +#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME +#endif + +static struct erofs_dentry *erofs_rebuild_mkdir(struct erofs_inode *dir, + const char *s) +{ + struct erofs_inode *inode; + struct erofs_dentry *d; + + inode = erofs_new_inode(); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + inode->i_mode = S_IFDIR | 0755; + inode->i_parent = dir; + inode->i_uid = getuid(); + inode->i_gid = getgid(); + inode->i_mtime = inode->sbi->build_time; + inode->i_mtime_nsec = inode->sbi->build_time_nsec; + erofs_init_empty_dir(inode); + + d = erofs_d_alloc(dir, s); + if (!IS_ERR(d)) { + d->type = EROFS_FT_DIR; + d->inode = inode; + } + return d; +} + +struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, + char *path, bool aufs, bool *whout, bool *opq, bool to_head) +{ + struct erofs_dentry *d = NULL; + unsigned int len = strlen(path); + char *s = path; + + *whout = false; + *opq = false; + + while (s < path + len) { + char *slash = memchr(s, '/', path + len - s); + + if (slash) { + if (s == slash) { + while (*++s == '/'); /* skip '//...' 
*/ + continue; + } + *slash = '\0'; + } + + if (!memcmp(s, ".", 2)) { + /* null */ + } else if (!memcmp(s, "..", 3)) { + pwd = pwd->i_parent; + } else { + struct erofs_inode *inode = NULL; + + if (aufs && !slash) { + if (!memcmp(s, AUFS_WH_DIROPQ, sizeof(AUFS_WH_DIROPQ))) { + *opq = true; + break; + } + if (!memcmp(s, AUFS_WH_PFX, sizeof(AUFS_WH_PFX) - 1)) { + s += sizeof(AUFS_WH_PFX) - 1; + *whout = true; + } + } + + list_for_each_entry(d, &pwd->i_subdirs, d_child) { + if (!strcmp(d->name, s)) { + if (d->type != EROFS_FT_DIR && slash) + return ERR_PTR(-EIO); + inode = d->inode; + break; + } + } + + if (inode) { + if (to_head) { + list_del(&d->d_child); + list_add(&d->d_child, &pwd->i_subdirs); + } + pwd = inode; + } else if (!slash) { + d = erofs_d_alloc(pwd, s); + if (IS_ERR(d)) + return d; + d->type = EROFS_FT_UNKNOWN; + d->inode = pwd; + } else { + d = erofs_rebuild_mkdir(pwd, s); + if (IS_ERR(d)) + return d; + pwd = d->inode; + } + } + if (slash) { + *slash = '/'; + s = slash + 1; + } else { + break; + } + } + return d; +} + +static int erofs_rebuild_fixup_inode_index(struct erofs_inode *inode) +{ + int ret; + unsigned int count, unit, chunkbits, i; + struct erofs_inode_chunk_index *idx; + erofs_off_t chunksize; + erofs_blk_t blkaddr; + + /* TODO: fill data map in other layouts */ + if (inode->datalayout != EROFS_INODE_CHUNK_BASED && + inode->datalayout != EROFS_INODE_FLAT_PLAIN) { + erofs_err("%s: unsupported datalayout %d", inode->i_srcpath, inode->datalayout); + return -EOPNOTSUPP; + } + + if (inode->sbi->extra_devices) { + chunkbits = inode->u.chunkbits; + if (chunkbits < sbi.blkszbits) { + erofs_err("%s: chunk size %u is too small to fit the target block size %u", + inode->i_srcpath, 1U << chunkbits, 1U << sbi.blkszbits); + return -EINVAL; + } + } else { + chunkbits = ilog2(inode->i_size - 1) + 1; + if (chunkbits < sbi.blkszbits) + chunkbits = sbi.blkszbits; + if (chunkbits - sbi.blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK) + chunkbits = 
EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi.blkszbits; + } + chunksize = 1ULL << chunkbits; + count = DIV_ROUND_UP(inode->i_size, chunksize); + + unit = sizeof(struct erofs_inode_chunk_index); + inode->extent_isize = count * unit; + idx = malloc(max(sizeof(*idx), sizeof(void *))); + if (!idx) + return -ENOMEM; + inode->chunkindexes = idx; + + for (i = 0; i < count; i++) { + struct erofs_blobchunk *chunk; + struct erofs_map_blocks map = { + .index = UINT_MAX, + }; + + map.m_la = i << chunkbits; + ret = erofs_map_blocks(inode, &map, 0); + if (ret) + goto err; + + blkaddr = erofs_blknr(&sbi, map.m_pa); + chunk = erofs_get_unhashed_chunk(inode->dev, blkaddr, 0); + if (IS_ERR(chunk)) { + ret = PTR_ERR(chunk); + goto err; + } + *(void **)idx++ = chunk; + + } + inode->datalayout = EROFS_INODE_CHUNK_BASED; + inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES; + inode->u.chunkformat |= chunkbits - sbi.blkszbits; + return 0; +err: + free(inode->chunkindexes); + inode->chunkindexes = NULL; + return ret; +} + +static int erofs_rebuild_fill_inode(struct erofs_inode *inode) +{ + switch (inode->i_mode & S_IFMT) { + case S_IFCHR: + if (erofs_inode_is_whiteout(inode)) + inode->i_parent->whiteouts = true; + /* fallthrough */ + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + inode->i_size = 0; + erofs_dbg("\tdev: %d %d", major(inode->u.i_rdev), + minor(inode->u.i_rdev)); + inode->u.i_rdev = erofs_new_encode_dev(inode->u.i_rdev); + return 0; + case S_IFDIR: + return erofs_init_empty_dir(inode); + case S_IFLNK: { + int ret; + + inode->i_link = malloc(inode->i_size + 1); + if (!inode->i_link) + return -ENOMEM; + ret = erofs_pread(inode, inode->i_link, inode->i_size, 0); + erofs_dbg("\tsymlink: %s -> %s", inode->i_srcpath, inode->i_link); + return ret; + } + case S_IFREG: + if (inode->i_size) + return erofs_rebuild_fixup_inode_index(inode); + return 0; + default: + break; + } + return -EINVAL; +} + +/* + * @parent: parent directory in inode tree + * @ctx.dir: parent directory when itering 
erofs_iterate_dir() + */ +struct erofs_rebuild_dir_context { + struct erofs_dir_context ctx; + struct erofs_inode *parent; +}; + +static int erofs_rebuild_dirent_iter(struct erofs_dir_context *ctx) +{ + struct erofs_rebuild_dir_context *rctx = (void *)ctx; + struct erofs_inode *parent = rctx->parent; + struct erofs_inode *dir = ctx->dir; + struct erofs_inode *inode, *candidate; + struct erofs_inode src; + struct erofs_dentry *d; + char *path, *dname; + bool dumb; + int ret; + + if (ctx->dot_dotdot) + return 0; + + ret = asprintf(&path, "%s/%.*s", rctx->parent->i_srcpath, + ctx->de_namelen, ctx->dname); + if (ret < 0) + return ret; + + erofs_dbg("parsing %s", path); + dname = path + strlen(parent->i_srcpath) + 1; + + d = erofs_rebuild_get_dentry(parent, dname, false, + &dumb, &dumb, false); + if (IS_ERR(d)) { + ret = PTR_ERR(d); + goto out; + } + + ret = 0; + if (d->type != EROFS_FT_UNKNOWN) { + /* + * bail out if the file exists in the upper layers. (Note that + * extended attributes won't be merged too even for dirs.) 
+ */ + if (!S_ISDIR(d->inode->i_mode) || d->inode->opaque) + goto out; + + /* merge directory entries */ + src = (struct erofs_inode) { + .sbi = dir->sbi, + .nid = ctx->de_nid + }; + ret = erofs_read_inode_from_disk(&src); + if (ret || !S_ISDIR(src.i_mode)) + goto out; + parent = d->inode; + inode = dir = &src; + } else { + u64 nid; + + DBG_BUGON(parent != d->inode); + inode = erofs_new_inode(); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + + /* reuse i_ino[0] to read nid in source fs */ + nid = inode->i_ino[0]; + inode->sbi = dir->sbi; + inode->nid = ctx->de_nid; + ret = erofs_read_inode_from_disk(inode); + if (ret) + goto out; + + /* restore nid in new generated fs */ + inode->i_ino[1] = inode->i_ino[0]; + inode->i_ino[0] = nid; + inode->dev = inode->sbi->dev; + + if (S_ISREG(inode->i_mode) && inode->i_nlink > 1 && + (candidate = erofs_iget(inode->dev, ctx->de_nid))) { + /* hardlink file */ + erofs_iput(inode); + inode = candidate; + if (S_ISDIR(inode->i_mode)) { + erofs_err("hardlink directory not supported"); + ret = -EISDIR; + goto out; + } + inode->i_nlink++; + erofs_dbg("\thardlink: %s -> %s", path, inode->i_srcpath); + } else { + ret = erofs_read_xattrs_from_disk(inode); + if (ret) { + erofs_iput(inode); + goto out; + } + + inode->i_parent = d->inode; + inode->i_srcpath = path; + path = NULL; + inode->i_ino[1] = inode->nid; + inode->i_nlink = 1; + + ret = erofs_rebuild_fill_inode(inode); + if (ret) { + erofs_iput(inode); + goto out; + } + + erofs_insert_ihash(inode, inode->dev, inode->i_ino[1]); + parent = dir = inode; + } + + d->inode = inode; + d->type = erofs_mode_to_ftype(inode->i_mode); + } + + if (S_ISDIR(inode->i_mode)) { + struct erofs_rebuild_dir_context nctx = *rctx; + + nctx.parent = parent; + nctx.ctx.dir = dir; + ret = erofs_iterate_dir(&nctx.ctx, false); + if (ret) + goto out; + } + + /* reset sbi, nid after subdirs are all loaded for the final dump */ + inode->sbi = &sbi; + inode->nid = 0; +out: + free(path); + return ret; 
+} + +int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi) +{ + struct erofs_inode inode = {}; + struct erofs_rebuild_dir_context ctx; + int ret; + + if (!sbi->devname) { + erofs_err("failed to find a device for rebuilding"); + return -EINVAL; + } + + ret = erofs_read_superblock(sbi); + if (ret) { + erofs_err("failed to read superblock of %s", sbi->devname); + return ret; + } + + inode.nid = sbi->root_nid; + inode.sbi = sbi; + ret = erofs_read_inode_from_disk(&inode); + if (ret) { + erofs_err("failed to read root inode of %s", sbi->devname); + return ret; + } + inode.i_srcpath = strdup("/"); + + ctx = (struct erofs_rebuild_dir_context) { + .ctx.dir = &inode, + .ctx.cb = erofs_rebuild_dirent_iter, + .parent = root, + }; + ret = erofs_iterate_dir(&ctx.ctx, false); + free(inode.i_srcpath); + return ret; +} diff --git a/lib/super.c b/lib/super.c index 30aeb36..f952f7e 100644 --- a/lib/super.c +++ b/lib/super.c @@ -6,6 +6,7 @@ #include <stdlib.h> #include "erofs/io.h" #include "erofs/print.h" +#include "erofs/xattr.h" static bool check_layout_compatibility(struct erofs_sb_info *sbi, struct erofs_super_block *dsb) @@ -31,12 +32,13 @@ static int erofs_init_devices(struct erofs_sb_info *sbi, sbi->total_blocks = sbi->primarydevice_blocks; - if (!erofs_sb_has_device_table()) + if (!erofs_sb_has_device_table(sbi)) ondisk_extradevs = 0; else ondisk_extradevs = le16_to_cpu(dsb->extra_devices); - if (ondisk_extradevs != sbi->extra_devices) { + if (sbi->extra_devices && + ondisk_extradevs != sbi->extra_devices) { erofs_err("extra devices don't match (ondisk %u, given %u)", ondisk_extradevs, sbi->extra_devices); return -EINVAL; @@ -44,6 +46,7 @@ static int erofs_init_devices(struct erofs_sb_info *sbi, if (!ondisk_extradevs) return 0; + sbi->extra_devices = ondisk_extradevs; sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; sbi->devs = calloc(ondisk_extradevs, sizeof(*sbi->devs)); if (!sbi->devs) @@ -53,27 +56,30 @@ static int 
erofs_init_devices(struct erofs_sb_info *sbi, struct erofs_deviceslot dis; int ret; - ret = dev_read(0, &dis, pos, sizeof(dis)); + ret = dev_read(sbi, 0, &dis, pos, sizeof(dis)); if (ret < 0) { free(sbi->devs); + sbi->devs = NULL; return ret; } - sbi->devs[i].mapped_blkaddr = dis.mapped_blkaddr; - sbi->total_blocks += dis.blocks; + sbi->devs[i].mapped_blkaddr = le32_to_cpu(dis.mapped_blkaddr); + sbi->devs[i].blocks = le32_to_cpu(dis.blocks); + memcpy(sbi->devs[i].tag, dis.tag, sizeof(dis.tag)); + sbi->total_blocks += sbi->devs[i].blocks; pos += EROFS_DEVT_SLOT_SIZE; } return 0; } -int erofs_read_superblock(void) +int erofs_read_superblock(struct erofs_sb_info *sbi) { - char data[EROFS_BLKSIZ]; + u8 data[EROFS_MAX_BLOCK_SIZE]; struct erofs_super_block *dsb; - unsigned int blkszbits; int ret; - ret = blk_read(0, data, 0, 1); + sbi->blkszbits = ilog2(EROFS_MAX_BLOCK_SIZE); + ret = blk_read(sbi, 0, data, 0, erofs_blknr(sbi, sizeof(data))); if (ret < 0) { erofs_err("cannot read erofs superblock: %d", ret); return -EIO; @@ -86,37 +92,57 @@ int erofs_read_superblock(void) return ret; } - sbi.feature_compat = le32_to_cpu(dsb->feature_compat); + sbi->feature_compat = le32_to_cpu(dsb->feature_compat); - blkszbits = dsb->blkszbits; - /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ - if (blkszbits != LOG_BLOCK_SIZE) { - erofs_err("blksize %d isn't supported on this platform", - 1 << blkszbits); + sbi->blkszbits = dsb->blkszbits; + if (sbi->blkszbits < 9 || + sbi->blkszbits > ilog2(EROFS_MAX_BLOCK_SIZE)) { + erofs_err("blksize %llu isn't supported on this platform", + erofs_blksiz(sbi) | 0ULL); return ret; - } - - if (!check_layout_compatibility(&sbi, dsb)) + } else if (!check_layout_compatibility(sbi, dsb)) { return ret; + } - sbi.primarydevice_blocks = le32_to_cpu(dsb->blocks); - sbi.meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); - sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); - sbi.islotbits = EROFS_ISLOTBITS; - sbi.root_nid = le16_to_cpu(dsb->root_nid); 
- sbi.packed_nid = le64_to_cpu(dsb->packed_nid); - sbi.inos = le64_to_cpu(dsb->inos); - sbi.checksum = le32_to_cpu(dsb->checksum); + sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); + sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); + sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); + sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start); + sbi->xattr_prefix_count = dsb->xattr_prefix_count; + sbi->islotbits = EROFS_ISLOTBITS; + sbi->root_nid = le16_to_cpu(dsb->root_nid); + sbi->packed_nid = le64_to_cpu(dsb->packed_nid); + sbi->inos = le64_to_cpu(dsb->inos); + sbi->checksum = le32_to_cpu(dsb->checksum); + sbi->extslots = dsb->sb_extslots; + + sbi->build_time = le64_to_cpu(dsb->build_time); + sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); + + memcpy(&sbi->uuid, dsb->uuid, sizeof(dsb->uuid)); + + if (erofs_sb_has_compr_cfgs(sbi)) + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + else + sbi->lz4_max_distance = le16_to_cpu(dsb->u1.lz4_max_distance); - sbi.build_time = le64_to_cpu(dsb->build_time); - sbi.build_time_nsec = le32_to_cpu(dsb->build_time_nsec); + ret = erofs_init_devices(sbi, dsb); + if (ret) + return ret; - memcpy(&sbi.uuid, dsb->uuid, sizeof(dsb->uuid)); - return erofs_init_devices(&sbi, dsb); + ret = erofs_xattr_prefixes_init(sbi); + if (ret && sbi->devs) { + free(sbi->devs); + sbi->devs = NULL; + } + return ret; } -void erofs_put_super(void) +void erofs_put_super(struct erofs_sb_info *sbi) { - if (sbi.devs) - free(sbi.devs); + if (sbi->devs) { + free(sbi->devs); + sbi->devs = NULL; + } + erofs_xattr_prefixes_cleanup(sbi); } diff --git a/lib/tar.c b/lib/tar.c new file mode 100644 index 0000000..0744972 --- /dev/null +++ b/lib/tar.c @@ -0,0 +1,976 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#if defined(HAVE_ZLIB) +#include <zlib.h> +#endif +#include "erofs/print.h" +#include "erofs/cache.h" +#include 
"erofs/diskbuf.h" +#include "erofs/inode.h" +#include "erofs/list.h" +#include "erofs/tar.h" +#include "erofs/io.h" +#include "erofs/xattr.h" +#include "erofs/blobchunk.h" +#include "erofs/rebuild.h" + +/* This file is a tape/volume header. Ignore it on extraction. */ +#define GNUTYPE_VOLHDR 'V' + +struct tar_header { + char name[100]; /* 0-99 */ + char mode[8]; /* 100-107 */ + char uid[8]; /* 108-115 */ + char gid[8]; /* 116-123 */ + char size[12]; /* 124-135 */ + char mtime[12]; /* 136-147 */ + char chksum[8]; /* 148-155 */ + char typeflag; /* 156-156 */ + char linkname[100]; /* 157-256 */ + char magic[6]; /* 257-262 */ + char version[2]; /* 263-264 */ + char uname[32]; /* 265-296 */ + char gname[32]; /* 297-328 */ + char devmajor[8]; /* 329-336 */ + char devminor[8]; /* 337-344 */ + char prefix[155]; /* 345-499 */ + char padding[12]; /* 500-512 (pad to exactly the 512 byte) */ +}; + +s64 erofs_read_from_fd(int fd, void *buf, u64 bytes) +{ + s64 i = 0; + + while (bytes) { + int len = bytes > INT_MAX ? 
INT_MAX : bytes; + int ret; + + ret = read(fd, buf + i, len); + if (ret < 1) { + if (ret == 0) { + break; + } else if (errno != EINTR) { + erofs_err("failed to read : %s\n", + strerror(errno)); + return -errno; + } + } + bytes -= ret; + i += ret; + } + return i; +} + +void erofs_iostream_close(struct erofs_iostream *ios) +{ + free(ios->buffer); + if (ios->decoder == EROFS_IOS_DECODER_GZIP) { +#if defined(HAVE_ZLIB) + gzclose(ios->handler); +#endif + return; + } + close(ios->fd); +} + +int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder) +{ + s64 fsz; + + ios->tail = ios->head = 0; + ios->decoder = decoder; + if (decoder == EROFS_IOS_DECODER_GZIP) { +#if defined(HAVE_ZLIB) + ios->handler = gzdopen(fd, "r"); + if (!ios->handler) + return -ENOMEM; + ios->sz = fsz = 0; + ios->bufsize = 32768; +#else + return -EOPNOTSUPP; +#endif + } else { + ios->fd = fd; + fsz = lseek(fd, 0, SEEK_END); + if (fsz <= 0) { + ios->feof = !fsz; + ios->sz = 0; + } else { + ios->feof = false; + ios->sz = fsz; + if (lseek(fd, 0, SEEK_SET)) + return -EIO; +#ifdef HAVE_POSIX_FADVISE + if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) + erofs_warn("failed to fadvise: %s, ignored.", + erofs_strerror(errno)); +#endif + } + ios->bufsize = 16384; + } + + do { + ios->buffer = malloc(ios->bufsize); + if (ios->buffer) + break; + ios->bufsize >>= 1; + } while (ios->bufsize >= 1024); + + if (!ios->buffer) + return -ENOMEM; + return 0; +} + +int erofs_iostream_read(struct erofs_iostream *ios, void **buf, u64 bytes) +{ + unsigned int rabytes = ios->tail - ios->head; + int ret; + + if (rabytes >= bytes) { + *buf = ios->buffer + ios->head; + ios->head += bytes; + return bytes; + } + + if (ios->head) { + memmove(ios->buffer, ios->buffer + ios->head, rabytes); + ios->head = 0; + ios->tail = rabytes; + } + + if (!ios->feof) { + if (ios->decoder == EROFS_IOS_DECODER_GZIP) { +#if defined(HAVE_ZLIB) + ret = gzread(ios->handler, ios->buffer + rabytes, + ios->bufsize - rabytes); + if (!ret) { + 
/*
 * Parse an ASCII octal number from a fixed-width tar header field.
 *
 * @ptr: start of the field; it need NOT be NUL-terminated
 * @len: width of the field in bytes (must be smaller than 32)
 *
 * The field is copied into a terminated scratch buffer first, so parsing can
 * never run on into the following header field.  The number may be followed
 * by a NUL or a space.
 *
 * Returns the parsed value.  errno is 0 on success and EINVAL when the field
 * contains no octal digits, has trailing garbage, or @len is out of range;
 * strtoll() may also leave ERANGE behind on overflow.
 */
static long long tarerofs_otoi(const char *ptr, int len)
{
	char inp[32];
	char *endp = inp;
	long long val;

	if (len < 0 || (size_t)len >= sizeof(inp)) {
		errno = EINVAL;
		return 0;
	}
	memcpy(inp, ptr, len);
	inp[len] = '\0';

	errno = 0;
	/* parse the bounded copy, not the raw header bytes */
	val = strtoll(inp, &endp, 8);
	if (endp == inp || (*endp != '\0' && *endp != ' '))
		errno = EINVAL;
	return val;
}
+ */ + if (*(char *)ptr == '\200') { + long long res = 0; + + while (--len) + res = (res << 8) + (u8)*(++ptr); + return res; + } + return tarerofs_otoi(ptr, len); +} + +struct tarerofs_xattr_item { + struct list_head list; + char *kv; + unsigned int len, namelen; +}; + +int tarerofs_insert_xattr(struct list_head *xattrs, + char *kv, int namelen, int len, bool skip) +{ + struct tarerofs_xattr_item *item; + char *nv; + + DBG_BUGON(namelen >= len); + list_for_each_entry(item, xattrs, list) { + if (!strncmp(item->kv, kv, namelen + 1)) { + if (skip) + return 0; + goto found; + } + } + + item = malloc(sizeof(*item)); + if (!item) + return -ENOMEM; + item->kv = NULL; + item->namelen = namelen; + namelen = 0; + list_add_tail(&item->list, xattrs); +found: + nv = realloc(item->kv, len); + if (!nv) + return -ENOMEM; + item->kv = nv; + item->len = len; + memcpy(nv + namelen, kv + namelen, len - namelen); + return 0; +} + +int tarerofs_merge_xattrs(struct list_head *dst, struct list_head *src) +{ + struct tarerofs_xattr_item *item; + + list_for_each_entry(item, src, list) { + int ret; + + ret = tarerofs_insert_xattr(dst, item->kv, item->namelen, + item->len, true); + if (ret) + return ret; + } + return 0; +} + +void tarerofs_remove_xattrs(struct list_head *xattrs) +{ + struct tarerofs_xattr_item *item, *n; + + list_for_each_entry_safe(item, n, xattrs, list) { + DBG_BUGON(!item->kv); + free(item->kv); + list_del(&item->list); + free(item); + } +} + +int tarerofs_apply_xattrs(struct erofs_inode *inode, struct list_head *xattrs) +{ + struct tarerofs_xattr_item *item; + int ret; + + list_for_each_entry(item, xattrs, list) { + const char *v = item->kv + item->namelen + 1; + unsigned int vsz = item->len - item->namelen - 1; + + if (item->len <= item->namelen - 1) { + DBG_BUGON(item->len < item->namelen - 1); + continue; + } + item->kv[item->namelen] = '\0'; + erofs_dbg("Recording xattr(%s)=\"%s\" (of %u bytes) to file %s", + item->kv, v, vsz, inode->i_srcpath); + ret = 
/* RFC 4648 Base64 alphabet, as used for LIBARCHIVE.xattr.* pax values */
static const char lookup_table[65] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Decode @len bytes of Base64 text from @src into @dst.
 *
 * Trailing '=' padding is accepted but not required.  Each output byte is
 * written only after the input characters it is built from have been read,
 * and the output is never longer than the input, so @dst may be the same
 * buffer as @src.
 *
 * Returns the number of decoded bytes, -2 if @src contains a character
 * outside the alphabet (including NUL), or -1 if the input length or the
 * leftover bits do not form a valid encoding.
 */
static int base64_decode(const char *src, int len, u8 *dst)
{
	unsigned int ac = 0;
	int i, bits = 0;
	const char *p;
	u8 *cp = dst;

	/* padded input is always a non-empty multiple of four characters */
	if (len > 0 && !(len % 4)) {
		if (src[len - 2] == '=' && src[len - 1] == '=')
			len -= 2;
		else if (src[len - 1] == '=')
			--len;
	}

	for (i = 0; i < len; i++) {
		/* strchr() would also "find" the table's terminating NUL */
		if (src[i] == '\0')
			return -2;
		p = strchr(lookup_table, src[i]);
		if (!p)
			return -2;

		/* sextets arrive most-significant first */
		ac = (ac << 6) | (unsigned int)(p - lookup_table);
		bits += 6;
		if (bits >= 8) {
			bits -= 8;
			*cp++ = (ac >> bits) & 0xff;
			ac &= (1U << bits) - 1;
		}
	}

	/*
	 * Leftover bits are padding and must be zero; a single dangling
	 * sextet can never complete a byte.
	 */
	if (ac || bits == 6)
		return -1;
	return cp - dst;
}
{ + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1) { + ret = -EIO; + goto out; + } + eh->st.st_mtime = lln; + if (value[n] == '.') { + ret = sscanf(value + n + 1, "%d", &n); + if (ret < 1) { + ret = -EIO; + goto out; + } +#if ST_MTIM_NSEC + ST_MTIM_NSEC(&eh->st) = n; +#endif + } + eh->use_mtime = true; + } else if (!strncmp(kv, "size=", + sizeof("size=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1 || value[n] != '\0') { + ret = -EIO; + goto out; + } + eh->st.st_size = lln; + eh->use_size = true; + } else if (!strncmp(kv, "uid=", sizeof("uid=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1 || value[n] != '\0') { + ret = -EIO; + goto out; + } + eh->st.st_uid = lln; + eh->use_uid = true; + } else if (!strncmp(kv, "gid=", sizeof("gid=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1 || value[n] != '\0') { + ret = -EIO; + goto out; + } + eh->st.st_gid = lln; + eh->use_gid = true; + } else if (!strncmp(kv, "SCHILY.xattr.", + sizeof("SCHILY.xattr.") - 1)) { + char *key = kv + sizeof("SCHILY.xattr.") - 1; + + --len; /* p[-1] == '\0' */ + ret = tarerofs_insert_xattr(&eh->xattrs, key, + value - key - 1, + len - (key - kv), false); + if (ret) + goto out; + } else if (!strncmp(kv, "LIBARCHIVE.xattr.", + sizeof("LIBARCHIVE.xattr.") - 1)) { + char *key; + key = kv + sizeof("LIBARCHIVE.xattr.") - 1; + + --len; /* p[-1] == '\0' */ + ret = base64_decode(value, len - (value - kv), + (u8 *)value); + if (ret < 0) { + ret = -EFSCORRUPTED; + goto out; + } + + ret = tarerofs_insert_xattr(&eh->xattrs, key, + value - key - 1, + value - key + ret, false); + if (ret) + goto out; + } else { + erofs_info("unrecognized pax keyword \"%s\", ignoring", kv); + } + } + } + ret = 0; +out: + free(buf); + return ret; +} + +void tarerofs_remove_inode(struct erofs_inode *inode) +{ + struct erofs_dentry *d; + + --inode->i_nlink; + if (!S_ISDIR(inode->i_mode)) + return; + + /* remove all subdirss */ + list_for_each_entry(d, 
&inode->i_subdirs, d_child) { + if (!is_dot_dotdot(d->name)) + tarerofs_remove_inode(d->inode); + erofs_iput(d->inode); + d->inode = NULL; + } + --inode->i_parent->i_nlink; +} + +static int tarerofs_write_file_data(struct erofs_inode *inode, + struct erofs_tarfile *tar) +{ + unsigned int j; + void *buf; + int fd, nread; + u64 off; + + if (!inode->i_diskbuf) { + inode->i_diskbuf = calloc(1, sizeof(*inode->i_diskbuf)); + if (!inode->i_diskbuf) + return -ENOSPC; + } else { + erofs_diskbuf_close(inode->i_diskbuf); + } + + fd = erofs_diskbuf_reserve(inode->i_diskbuf, 0, &off); + if (fd < 0) + return -EBADF; + + for (j = inode->i_size; j; ) { + nread = erofs_iostream_read(&tar->ios, &buf, j); + if (nread < 0) + break; + if (write(fd, buf, nread) != nread) { + nread = -EIO; + break; + } + j -= nread; + } + erofs_diskbuf_commit(inode->i_diskbuf, inode->i_size); + inode->with_diskbuf = true; + return 0; +} + +static int tarerofs_write_file_index(struct erofs_inode *inode, + struct erofs_tarfile *tar, erofs_off_t data_offset) +{ + int ret; + + ret = tarerofs_write_chunkes(inode, data_offset); + if (ret) + return ret; + if (erofs_iostream_lskip(&tar->ios, inode->i_size)) + return -EIO; + return 0; +} + +int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar) +{ + char path[PATH_MAX]; + struct erofs_pax_header eh = tar->global; + struct erofs_sb_info *sbi = root->sbi; + bool whout, opq, e = false; + struct stat st; + erofs_off_t tar_offset, data_offset; + + struct tar_header *th; + struct erofs_dentry *d; + struct erofs_inode *inode; + unsigned int j, csum, cksum; + int ckksum, ret, rem; + + if (eh.path) + eh.path = strdup(eh.path); + if (eh.link) + eh.link = strdup(eh.link); + init_list_head(&eh.xattrs); + +restart: + rem = tar->offset & 511; + if (rem) { + if (erofs_iostream_lskip(&tar->ios, 512 - rem)) { + ret = -EIO; + goto out; + } + tar->offset += 512 - rem; + } + + tar_offset = tar->offset; + ret = erofs_iostream_read(&tar->ios, (void **)&th, 
sizeof(*th)); + if (ret != sizeof(*th)) { + erofs_err("failed to read header block @ %llu", tar_offset); + ret = -EIO; + goto out; + } + tar->offset += sizeof(*th); + if (*th->name == '\0') { + if (e) { /* end of tar 2 empty blocks */ + ret = 1; + goto out; + } + e = true; /* empty jump to next block */ + goto restart; + } + + /* chksum field itself treated as ' ' */ + csum = tarerofs_otoi(th->chksum, sizeof(th->chksum)); + if (errno) { + erofs_err("invalid chksum @ %llu", tar_offset); + ret = -EBADMSG; + goto out; + } + cksum = 0; + for (j = 0; j < 8; ++j) + cksum += (unsigned int)' '; + ckksum = cksum; + for (j = 0; j < 148; ++j) { + cksum += (unsigned int)((u8*)th)[j]; + ckksum += (int)((char*)th)[j]; + } + for (j = 156; j < 500; ++j) { + cksum += (unsigned int)((u8*)th)[j]; + ckksum += (int)((char*)th)[j]; + } + if (csum != cksum && csum != ckksum) { + erofs_err("chksum mismatch @ %llu", tar_offset); + ret = -EBADMSG; + goto out; + } + + if (th->typeflag == GNUTYPE_VOLHDR) { + if (th->size[0]) + erofs_warn("GNUTYPE_VOLHDR with non-zeroed size @ %llu", + tar_offset); + /* anyway, strncpy could cause some GCC warning here */ + memcpy(sbi->volume_name, th->name, sizeof(sbi->volume_name)); + goto restart; + } + + if (memcmp(th->magic, "ustar", 5)) { + erofs_err("invalid tar magic @ %llu", tar_offset); + ret = -EIO; + goto out; + } + + st.st_mode = tarerofs_otoi(th->mode, sizeof(th->mode)); + if (errno) + goto invalid_tar; + + if (eh.use_uid) { + st.st_uid = eh.st.st_uid; + } else { + st.st_uid = tarerofs_parsenum(th->uid, sizeof(th->uid)); + if (errno) + goto invalid_tar; + } + + if (eh.use_gid) { + st.st_gid = eh.st.st_gid; + } else { + st.st_gid = tarerofs_parsenum(th->gid, sizeof(th->gid)); + if (errno) + goto invalid_tar; + } + + if (eh.use_size) { + st.st_size = eh.st.st_size; + } else { + st.st_size = tarerofs_parsenum(th->size, sizeof(th->size)); + if (errno) + goto invalid_tar; + } + + if (eh.use_mtime) { + st.st_mtime = eh.st.st_mtime; +#if ST_MTIM_NSEC + 
ST_MTIM_NSEC(&st) = ST_MTIM_NSEC(&eh.st); +#endif + } else { + st.st_mtime = tarerofs_parsenum(th->mtime, sizeof(th->mtime)); + if (errno) + goto invalid_tar; + } + + if (th->typeflag <= '7' && !eh.path) { + eh.path = path; + j = 0; + if (*th->prefix) { + memcpy(path, th->prefix, sizeof(th->prefix)); + path[sizeof(th->prefix)] = '\0'; + j = strlen(path); + if (path[j - 1] != '/') { + path[j] = '/'; + path[++j] = '\0'; + } + } + memcpy(path + j, th->name, sizeof(th->name)); + path[j + sizeof(th->name)] = '\0'; + j = strlen(path); + while (path[j - 1] == '/') + path[--j] = '\0'; + } + + data_offset = tar->offset; + tar->offset += st.st_size; + switch(th->typeflag) { + case '0': + case '7': + case '1': + st.st_mode |= S_IFREG; + break; + case '2': + st.st_mode |= S_IFLNK; + break; + case '3': + st.st_mode |= S_IFCHR; + break; + case '4': + st.st_mode |= S_IFBLK; + break; + case '5': + st.st_mode |= S_IFDIR; + break; + case '6': + st.st_mode |= S_IFIFO; + break; + case 'g': + ret = tarerofs_parse_pax_header(&tar->ios, &tar->global, + st.st_size); + if (ret) + goto out; + if (tar->global.path) { + free(eh.path); + eh.path = strdup(tar->global.path); + } + if (tar->global.link) { + free(eh.link); + eh.link = strdup(tar->global.link); + } + goto restart; + case 'x': + ret = tarerofs_parse_pax_header(&tar->ios, &eh, st.st_size); + if (ret) + goto out; + goto restart; + case 'L': + free(eh.path); + eh.path = malloc(st.st_size + 1); + if (st.st_size != erofs_iostream_bread(&tar->ios, eh.path, + st.st_size)) + goto invalid_tar; + eh.path[st.st_size] = '\0'; + goto restart; + case 'K': + free(eh.link); + eh.link = malloc(st.st_size + 1); + if (st.st_size > PATH_MAX || st.st_size != + erofs_iostream_bread(&tar->ios, eh.link, st.st_size)) + goto invalid_tar; + eh.link[st.st_size] = '\0'; + goto restart; + default: + erofs_info("unrecognized typeflag %xh @ %llu - ignoring", + th->typeflag, tar_offset); + (void)erofs_iostream_lskip(&tar->ios, st.st_size); + ret = 0; + goto out; + 
} + + st.st_rdev = 0; + if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { + int major, minor; + + major = tarerofs_parsenum(th->devmajor, sizeof(th->devmajor)); + if (errno) { + erofs_err("invalid device major @ %llu", tar_offset); + goto out; + } + + minor = tarerofs_parsenum(th->devminor, sizeof(th->devminor)); + if (errno) { + erofs_err("invalid device minor @ %llu", tar_offset); + goto out; + } + + st.st_rdev = (major << 8) | (minor & 0xff) | ((minor & ~0xff) << 12); + } else if (th->typeflag == '1' || th->typeflag == '2') { + if (!eh.link) + eh.link = strndup(th->linkname, sizeof(th->linkname)); + } + + if (tar->index_mode && !tar->mapfile && + erofs_blkoff(sbi, data_offset)) { + erofs_err("invalid tar data alignment @ %llu", tar_offset); + ret = -EIO; + goto out; + } + + erofs_dbg("parsing %s (mode %05o)", eh.path, st.st_mode); + + d = erofs_rebuild_get_dentry(root, eh.path, tar->aufs, &whout, &opq, true); + if (IS_ERR(d)) { + ret = PTR_ERR(d); + goto out; + } + + if (!d) { + /* some tarballs include '.' 
which indicates the root directory */ + if (!S_ISDIR(st.st_mode)) { + ret = -ENOTDIR; + goto out; + } + inode = root; + } else if (opq) { + DBG_BUGON(d->type == EROFS_FT_UNKNOWN); + DBG_BUGON(!d->inode); + ret = erofs_set_opaque_xattr(d->inode); + goto out; + } else if (th->typeflag == '1') { /* hard link cases */ + struct erofs_dentry *d2; + bool dumb; + + if (S_ISDIR(st.st_mode)) { + ret = -EISDIR; + goto out; + } + + if (d->type != EROFS_FT_UNKNOWN) { + tarerofs_remove_inode(d->inode); + erofs_iput(d->inode); + } + d->inode = NULL; + + d2 = erofs_rebuild_get_dentry(root, eh.link, tar->aufs, + &dumb, &dumb, false); + if (IS_ERR(d2)) { + ret = PTR_ERR(d2); + goto out; + } + if (d2->type == EROFS_FT_UNKNOWN) { + ret = -ENOENT; + goto out; + } + if (S_ISDIR(d2->inode->i_mode)) { + ret = -EISDIR; + goto out; + } + inode = erofs_igrab(d2->inode); + d->inode = inode; + d->type = d2->type; + ++inode->i_nlink; + ret = 0; + goto out; + } else if (d->type != EROFS_FT_UNKNOWN) { + if (d->type != EROFS_FT_DIR || !S_ISDIR(st.st_mode)) { + struct erofs_inode *parent = d->inode->i_parent; + + tarerofs_remove_inode(d->inode); + erofs_iput(d->inode); + d->inode = parent; + goto new_inode; + } + inode = d->inode; + } else { +new_inode: + inode = erofs_new_inode(); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + inode->i_parent = d->inode; + d->inode = inode; + d->type = erofs_mode_to_ftype(st.st_mode); + } + + if (whout) { + inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFCHR; + inode->u.i_rdev = EROFS_WHITEOUT_DEV; + d->type = EROFS_FT_CHRDEV; + + /* + * Mark the parent directory as copied-up to avoid exposing + * whiteouts if mounted. 
See kernel commit b79e05aaa166 + * ("ovl: no direct iteration for dir with origin xattr") + */ + inode->i_parent->whiteouts = true; + } else { + inode->i_mode = st.st_mode; + if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) + inode->u.i_rdev = erofs_new_encode_dev(st.st_rdev); + } + + inode->i_srcpath = strdup(eh.path); + if (!inode->i_srcpath) { + ret = -ENOMEM; + goto out; + } + + ret = __erofs_fill_inode(inode, &st, eh.path); + if (ret) + goto out; + inode->i_size = st.st_size; + + if (!S_ISDIR(inode->i_mode)) { + if (S_ISLNK(inode->i_mode)) { + inode->i_size = strlen(eh.link); + inode->i_link = malloc(inode->i_size + 1); + memcpy(inode->i_link, eh.link, inode->i_size + 1); + } else if (inode->i_size) { + if (tar->index_mode) + ret = tarerofs_write_file_index(inode, tar, + data_offset); + else + ret = tarerofs_write_file_data(inode, tar); + if (ret) + goto out; + } + inode->i_nlink++; + } else if (!inode->i_nlink) { + ret = erofs_init_empty_dir(inode); + if (ret) + goto out; + } + + ret = tarerofs_merge_xattrs(&eh.xattrs, &tar->global.xattrs); + if (ret) + goto out; + + ret = tarerofs_apply_xattrs(inode, &eh.xattrs); + +out: + if (eh.path != path) + free(eh.path); + free(eh.link); + tarerofs_remove_xattrs(&eh.xattrs); + return ret; + +invalid_tar: + erofs_err("invalid tar @ %llu", tar_offset); + ret = -EIO; + goto out; +} diff --git a/lib/uuid.c b/lib/uuid.c new file mode 100644 index 0000000..ec0f9d9 --- /dev/null +++ b/lib/uuid.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +/* + * Copyright (C) 2023 Norbert Lange <nolange79@gmail.com> + */ + +#include <string.h> +#include <errno.h> + +#include "erofs/config.h" +#include "erofs/defs.h" +#include "liberofs_uuid.h" + +#ifdef HAVE_LIBUUID +#include <uuid.h> +#else + +#include <stdlib.h> +#ifdef HAVE_SYS_RANDOM_H +#include <sys/random.h> +#else +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#endif + +/* Flags to be used, will be modified if kernel does not support them 
*/ +static unsigned int erofs_grnd_flag = +#ifdef GRND_INSECURE + GRND_INSECURE; +#else + 0x0004; +#endif + +static int s_getrandom(void *out, unsigned size, bool insecure) +{ + unsigned int kflags = erofs_grnd_flag; + unsigned int flags = insecure ? kflags : 0; + + for (;;) + { +#ifdef HAVE_SYS_RANDOM_H + ssize_t r = getrandom(out, size, flags); +#else + ssize_t r = (ssize_t)syscall(__NR_getrandom, out, size, flags); +#endif + int err; + + if (r == size) + break; + err = errno; + if (err != EINTR) { + if (err == EINVAL && kflags) { + // Kernel likely does not support GRND_INSECURE, so retry without it + erofs_grnd_flag = 0; + flags = kflags = 0; + continue; + } + return -err; + } + } + return 0; +} +#endif + +void erofs_uuid_generate(unsigned char *out) +{ +#ifdef HAVE_LIBUUID + uuid_t new_uuid; + + do { + uuid_generate(new_uuid); + } while (uuid_is_null(new_uuid)); +#else + unsigned char new_uuid[16]; + int res __maybe_unused; + + res = s_getrandom(new_uuid, sizeof(new_uuid), true); + BUG_ON(res != 0); + + // RFC 4122 random UUID: version 4 in octet 6, variant 10x in octet 8 + new_uuid[6] = (new_uuid[6] & 0x0f) | 0x40; + new_uuid[8] = (new_uuid[8] & 0x3f) | 0x80; +#endif + memcpy(out, new_uuid, sizeof(new_uuid)); +} + +int erofs_uuid_parse(const char *in, unsigned char *uu) { +#ifdef HAVE_LIBUUID + return uuid_parse((char *)in, uu); +#else + unsigned char new_uuid[16]; + unsigned int hyphens = ((1U << 3) | (1U << 5) | (1U << 7) | (1U << 9)); + int i; + + for (i = 0; i < sizeof(new_uuid); hyphens >>= 1, i++) + { + char c[] = { in[0], in[0] ? in[1] : '\0', '\0' }; /* never read past the NUL */ + char* endptr = c; + unsigned long val = strtoul(c, &endptr, 16); + + if (endptr - c != 2 || strspn(c, "0123456789abcdefABCDEF") != 2) /* strtoul() alone accepts blanks and signs */ + return -EINVAL; + + in += 2; + + if ((hyphens & 1U) != 0) { + if (*in++ != '-') + return -EINVAL; + } + new_uuid[i] = (unsigned char)val; + } + + if (*in != '\0') + return -EINVAL; + memcpy(uu, new_uuid, sizeof(new_uuid)); + return 0; +#endif +} diff --git a/lib/uuid_unparse.c b/lib/uuid_unparse.c new file mode 100644 index 0000000..3255c4b --- /dev/null +++ b/lib/uuid_unparse.c @@ -0,0 
+1,21 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +/* + * Copyright (C) 2023 Norbert Lange <nolange79@gmail.com> + */ + +#include <stdio.h> + +#include "erofs/config.h" +#include "liberofs_uuid.h" + +void erofs_uuid_unparse_lower(const unsigned char *buf, char *out) { + sprintf(out, "%04x%04x-%04x-%04x-%04x-%04x%04x%04x", + (buf[0] << 8) | buf[1], + (buf[2] << 8) | buf[3], + (buf[4] << 8) | buf[5], + (buf[6] << 8) | buf[7], + (buf[8] << 8) | buf[9], + (buf[10] << 8) | buf[11], + (buf[12] << 8) | buf[13], + (buf[14] << 8) | buf[15]); +} diff --git a/lib/xattr.c b/lib/xattr.c index dbe0519..6c8ebf4 100644 --- a/lib/xattr.c +++ b/lib/xattr.c @@ -17,15 +17,84 @@ #include "erofs/xattr.h" #include "erofs/cache.h" #include "erofs/io.h" +#include "erofs/fragments.h" +#include "erofs/xxhash.h" #include "liberofs_private.h" +#ifndef XATTR_SYSTEM_PREFIX +#define XATTR_SYSTEM_PREFIX "system." +#endif +#ifndef XATTR_SYSTEM_PREFIX_LEN +#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1) +#endif +#ifndef XATTR_USER_PREFIX +#define XATTR_USER_PREFIX "user." +#endif +#ifndef XATTR_USER_PREFIX_LEN +#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) +#endif +#ifndef XATTR_SECURITY_PREFIX +#define XATTR_SECURITY_PREFIX "security." +#endif +#ifndef XATTR_SECURITY_PREFIX_LEN +#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) +#endif +#ifndef XATTR_TRUSTED_PREFIX +#define XATTR_TRUSTED_PREFIX "trusted." 
+#endif +#ifndef XATTR_TRUSTED_PREFIX_LEN +#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) +#endif +#ifndef XATTR_NAME_POSIX_ACL_ACCESS +#define XATTR_NAME_POSIX_ACL_ACCESS "system.posix_acl_access" +#endif +#ifndef XATTR_NAME_POSIX_ACL_DEFAULT +#define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default" +#endif +#ifndef XATTR_NAME_SECURITY_SELINUX +#define XATTR_NAME_SECURITY_SELINUX "security.selinux" +#endif +#ifndef XATTR_NAME_SECURITY_CAPABILITY +#define XATTR_NAME_SECURITY_CAPABILITY "security.capability" +#endif +#ifndef OVL_XATTR_NAMESPACE +#define OVL_XATTR_NAMESPACE "overlay." +#endif +#ifndef OVL_XATTR_OPAQUE_POSTFIX +#define OVL_XATTR_OPAQUE_POSTFIX "opaque" +#endif +#ifndef OVL_XATTR_ORIGIN_POSTFIX +#define OVL_XATTR_ORIGIN_POSTFIX "origin" +#endif +#ifndef OVL_XATTR_TRUSTED_PREFIX +#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE +#endif +#ifndef OVL_XATTR_OPAQUE +#define OVL_XATTR_OPAQUE OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_OPAQUE_POSTFIX +#endif +#ifndef OVL_XATTR_ORIGIN +#define OVL_XATTR_ORIGIN OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ORIGIN_POSTFIX +#endif + #define EA_HASHTABLE_BITS 16 +/* one extra byte for the trailing `\0` of attribute name */ +#define EROFS_XATTR_KSIZE(kvlen) (kvlen[0] + 1) +#define EROFS_XATTR_KVSIZE(kvlen) (EROFS_XATTR_KSIZE(kvlen) + kvlen[1]) + +/* + * @base_index: the index of the matched predefined short prefix + * @prefix: the index of the matched long prefix, if any; + * same as base_index otherwise + * @prefix_len: the length of the matched long prefix if any; + * the length of the matched predefined short prefix otherwise + */ struct xattr_item { + struct xattr_item *next_shared_xattr; const char *kvbuf; unsigned int hash[2], len[2], count; int shared_xattr_id; - u8 prefix; + unsigned int prefix, base_index, prefix_len; struct hlist_node node; }; @@ -36,12 +105,12 @@ struct inode_xattr_node { static DECLARE_HASHTABLE(ea_hashtable, EA_HASHTABLE_BITS); -static 
LIST_HEAD(shared_xattrs_list); -static unsigned int shared_xattrs_count, shared_xattrs_size; +static struct xattr_item *shared_xattrs_list; +static unsigned int shared_xattrs_count; static struct xattr_prefix { const char *prefix; - u16 prefix_len; + unsigned int prefix_len; } xattr_types[] = { [EROFS_XATTR_INDEX_USER] = { XATTR_USER_PREFIX, @@ -61,6 +130,30 @@ static struct xattr_prefix { } }; +struct ea_type_node { + struct list_head list; + struct xattr_prefix type; + unsigned int index, base_index, base_len; +}; + +static LIST_HEAD(ea_name_prefixes); +static unsigned int ea_prefix_count; + +static bool match_prefix(const char *key, unsigned int *index, + unsigned int *len) +{ + struct xattr_prefix *p; + + for (p = xattr_types; p < xattr_types + ARRAY_SIZE(xattr_types); ++p) { + if (p->prefix && !strncmp(p->prefix, key, p->prefix_len)) { + *len = p->prefix_len; + *index = p - xattr_types; + return true; + } + } + return false; +} + static unsigned int BKDRHash(char *str, unsigned int len) { const unsigned int seed = 131313; @@ -73,13 +166,12 @@ static unsigned int BKDRHash(char *str, unsigned int len) return hash; } -static unsigned int xattr_item_hash(u8 prefix, char *buf, - unsigned int len[2], unsigned int hash[2]) +static unsigned int xattr_item_hash(char *buf, unsigned int len[2], + unsigned int hash[2]) { hash[0] = BKDRHash(buf, len[0]); /* key */ hash[1] = BKDRHash(buf + len[0], len[1]); /* value */ - - return prefix ^ hash[0] ^ hash[1]; + return hash[0] ^ hash[1]; } static unsigned int put_xattritem(struct xattr_item *item) @@ -90,17 +182,15 @@ static unsigned int put_xattritem(struct xattr_item *item) return 0; } -static struct xattr_item *get_xattritem(u8 prefix, char *kvbuf, - unsigned int len[2]) +static struct xattr_item *get_xattritem(char *kvbuf, unsigned int len[2]) { struct xattr_item *item; + struct ea_type_node *tnode; unsigned int hash[2], hkey; - hkey = xattr_item_hash(prefix, kvbuf, len, hash); - + hkey = xattr_item_hash(kvbuf, len, hash); 
hash_for_each_possible(ea_hashtable, item, node, hkey) { - if (prefix == item->prefix && - item->len[0] == len[0] && item->len[1] == len[1] && + if (item->len[0] == len[0] && item->len[1] == len[1] && item->hash[0] == hash[0] && item->hash[1] == hash[1] && !memcmp(kvbuf, item->kvbuf, len[0] + len[1])) { free(kvbuf); @@ -114,6 +204,14 @@ static struct xattr_item *get_xattritem(u8 prefix, char *kvbuf, free(kvbuf); return ERR_PTR(-ENOMEM); } + + if (!match_prefix(kvbuf, &item->base_index, &item->prefix_len)) { + free(item); + free(kvbuf); + return ERR_PTR(-ENODATA); + } + DBG_BUGON(len[0] < item->prefix_len); + INIT_HLIST_NODE(&item->node); item->count = 1; item->kvbuf = kvbuf; @@ -122,40 +220,32 @@ static struct xattr_item *get_xattritem(u8 prefix, char *kvbuf, item->hash[0] = hash[0]; item->hash[1] = hash[1]; item->shared_xattr_id = -1; - item->prefix = prefix; - hash_add(ea_hashtable, &item->node, hkey); - return item; -} - -static bool match_prefix(const char *key, u8 *index, u16 *len) -{ - struct xattr_prefix *p; - - for (p = xattr_types; p < xattr_types + ARRAY_SIZE(xattr_types); ++p) { - if (p->prefix && !strncmp(p->prefix, key, p->prefix_len)) { - *len = p->prefix_len; - *index = p - xattr_types; - return true; + item->prefix = item->base_index; + + list_for_each_entry(tnode, &ea_name_prefixes, list) { + if (item->base_index == tnode->base_index && + !strncmp(tnode->type.prefix, kvbuf, + tnode->type.prefix_len)) { + item->prefix = tnode->index; + item->prefix_len = tnode->type.prefix_len; + break; } } - return false; + hash_add(ea_hashtable, &item->node, hkey); + return item; } static struct xattr_item *parse_one_xattr(const char *path, const char *key, unsigned int keylen) { ssize_t ret; - u8 prefix; - u16 prefixlen; unsigned int len[2]; char *kvbuf; erofs_dbg("parse xattr [%s] of %s", path, key); - if (!match_prefix(key, &prefix, &prefixlen)) - return ERR_PTR(-ENODATA); - - DBG_BUGON(keylen < prefixlen); + /* length of the key */ + len[0] = keylen; /* 
determine length of the value */ #ifdef HAVE_LGETXATTR @@ -170,19 +260,18 @@ static struct xattr_item *parse_one_xattr(const char *path, const char *key, len[1] = ret; /* allocate key-value buffer */ - len[0] = keylen - prefixlen; - - kvbuf = malloc(len[0] + len[1]); + kvbuf = malloc(EROFS_XATTR_KVSIZE(len)); if (!kvbuf) return ERR_PTR(-ENOMEM); - memcpy(kvbuf, key + prefixlen, len[0]); + memcpy(kvbuf, key, EROFS_XATTR_KSIZE(len)); if (len[1]) { /* copy value to buffer */ #ifdef HAVE_LGETXATTR - ret = lgetxattr(path, key, kvbuf + len[0], len[1]); + ret = lgetxattr(path, key, kvbuf + EROFS_XATTR_KSIZE(len), + len[1]); #elif defined(__APPLE__) - ret = getxattr(path, key, kvbuf + len[0], len[1], 0, - XATTR_NOFOLLOW); + ret = getxattr(path, key, kvbuf + EROFS_XATTR_KSIZE(len), + len[1], 0, XATTR_NOFOLLOW); #else free(kvbuf); return ERR_PTR(-EOPNOTSUPP); @@ -197,7 +286,7 @@ static struct xattr_item *parse_one_xattr(const char *path, const char *key, len[1] = ret; } } - return get_xattritem(prefix, kvbuf, len); + return get_xattritem(kvbuf, len); } static struct xattr_item *erofs_get_selabel_xattr(const char *srcpath, @@ -232,16 +321,17 @@ static struct xattr_item *erofs_get_selabel_xattr(const char *srcpath, return NULL; } - len[0] = sizeof("selinux") - 1; + len[0] = sizeof(XATTR_NAME_SECURITY_SELINUX) - 1; len[1] = strlen(secontext); - kvbuf = malloc(len[0] + len[1] + 1); + kvbuf = malloc(EROFS_XATTR_KVSIZE(len)); if (!kvbuf) { freecon(secontext); return ERR_PTR(-ENOMEM); } - sprintf(kvbuf, "selinux%s", secontext); + sprintf(kvbuf, "%s", XATTR_NAME_SECURITY_SELINUX); + memcpy(kvbuf + EROFS_XATTR_KSIZE(len), secontext, len[1]); freecon(secontext); - return get_xattritem(EROFS_XATTR_INDEX_SECURITY, kvbuf, len); + return get_xattritem(kvbuf, len); } #endif return NULL; @@ -261,18 +351,8 @@ static int inode_xattr_add(struct list_head *hlist, struct xattr_item *item) static int shared_xattr_add(struct xattr_item *item) { - struct inode_xattr_node *node = 
malloc(sizeof(*node)); - - if (!node) - return -ENOMEM; - - init_list_head(&node->list); - node->item = item; - list_add(&node->list, &shared_xattrs_list); - - shared_xattrs_size += sizeof(struct erofs_xattr_entry); - shared_xattrs_size = EROFS_XATTR_ALIGN(shared_xattrs_size + - item->len[0] + item->len[1]); + item->next_shared_xattr = shared_xattrs_list; + shared_xattrs_list = item; return ++shared_xattrs_count; } @@ -297,6 +377,18 @@ static bool erofs_is_skipped_xattr(const char *key) if (cfg.sehnd && !strcmp(key, XATTR_SECURITY_PREFIX "selinux")) return true; #endif + + /* skip xattrs with unidentified "system." prefix */ + if (!strncmp(key, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) { + if (!strcmp(key, XATTR_NAME_POSIX_ACL_ACCESS) || + !strcmp(key, XATTR_NAME_POSIX_ACL_DEFAULT)) { + return false; + } else { + erofs_warn("skip unidentified xattr: %s", key); + return true; + } + } + return false; } @@ -382,6 +474,59 @@ err: return ret; } +int erofs_setxattr(struct erofs_inode *inode, char *key, + const void *value, size_t size) +{ + char *kvbuf; + unsigned int len[2]; + struct xattr_item *item; + + len[0] = strlen(key); + len[1] = size; + + kvbuf = malloc(EROFS_XATTR_KVSIZE(len)); + if (!kvbuf) + return -ENOMEM; + + memcpy(kvbuf, key, EROFS_XATTR_KSIZE(len)); + memcpy(kvbuf + EROFS_XATTR_KSIZE(len), value, size); + + item = get_xattritem(kvbuf, len); + if (IS_ERR(item)) + return PTR_ERR(item); + DBG_BUGON(!item); + + return erofs_xattr_add(&inode->i_xattrs, item); +} + +static void erofs_removexattr(struct erofs_inode *inode, const char *key) +{ + struct inode_xattr_node *node, *n; + + list_for_each_entry_safe(node, n, &inode->i_xattrs, list) { + if (!strcmp(node->item->kvbuf, key)) { + list_del(&node->list); + put_xattritem(node->item); + free(node); + } + } +} + +int erofs_set_opaque_xattr(struct erofs_inode *inode) +{ + return erofs_setxattr(inode, OVL_XATTR_OPAQUE, "y", 1); +} + +void erofs_clear_opaque_xattr(struct erofs_inode *inode) +{ + 
erofs_removexattr(inode, OVL_XATTR_OPAQUE); +} + +int erofs_set_origin_xattr(struct erofs_inode *inode) +{ + return erofs_setxattr(inode, OVL_XATTR_ORIGIN, NULL, 0); +} + #ifdef WITH_ANDROID static int erofs_droid_xattr_set_caps(struct erofs_inode *inode) { @@ -394,26 +539,25 @@ static int erofs_droid_xattr_set_caps(struct erofs_inode *inode) if (!capabilities) return 0; - len[0] = sizeof("capability") - 1; + len[0] = sizeof(XATTR_NAME_SECURITY_CAPABILITY) - 1; len[1] = sizeof(caps); - kvbuf = malloc(len[0] + len[1]); + kvbuf = malloc(EROFS_XATTR_KVSIZE(len)); if (!kvbuf) return -ENOMEM; - memcpy(kvbuf, "capability", len[0]); + sprintf(kvbuf, "%s", XATTR_NAME_SECURITY_CAPABILITY); caps.magic_etc = VFS_CAP_REVISION_2 | VFS_CAP_FLAGS_EFFECTIVE; caps.data[0].permitted = (u32) capabilities; caps.data[0].inheritable = 0; caps.data[1].permitted = (u32) (capabilities >> 32); caps.data[1].inheritable = 0; - memcpy(kvbuf + len[0], &caps, len[1]); + memcpy(kvbuf + EROFS_XATTR_KSIZE(len), &caps, len[1]); - item = get_xattritem(EROFS_XATTR_INDEX_SECURITY, kvbuf, len); + item = get_xattritem(kvbuf, len); if (IS_ERR(item)) return PTR_ERR(item); - if (!item) - return 0; + DBG_BUGON(!item); return erofs_xattr_add(&inode->i_xattrs, item); } @@ -424,10 +568,9 @@ static int erofs_droid_xattr_set_caps(struct erofs_inode *inode) } #endif -int erofs_prepare_xattr_ibody(struct erofs_inode *inode) +int erofs_scan_file_xattrs(struct erofs_inode *inode) { int ret; - struct inode_xattr_node *node; struct list_head *ixattrs = &inode->i_xattrs; /* check if xattr is disabled */ @@ -438,24 +581,109 @@ int erofs_prepare_xattr_ibody(struct erofs_inode *inode) if (ret < 0) return ret; - ret = erofs_droid_xattr_set_caps(inode); + return erofs_droid_xattr_set_caps(inode); +} + +int erofs_read_xattrs_from_disk(struct erofs_inode *inode) +{ + ssize_t kllen; + char *keylst, *key; + int ret; + + init_list_head(&inode->i_xattrs); + kllen = erofs_listxattr(inode, NULL, 0); + if (kllen < 0) + return kllen; 
+ if (kllen <= 1) + return 0; + + keylst = malloc(kllen); + if (!keylst) + return -ENOMEM; + + ret = erofs_listxattr(inode, keylst, kllen); if (ret < 0) - return ret; + goto out; + + for (key = keylst; key < keylst + kllen; key += strlen(key) + 1) { + void *value = NULL; + size_t size = 0; + + if (!strcmp(key, OVL_XATTR_OPAQUE)) { + if (!S_ISDIR(inode->i_mode)) { + erofs_dbg("file %s: opaque xattr on non-dir", + inode->i_srcpath); + ret = -EINVAL; + goto out; + } + inode->opaque = true; + } + + ret = erofs_getxattr(inode, key, NULL, 0); + if (ret < 0) + goto out; + if (ret) { + size = ret; + value = malloc(size); + if (!value) { + ret = -ENOMEM; + goto out; + } - if (list_empty(ixattrs)) + ret = erofs_getxattr(inode, key, value, size); + if (ret < 0) { + free(value); + goto out; + } + DBG_BUGON(ret != size); + } else if (S_ISDIR(inode->i_mode) && + !strcmp(key, OVL_XATTR_ORIGIN)) { + ret = 0; + inode->whiteouts = true; + continue; + } + + ret = erofs_setxattr(inode, key, value, size); + free(value); + if (ret) + break; + } +out: + free(keylst); + return ret; +} + +static inline unsigned int erofs_next_xattr_align(unsigned int pos, + struct xattr_item *item) +{ + return EROFS_XATTR_ALIGN(pos + sizeof(struct erofs_xattr_entry) + + item->len[0] + item->len[1] - item->prefix_len); +} + +int erofs_prepare_xattr_ibody(struct erofs_inode *inode) +{ + int ret; + struct inode_xattr_node *node; + struct list_head *ixattrs = &inode->i_xattrs; + unsigned int h_shared_count; + + if (list_empty(ixattrs)) { + inode->xattr_isize = 0; return 0; + } /* get xattr ibody size */ + h_shared_count = 0; ret = sizeof(struct erofs_xattr_ibody_header); list_for_each_entry(node, ixattrs, list) { - const struct xattr_item *item = node->item; + struct xattr_item *item = node->item; - if (item->shared_xattr_id >= 0) { + if (item->shared_xattr_id >= 0 && h_shared_count < UCHAR_MAX) { + ++h_shared_count; ret += sizeof(__le32); continue; } - ret += sizeof(struct erofs_xattr_entry); - ret = 
EROFS_XATTR_ALIGN(ret + item->len[0] + item->len[1]); + ret = erofs_next_xattr_align(ret, item); } inode->xattr_isize = ret; return ret; @@ -543,32 +771,17 @@ static void erofs_cleanxattrs(bool sharedxattrs) if (sharedxattrs) return; - shared_xattrs_size = shared_xattrs_count = 0; + shared_xattrs_count = 0; } -static bool erofs_bh_flush_write_shared_xattrs(struct erofs_buffer_head *bh) -{ - void *buf = bh->fsprivate; - int err = dev_write(buf, erofs_btell(bh, false), shared_xattrs_size); - - if (err) - return false; - free(buf); - return erofs_bh_flush_generic_end(bh); -} - -static struct erofs_bhops erofs_write_shared_xattrs_bhops = { - .flush = erofs_bh_flush_write_shared_xattrs, -}; - -static int comp_xattr_item(const void *a, const void *b) +static int comp_shared_xattr_item(const void *a, const void *b) { const struct xattr_item *ia, *ib; unsigned int la, lb; int ret; - ia = (*((const struct inode_xattr_node **)a))->item; - ib = (*((const struct inode_xattr_node **)b))->item; + ia = *((const struct xattr_item **)a); + ib = *((const struct xattr_item **)b); la = ia->len[0] + ia->len[1]; lb = ib->len[0] + ib->len[1]; @@ -579,21 +792,88 @@ static int comp_xattr_item(const void *a, const void *b) return la > lb; } -int erofs_build_shared_xattrs_from_path(const char *path) +int erofs_xattr_write_name_prefixes(struct erofs_sb_info *sbi, FILE *f) +{ + struct ea_type_node *tnode; + off_t offset; + + if (!ea_prefix_count) + return 0; + offset = ftello(f); + if (offset < 0) + return -errno; + if (offset > UINT32_MAX) + return -EOVERFLOW; + + offset = round_up(offset, 4); + if (fseek(f, offset, SEEK_SET)) + return -errno; + sbi->xattr_prefix_start = (u32)offset >> 2; + sbi->xattr_prefix_count = ea_prefix_count; + + list_for_each_entry(tnode, &ea_name_prefixes, list) { + union { + struct { + __le16 size; + struct erofs_xattr_long_prefix prefix; + } s; + u8 data[EROFS_NAME_LEN + 2 + + sizeof(struct erofs_xattr_long_prefix)]; + } u; + int len, infix_len; + + 
u.s.prefix.base_index = tnode->base_index; + infix_len = tnode->type.prefix_len - tnode->base_len; + memcpy(u.s.prefix.infix, tnode->type.prefix + tnode->base_len, + infix_len); + len = sizeof(struct erofs_xattr_long_prefix) + infix_len; + u.s.size = cpu_to_le16(len); + if (fwrite(&u.s, sizeof(__le16) + len, 1, f) != 1) + return -EIO; + offset = round_up(offset + sizeof(__le16) + len, 4); + if (fseek(f, offset, SEEK_SET)) + return -errno; + } + erofs_sb_set_fragments(sbi); + erofs_sb_set_xattr_prefixes(sbi); + return 0; +} + +static void erofs_write_xattr_entry(char *buf, struct xattr_item *item) +{ + struct erofs_xattr_entry entry = { + .e_name_index = item->prefix, + .e_name_len = item->len[0] - item->prefix_len, + .e_value_size = cpu_to_le16(item->len[1]), + }; + + memcpy(buf, &entry, sizeof(entry)); + buf += sizeof(struct erofs_xattr_entry); + memcpy(buf, item->kvbuf + item->prefix_len, + item->len[0] - item->prefix_len); + buf += item->len[0] - item->prefix_len; + memcpy(buf, item->kvbuf + item->len[0] + 1, item->len[1]); + + erofs_dbg("writing xattr %d %s (%d %s)", item->base_index, item->kvbuf, + item->prefix, item->kvbuf + item->prefix_len); +} + +int erofs_build_shared_xattrs_from_path(struct erofs_sb_info *sbi, const char *path) { int ret; struct erofs_buffer_head *bh; - struct inode_xattr_node *node, *n, **sorted_n; + struct xattr_item *item, *n, **sorted_n; char *buf; unsigned int p, i; erofs_off_t off; + erofs_off_t shared_xattrs_size = 0; /* check if xattr or shared xattr is disabled */ if (cfg.c_inline_xattr_tolerance < 0 || cfg.c_inline_xattr_tolerance == INT_MAX) return 0; - if (shared_xattrs_size || shared_xattrs_count) { + if (shared_xattrs_count) { DBG_BUGON(1); return -EINVAL; } @@ -602,15 +882,34 @@ int erofs_build_shared_xattrs_from_path(const char *path) if (ret) return ret; - if (!shared_xattrs_size) + if (!shared_xattrs_count) goto out; + sorted_n = malloc((shared_xattrs_count + 1) * sizeof(n)); + if (!sorted_n) + return -ENOMEM; + + i = 
0; + while (shared_xattrs_list) { + item = shared_xattrs_list; + sorted_n[i++] = item; + shared_xattrs_list = item->next_shared_xattr; + shared_xattrs_size = erofs_next_xattr_align(shared_xattrs_size, + item); + } + DBG_BUGON(i != shared_xattrs_count); + sorted_n[i] = NULL; + qsort(sorted_n, shared_xattrs_count, sizeof(n), comp_shared_xattr_item); + buf = calloc(1, shared_xattrs_size); - if (!buf) + if (!buf) { + free(sorted_n); return -ENOMEM; + } bh = erofs_balloc(XATTR, shared_xattrs_size, 0, 0); if (IS_ERR(bh)) { + free(sorted_n); free(buf); return PTR_ERR(bh); } @@ -619,51 +918,33 @@ int erofs_build_shared_xattrs_from_path(const char *path) erofs_mapbh(bh->block); off = erofs_btell(bh, false); - sbi.xattr_blkaddr = off / EROFS_BLKSIZ; - off %= EROFS_BLKSIZ; + sbi->xattr_blkaddr = off / erofs_blksiz(sbi); + off %= erofs_blksiz(sbi); p = 0; - - sorted_n = malloc(shared_xattrs_count * sizeof(n)); - if (!sorted_n) - return -ENOMEM; - i = 0; - list_for_each_entry_safe(node, n, &shared_xattrs_list, list) { - list_del(&node->list); - sorted_n[i++] = node; - } - DBG_BUGON(i != shared_xattrs_count); - qsort(sorted_n, shared_xattrs_count, sizeof(n), comp_xattr_item); - for (i = 0; i < shared_xattrs_count; i++) { - struct inode_xattr_node *const tnode = sorted_n[i]; - struct xattr_item *const item = tnode->item; - const struct erofs_xattr_entry entry = { - .e_name_index = item->prefix, - .e_name_len = item->len[0], - .e_value_size = cpu_to_le16(item->len[1]) - }; - - item->shared_xattr_id = (off + p) / - sizeof(struct erofs_xattr_entry); - - memcpy(buf + p, &entry, sizeof(entry)); - p += sizeof(struct erofs_xattr_entry); - memcpy(buf + p, item->kvbuf, item->len[0] + item->len[1]); - p = EROFS_XATTR_ALIGN(p + item->len[0] + item->len[1]); - free(tnode); + item = sorted_n[i]; + erofs_write_xattr_entry(buf + p, item); + item->next_shared_xattr = sorted_n[i + 1]; + item->shared_xattr_id = (off + p) / sizeof(__le32); + p = erofs_next_xattr_align(p, item); } - + 
shared_xattrs_list = sorted_n[0]; free(sorted_n); - bh->fsprivate = buf; - bh->op = &erofs_write_shared_xattrs_bhops; + bh->op = &erofs_drop_directly_bhops; + ret = dev_write(sbi, buf, erofs_btell(bh, false), shared_xattrs_size); + free(buf); + erofs_bdrop(bh, false); out: erofs_cleanxattrs(true); - return 0; + return ret; } -char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size) +char *erofs_export_xattr_ibody(struct erofs_inode *inode) { + struct list_head *ixattrs = &inode->i_xattrs; + unsigned int size = inode->xattr_isize; struct inode_xattr_node *node, *n; + struct xattr_item *item; struct erofs_xattr_ibody_header *header; LIST_HEAD(ilst); unsigned int p; @@ -675,14 +956,35 @@ char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size) header = (struct erofs_xattr_ibody_header *)buf; header->h_shared_count = 0; + if (cfg.c_xattr_name_filter) { + u32 name_filter = 0; + int hashbit; + unsigned int base_len; + + list_for_each_entry(node, ixattrs, list) { + item = node->item; + base_len = xattr_types[item->base_index].prefix_len; + hashbit = xxh32(item->kvbuf + base_len, + item->len[0] - base_len, + EROFS_XATTR_FILTER_SEED + item->base_index) & + (EROFS_XATTR_FILTER_BITS - 1); + name_filter |= (1UL << hashbit); + } + name_filter = EROFS_XATTR_FILTER_DEFAULT & ~name_filter; + + header->h_name_filter = cpu_to_le32(name_filter); + if (header->h_name_filter) + erofs_sb_set_xattr_filter(inode->sbi); + } + p = sizeof(struct erofs_xattr_ibody_header); list_for_each_entry_safe(node, n, ixattrs, list) { - struct xattr_item *const item = node->item; - + item = node->item; list_del(&node->list); /* move inline xattrs to the onstack list */ - if (item->shared_xattr_id < 0) { + if (item->shared_xattr_id < 0 || + header->h_shared_count >= UCHAR_MAX) { list_add(&node->list, &ilst); continue; } @@ -695,18 +997,9 @@ char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size) } list_for_each_entry_safe(node, n, &ilst, list) { 
- struct xattr_item *const item = node->item; - const struct erofs_xattr_entry entry = { - .e_name_index = item->prefix, - .e_name_len = item->len[0], - .e_value_size = cpu_to_le16(item->len[1]) - }; - - memcpy(buf + p, &entry, sizeof(entry)); - p += sizeof(struct erofs_xattr_entry); - memcpy(buf + p, item->kvbuf, item->len[0] + item->len[1]); - p = EROFS_XATTR_ALIGN(p + item->len[0] + item->len[1]); - + item = node->item; + erofs_write_xattr_entry(buf + p, item); + p = erofs_next_xattr_align(p, item); list_del(&node->list); free(node); put_xattritem(item); @@ -716,16 +1009,18 @@ char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size) } struct xattr_iter { - char page[EROFS_BLKSIZ]; + char page[EROFS_MAX_BLOCK_SIZE]; void *kaddr; erofs_blk_t blkaddr; unsigned int ofs; + struct erofs_sb_info *sbi; }; static int init_inode_xattrs(struct erofs_inode *vi) { + struct erofs_sb_info *sbi = vi->sbi; struct xattr_iter it; unsigned int i; struct erofs_xattr_ibody_header *ih; @@ -756,10 +1051,10 @@ static int init_inode_xattrs(struct erofs_inode *vi) return -ENOATTR; } - it.blkaddr = erofs_blknr(iloc(vi->nid) + vi->inode_isize); - it.ofs = erofs_blkoff(iloc(vi->nid) + vi->inode_isize); + it.blkaddr = erofs_blknr(sbi, erofs_iloc(vi) + vi->inode_isize); + it.ofs = erofs_blkoff(sbi, erofs_iloc(vi) + vi->inode_isize); - ret = blk_read(0, it.page, it.blkaddr, 1); + ret = blk_read(sbi, 0, it.page, it.blkaddr, 1); if (ret < 0) return -EIO; @@ -775,11 +1070,11 @@ static int init_inode_xattrs(struct erofs_inode *vi) it.ofs += sizeof(struct erofs_xattr_ibody_header); for (i = 0; i < vi->xattr_shared_count; ++i) { - if (it.ofs >= EROFS_BLKSIZ) { + if (it.ofs >= erofs_blksiz(sbi)) { /* cannot be unaligned */ - DBG_BUGON(it.ofs != EROFS_BLKSIZ); + DBG_BUGON(it.ofs != erofs_blksiz(sbi)); - ret = blk_read(0, it.page, ++it.blkaddr, 1); + ret = blk_read(sbi, 0, it.page, ++it.blkaddr, 1); if (ret < 0) { free(vi->xattr_shared_xattrs); vi->xattr_shared_xattrs = NULL; @@ 
-817,25 +1112,27 @@ struct xattr_iter_handlers { static inline int xattr_iter_fixup(struct xattr_iter *it) { + struct erofs_sb_info *sbi = it->sbi; int ret; - if (it->ofs < EROFS_BLKSIZ) + if (it->ofs < erofs_blksiz(sbi)) return 0; - it->blkaddr += erofs_blknr(it->ofs); + it->blkaddr += erofs_blknr(sbi, it->ofs); - ret = blk_read(0, it->page, it->blkaddr, 1); + ret = blk_read(sbi, 0, it->page, it->blkaddr, 1); if (ret < 0) return -EIO; it->kaddr = it->page; - it->ofs = erofs_blkoff(it->ofs); + it->ofs = erofs_blkoff(sbi, it->ofs); return 0; } static int inline_xattr_iter_pre(struct xattr_iter *it, struct erofs_inode *vi) { + struct erofs_sb_info *sbi = vi->sbi; unsigned int xattr_header_sz, inline_xattr_ofs; int ret; @@ -847,10 +1144,10 @@ static int inline_xattr_iter_pre(struct xattr_iter *it, inline_xattr_ofs = vi->inode_isize + xattr_header_sz; - it->blkaddr = erofs_blknr(iloc(vi->nid) + inline_xattr_ofs); - it->ofs = erofs_blkoff(iloc(vi->nid) + inline_xattr_ofs); + it->blkaddr = erofs_blknr(sbi, erofs_iloc(vi) + inline_xattr_ofs); + it->ofs = erofs_blkoff(sbi, erofs_iloc(vi) + inline_xattr_ofs); - ret = blk_read(0, it->page, it->blkaddr, 1); + ret = blk_read(sbi, 0, it->page, it->blkaddr, 1); if (ret < 0) return -EIO; @@ -866,6 +1163,7 @@ static int xattr_foreach(struct xattr_iter *it, const struct xattr_iter_handlers *op, unsigned int *tlimit) { + struct erofs_sb_info *sbi = it->sbi; struct erofs_xattr_entry entry; unsigned int value_sz, processed, slice; int err; @@ -906,8 +1204,8 @@ static int xattr_foreach(struct xattr_iter *it, processed = 0; while (processed < entry.e_name_len) { - if (it->ofs >= EROFS_BLKSIZ) { - DBG_BUGON(it->ofs > EROFS_BLKSIZ); + if (it->ofs >= erofs_blksiz(sbi)) { + DBG_BUGON(it->ofs > erofs_blksiz(sbi)); err = xattr_iter_fixup(it); if (err) @@ -915,7 +1213,7 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs, + slice = min_t(unsigned int, erofs_blksiz(sbi) - 
it->ofs, entry.e_name_len - processed); /* handle name */ @@ -941,8 +1239,8 @@ static int xattr_foreach(struct xattr_iter *it, } while (processed < value_sz) { - if (it->ofs >= EROFS_BLKSIZ) { - DBG_BUGON(it->ofs > EROFS_BLKSIZ); + if (it->ofs >= erofs_blksiz(sbi)) { + DBG_BUGON(it->ofs > erofs_blksiz(sbi)); err = xattr_iter_fixup(it); if (err) @@ -950,7 +1248,7 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs, + slice = min_t(unsigned int, erofs_blksiz(sbi) - it->ofs, value_sz - processed); op->value(it, processed, it->kaddr + it->ofs, slice); it->ofs += slice; @@ -966,19 +1264,47 @@ out: struct getxattr_iter { struct xattr_iter it; - int buffer_size, index; + int buffer_size, index, infix_len; char *buffer; const char *name; size_t len; }; +static int erofs_xattr_long_entrymatch(struct getxattr_iter *it, + struct erofs_xattr_entry *entry) +{ + struct erofs_sb_info *sbi = it->it.sbi; + struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes + + (entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK); + + if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count) + return -ENOATTR; + + if (it->index != pf->prefix->base_index || + it->len != entry->e_name_len + pf->infix_len) + return -ENOATTR; + + if (memcmp(it->name, pf->prefix->infix, pf->infix_len)) + return -ENOATTR; + + it->infix_len = pf->infix_len; + return 0; +} + static int xattr_entrymatch(struct xattr_iter *_it, struct erofs_xattr_entry *entry) { struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); - return (it->index != entry->e_name_index || - it->len != entry->e_name_len) ? 
-ENOATTR : 0; + /* should also match the infix for long name prefixes */ + if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX) + return erofs_xattr_long_entrymatch(it, entry); + + if (it->index != entry->e_name_index || + it->len != entry->e_name_len) + return -ENOATTR; + it->infix_len = 0; + return 0; } static int xattr_namematch(struct xattr_iter *_it, @@ -986,8 +1312,9 @@ static int xattr_namematch(struct xattr_iter *_it, { struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); - - return memcmp(buf, it->name + processed, len) ? -ENOATTR : 0; + if (memcmp(buf, it->name + it->infix_len + processed, len)) + return -ENOATTR; + return 0; } static int xattr_checkbuffer(struct xattr_iter *_it, @@ -1042,12 +1369,12 @@ static int shared_getxattr(struct erofs_inode *vi, struct getxattr_iter *it) for (i = 0; i < vi->xattr_shared_count; ++i) { erofs_blk_t blkaddr = - xattrblock_addr(vi->xattr_shared_xattrs[i]); + xattrblock_addr(vi, vi->xattr_shared_xattrs[i]); - it->it.ofs = xattrblock_offset(vi->xattr_shared_xattrs[i]); + it->it.ofs = xattrblock_offset(vi, vi->xattr_shared_xattrs[i]); if (!i || blkaddr != it->it.blkaddr) { - ret = blk_read(0, it->it.page, blkaddr, 1); + ret = blk_read(vi->sbi, 0, it->it.page, blkaddr, 1); if (ret < 0) return -EIO; @@ -1067,8 +1394,7 @@ int erofs_getxattr(struct erofs_inode *vi, const char *name, char *buffer, size_t buffer_size) { int ret; - u8 prefix; - u16 prefixlen; + unsigned int prefix, prefixlen; struct getxattr_iter it; if (!name) @@ -1081,6 +1407,7 @@ int erofs_getxattr(struct erofs_inode *vi, const char *name, char *buffer, if (!match_prefix(name, &prefix, &prefixlen)) return -ENODATA; + it.it.sbi = vi->sbi; it.index = prefix; it.name = name + prefixlen; it.len = strlen(it.name); @@ -1108,23 +1435,40 @@ static int xattr_entrylist(struct xattr_iter *_it, { struct listxattr_iter *it = container_of(_it, struct listxattr_iter, it); - unsigned int prefix_len; - const char *prefix; + unsigned int base_index = 
entry->e_name_index; + unsigned int prefix_len, infix_len = 0; + const char *prefix, *infix = NULL; + + if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX) { + struct erofs_sb_info *sbi = _it->sbi; + struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes + + (entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK); + + if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count) + return 1; + infix = pf->prefix->infix; + infix_len = pf->infix_len; + base_index = pf->prefix->base_index; + } - prefix = xattr_types[entry->e_name_index].prefix; - prefix_len = xattr_types[entry->e_name_index].prefix_len; + if (base_index >= ARRAY_SIZE(xattr_types)) + return 1; + prefix = xattr_types[base_index].prefix; + prefix_len = xattr_types[base_index].prefix_len; if (!it->buffer) { - it->buffer_ofs += prefix_len + entry->e_name_len + 1; + it->buffer_ofs += prefix_len + infix_len + + entry->e_name_len + 1; return 1; } - if (it->buffer_ofs + prefix_len + if (it->buffer_ofs + prefix_len + infix_len + entry->e_name_len + 1 > it->buffer_size) return -ERANGE; memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len); - it->buffer_ofs += prefix_len; + memcpy(it->buffer + it->buffer_ofs + prefix_len, infix, infix_len); + it->buffer_ofs += prefix_len + infix_len; return 0; } @@ -1182,11 +1526,11 @@ static int shared_listxattr(struct erofs_inode *vi, struct listxattr_iter *it) for (i = 0; i < vi->xattr_shared_count; ++i) { erofs_blk_t blkaddr = - xattrblock_addr(vi->xattr_shared_xattrs[i]); + xattrblock_addr(vi, vi->xattr_shared_xattrs[i]); - it->it.ofs = xattrblock_offset(vi->xattr_shared_xattrs[i]); + it->it.ofs = xattrblock_offset(vi, vi->xattr_shared_xattrs[i]); if (!i || blkaddr != it->it.blkaddr) { - ret = blk_read(0, it->it.page, blkaddr, 1); + ret = blk_read(vi->sbi, 0, it->it.page, blkaddr, 1); if (ret < 0) return -EIO; @@ -1213,6 +1557,7 @@ int erofs_listxattr(struct erofs_inode *vi, char *buffer, size_t buffer_size) if (ret) return ret; + it.it.sbi = vi->sbi; it.buffer = buffer; 
it.buffer_size = buffer_size; it.buffer_ofs = 0; @@ -1222,3 +1567,96 @@ int erofs_listxattr(struct erofs_inode *vi, char *buffer, size_t buffer_size) return ret; return shared_listxattr(vi, &it); } + +int erofs_xattr_insert_name_prefix(const char *prefix) +{ + struct ea_type_node *tnode; + + if (ea_prefix_count >= 0x80 || strlen(prefix) > UINT8_MAX) + return -EOVERFLOW; + + tnode = calloc(1, sizeof(*tnode)); + if (!tnode) + return -ENOMEM; + + if (!match_prefix(prefix, &tnode->base_index, &tnode->base_len)) { + free(tnode); + return -ENODATA; + } + + tnode->type.prefix_len = strlen(prefix); + tnode->type.prefix = strdup(prefix); + if (!tnode->type.prefix) { + free(tnode); + return -ENOMEM; + } + + tnode->index = EROFS_XATTR_LONG_PREFIX | ea_prefix_count; + ea_prefix_count++; + init_list_head(&tnode->list); + list_add_tail(&tnode->list, &ea_name_prefixes); + return 0; +} + +void erofs_xattr_cleanup_name_prefixes(void) +{ + struct ea_type_node *tnode, *n; + + list_for_each_entry_safe(tnode, n, &ea_name_prefixes, list) { + list_del(&tnode->list); + free((void *)tnode->type.prefix); + free(tnode); + } +} + +void erofs_xattr_prefixes_cleanup(struct erofs_sb_info *sbi) +{ + int i; + + if (sbi->xattr_prefixes) { + for (i = 0; i < sbi->xattr_prefix_count; i++) + free(sbi->xattr_prefixes[i].prefix); + free(sbi->xattr_prefixes); + sbi->xattr_prefixes = NULL; + } +} + +int erofs_xattr_prefixes_init(struct erofs_sb_info *sbi) +{ + erofs_off_t pos = (erofs_off_t)sbi->xattr_prefix_start << 2; + struct erofs_xattr_prefix_item *pfs; + erofs_nid_t nid = 0; + int ret = 0, i, len; + void *buf; + + if (!sbi->xattr_prefix_count) + return 0; + + if (sbi->packed_nid) + nid = sbi->packed_nid; + + pfs = calloc(sbi->xattr_prefix_count, sizeof(*pfs)); + if (!pfs) + return -ENOMEM; + + for (i = 0; i < sbi->xattr_prefix_count; i++) { + buf = erofs_read_metadata(sbi, nid, &pos, &len); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out; + } + if (len < sizeof(*pfs->prefix) || + len > 
EROFS_NAME_LEN + sizeof(*pfs->prefix)) { + free(buf); + ret = -EFSCORRUPTED; + goto out; + } + pfs[i].prefix = buf; + pfs[i].infix_len = len - sizeof(struct erofs_xattr_long_prefix); + } +out: + sbi->xattr_prefixes = pfs; + if (ret) + erofs_xattr_prefixes_cleanup(sbi); + return ret; +} diff --git a/lib/xxhash.c b/lib/xxhash.c new file mode 100644 index 0000000..7289c77 --- /dev/null +++ b/lib/xxhash.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only +/* + * The xxhash is copied from the linux kernel at: + * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/xxhash.c + * + * The original copyright is: + * + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2012-2016, Yann Collet. + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + * + * You can contact the author at: + * - xxHash homepage: https://cyan4973.github.io/xxHash/ + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#include "erofs/defs.h" +#include "erofs/xxhash.h" + +/*-************************************* + * Macros + **************************************/ +#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) + +/*-************************************* + * Constants + **************************************/ +static const uint32_t PRIME32_1 = 2654435761U; +static const uint32_t PRIME32_2 = 2246822519U; +static const uint32_t PRIME32_3 = 3266489917U; +static const uint32_t PRIME32_4 = 668265263U; +static const uint32_t PRIME32_5 = 374761393U; + +/*-*************************** + * Simple Hash Functions + ****************************/ +static uint32_t xxh32_round(uint32_t seed, const uint32_t input) +{ + seed += input * PRIME32_2; + seed = xxh_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) +{ + const uint8_t *p = (const uint8_t *)input; + const uint8_t *b_end = p + 
len; + uint32_t h32; + + if (len >= 16) { + const uint8_t *const limit = b_end - 16; + uint32_t v1 = seed + PRIME32_1 + PRIME32_2; + uint32_t v2 = seed + PRIME32_2; + uint32_t v3 = seed + 0; + uint32_t v4 = seed - PRIME32_1; + + do { + v1 = xxh32_round(v1, get_unaligned_le32(p)); + p += 4; + v2 = xxh32_round(v2, get_unaligned_le32(p)); + p += 4; + v3 = xxh32_round(v3, get_unaligned_le32(p)); + p += 4; + v4 = xxh32_round(v4, get_unaligned_le32(p)); + p += 4; + } while (p <= limit); + + h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) + + xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (uint32_t)len; + + while (p + 4 <= b_end) { + h32 += get_unaligned_le32(p) * PRIME32_3; + h32 = xxh_rotl32(h32, 17) * PRIME32_4; + p += 4; + } + + while (p < b_end) { + h32 += (*p) * PRIME32_5; + h32 = xxh_rotl32(h32, 11) * PRIME32_1; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} @@ -16,13 +16,15 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi, int z_erofs_fill_inode(struct erofs_inode *vi) { - if (!erofs_sb_has_big_pcluster() && - !erofs_sb_has_ztailpacking() && !erofs_sb_has_fragments() && - vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { + struct erofs_sb_info *sbi = vi->sbi; + + if (!erofs_sb_has_big_pcluster(sbi) && + !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) && + vi->datalayout == EROFS_INODE_COMPRESSED_FULL) { vi->z_advise = 0; vi->z_algorithmtype[0] = 0; vi->z_algorithmtype[1] = 0; - vi->z_logical_clusterbits = LOG_BLOCK_SIZE; + vi->z_logical_clusterbits = sbi->blkszbits; vi->flags |= EROFS_I_Z_INITED; } @@ -35,12 +37,13 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi) erofs_off_t pos; struct z_erofs_map_header *h; char buf[sizeof(struct z_erofs_map_header)]; + struct erofs_sb_info *sbi = vi->sbi; if (vi->flags & EROFS_I_Z_INITED) return 0; - pos = round_up(iloc(vi->nid) + vi->inode_isize + 
vi->xattr_isize, 8); - ret = dev_read(0, buf, pos, sizeof(buf)); + pos = round_up(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, 8); + ret = dev_read(sbi, 0, buf, pos, sizeof(buf)); if (ret < 0) return -EIO; @@ -66,8 +69,8 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi) return -EOPNOTSUPP; } - vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); - if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && + vi->z_logical_clusterbits = sbi->blkszbits + (h->h_clusterbits & 7); + if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err("big pcluster head1/2 of compact indexes should be consistent for nid %llu", @@ -82,7 +85,7 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi) ret = z_erofs_do_map_blocks(vi, &map, EROFS_GET_BLOCKS_FINDTAIL); if (!map.m_plen || - erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) { + erofs_blkoff(sbi, map.m_pa) + map.m_plen > erofs_blksiz(sbi)) { erofs_err("invalid tail-packing pclustersize %llu", map.m_plen | 0ULL); return -EFSCORRUPTED; @@ -130,7 +133,7 @@ static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, if (map->index == eblk) return 0; - ret = blk_read(0, mpage, eblk, 1); + ret = blk_read(m->inode->sbi, 0, mpage, eblk, 1); if (ret < 0) return -EIO; @@ -143,44 +146,44 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, unsigned long lcn) { struct erofs_inode *const vi = m->inode; - const erofs_off_t ibase = iloc(vi->nid); - const erofs_off_t pos = - Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize + - vi->xattr_isize) + - lcn * sizeof(struct z_erofs_vle_decompressed_index); - struct z_erofs_vle_decompressed_index *di; + struct erofs_sb_info *sbi = vi->sbi; + const erofs_off_t ibase = erofs_iloc(vi); + const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(ibase + + vi->inode_isize + vi->xattr_isize) + + lcn * sizeof(struct 
z_erofs_lcluster_index); + struct z_erofs_lcluster_index *di; unsigned int advise, type; int err; - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); + err = z_erofs_reload_indexes(m, erofs_blknr(sbi, pos)); if (err) return err; - m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index); + m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index); m->lcn = lcn; - di = m->kaddr + erofs_blkoff(pos); + di = m->kaddr + erofs_blkoff(sbi, pos); advise = le16_to_cpu(di->di_advise); - type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & - ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1); + type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) & + ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1); switch (type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + case Z_EROFS_LCLUSTER_TYPE_NONHEAD: m->clusterofs = 1 << vi->z_logical_clusterbits; m->delta[0] = le16_to_cpu(di->di_u.delta[0]); - if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) { if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = m->delta[0] & - ~Z_EROFS_VLE_DI_D0_CBLKCNT; + ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; } m->delta[1] = le16_to_cpu(di->di_u.delta[1]); break; - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - if (advise & Z_EROFS_VLE_DI_PARTIAL_REF) + case Z_EROFS_LCLUSTER_TYPE_PLAIN: + case Z_EROFS_LCLUSTER_TYPE_HEAD1: + if (advise & Z_EROFS_LI_PARTIAL_REF) m->partialref = true; m->clusterofs = le16_to_cpu(di->di_clusterofs); m->pblk = le32_to_cpu(di->di_u.blkaddr); @@ -218,13 +221,13 @@ static int get_compacted_la_distance(unsigned int lclusterbits, lo = decode_compactedbits(lclusterbits, lomask, in, encodebits * i, &type); - if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) return d1; ++d1; } while (++i < vcnt); - /* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */ - if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT)) + /* vcnt - 1 
(Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */ + if (!(lo & Z_EROFS_LI_D0_CBLKCNT)) d1 += lo - 1; return d1; } @@ -241,7 +244,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, u8 *in, type; bool big_pcluster; - if (1 << amortizedshift == 4) + if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; else if (1 << amortizedshift == 2 && lclusterbits == 12) vcnt = 16; @@ -253,7 +256,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, (vcnt << amortizedshift); big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; - eofs = erofs_blkoff(pos); + eofs = erofs_blkoff(vi->sbi, pos); base = round_down(eofs, vcnt << amortizedshift); in = m->kaddr + base; @@ -262,19 +265,19 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, lo = decode_compactedbits(lclusterbits, lomask, in, encodebits * i, &type); m->type = type; - if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << lclusterbits; /* figure out lookahead_distance: delta[1] if needed */ if (lookahead) m->delta[1] = get_compacted_la_distance(lclusterbits, encodebits, vcnt, in, i); - if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (lo & Z_EROFS_LI_D0_CBLKCNT) { if (!big_pcluster) { DBG_BUGON(1); return -EFSCORRUPTED; } - m->compressedblks = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; return 0; } else if (i + 1 != (int)vcnt) { @@ -288,9 +291,9 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, */ lo = decode_compactedbits(lclusterbits, lomask, in, encodebits * (i - 1), &type); - if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) lo = 0; - else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) + else if (lo & Z_EROFS_LI_D0_CBLKCNT) lo = 1; m->delta[0] = lo + 1; return 0; @@ -304,7 +307,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder 
*m, --i; lo = decode_compactedbits(lclusterbits, lomask, in, encodebits * i, &type); - if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) i -= lo; if (i >= 0) @@ -316,13 +319,13 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, --i; lo = decode_compactedbits(lclusterbits, lomask, in, encodebits * i, &type); - if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { - if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + if (lo & Z_EROFS_LI_D0_CBLKCNT) { --i; - nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT; continue; } - if (lo == 1) { + if (lo <= 1) { DBG_BUGON(1); /* --i; ++nblk; continue; */ return -EFSCORRUPTED; @@ -342,19 +345,16 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, unsigned long lcn, bool lookahead) { struct erofs_inode *const vi = m->inode; - const unsigned int lclusterbits = vi->z_logical_clusterbits; - const erofs_off_t ebase = round_up(iloc(vi->nid) + vi->inode_isize + + struct erofs_sb_info *sbi = vi->sbi; + const erofs_off_t ebase = round_up(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, 8) + sizeof(struct z_erofs_map_header); - const unsigned int totalidx = BLK_ROUND_UP(vi->i_size); + const unsigned int totalidx = BLK_ROUND_UP(sbi, vi->i_size); unsigned int compacted_4b_initial, compacted_2b; unsigned int amortizedshift; erofs_off_t pos; int err; - if (lclusterbits != 12) - return -EOPNOTSUPP; - if (lcn >= totalidx) return -EINVAL; @@ -387,7 +387,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, amortizedshift = 2; out: pos += lcn * (1 << amortizedshift); - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); + err = z_erofs_reload_indexes(m, erofs_blknr(sbi, pos)); if (err) return err; return unpack_compacted_index(m, amortizedshift, pos, lookahead); @@ -398,10 +398,10 @@ static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, { const unsigned int 
datamode = m->inode->datalayout; - if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) + if (datamode == EROFS_INODE_COMPRESSED_FULL) return legacy_load_cluster_from_disk(m, lcn); - if (datamode == EROFS_INODE_FLAT_COMPRESSION) + if (datamode == EROFS_INODE_COMPRESSED_COMPACT) return compacted_load_cluster_from_disk(m, lcn, lookahead); return -EINVAL; @@ -430,7 +430,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, return err; switch (m->type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + case Z_EROFS_LCLUSTER_TYPE_NONHEAD: if (!m->delta[0]) { erofs_err("invalid lookback distance 0 @ nid %llu", (unsigned long long)vi->nid); @@ -438,8 +438,8 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } return z_erofs_extent_lookback(m, m->delta[0]); - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + case Z_EROFS_LCLUSTER_TYPE_PLAIN: + case Z_EROFS_LCLUSTER_TYPE_HEAD1: m->headtype = m->type; map->m_la = (lcn << lclusterbits) | m->clusterofs; break; @@ -456,15 +456,16 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, unsigned int initial_lcn) { struct erofs_inode *const vi = m->inode; + struct erofs_sb_info *sbi = vi->sbi; struct erofs_map_blocks *const map = m->map; const unsigned int lclusterbits = vi->z_logical_clusterbits; unsigned long lcn; int err; - DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN && - m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD); + DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN && + m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1); - if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN || + if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { map->m_plen = 1 << lclusterbits; return 0; @@ -487,18 +488,18 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, * BUG_ON in the debugging mode only for developers to notice that. 
*/ DBG_BUGON(lcn == initial_lcn && - m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD); + m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); switch (m->type) { - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + case Z_EROFS_LCLUSTER_TYPE_PLAIN: + case Z_EROFS_LCLUSTER_TYPE_HEAD1: /* * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. */ - m->compressedblks = 1 << (lclusterbits - LOG_BLOCK_SIZE); + m->compressedblks = 1 << (lclusterbits - sbi->blkszbits); break; - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + case Z_EROFS_LCLUSTER_TYPE_NONHEAD: if (m->delta[0] != 1) goto err_bonus_cblkcnt; if (m->compressedblks) @@ -511,7 +512,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } out: - map->m_plen = m->compressedblks << LOG_BLOCK_SIZE; + map->m_plen = m->compressedblks << sbi->blkszbits; return 0; err_bonus_cblkcnt: erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu", @@ -539,11 +540,11 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) if (err) return err; - if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { DBG_BUGON(!m->delta[1] && m->clusterofs != 1 << lclusterbits); - } else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN || - m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) { + } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN || + m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1) { /* go on until the next HEAD lcluster */ if (lcn != headlcn) break; @@ -566,6 +567,7 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi, struct erofs_map_blocks *map, int flags) { + struct erofs_sb_info *sbi = vi->sbi; bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; struct z_erofs_maprecorder m = { @@ -593,11 +595,18 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi, map->m_flags = EROFS_MAP_MAPPED | 
EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; switch (m.type) { - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + case Z_EROFS_LCLUSTER_TYPE_PLAIN: + case Z_EROFS_LCLUSTER_TYPE_HEAD1: if (endoff >= m.clusterofs) { m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; + /* + * For ztailpacking files, in order to inline data more + * effectively, special EOF lclusters are now supported + * which can have three parts at most. + */ + if (ztailpacking && end > vi->i_size) + end = vi->i_size; break; } /* m.lcn should be >= 1 if endoff < m.clusterofs */ @@ -611,7 +620,7 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi, map->m_flags |= EROFS_MAP_FULL_MAPPED; m.delta[0] = 1; /* fallthrough */ - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + case Z_EROFS_LCLUSTER_TYPE_NONHEAD: /* get the correspoinding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) @@ -629,8 +638,7 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi, if (flags & EROFS_GET_BLOCKS_FINDTAIL) { vi->z_tailextent_headlcn = m.lcn; /* for non-compact indexes, fragmentoff is 64 bits */ - if (fragment && - vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) + if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL) vi->fragmentoff |= (u64)m.pblk << 32; } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { @@ -640,13 +648,13 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi, } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_FRAGMENT; } else { - map->m_pa = blknr_to_addr(m.pblk); + map->m_pa = erofs_pos(sbi, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (err) goto out; } - if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) { + if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) { if (map->m_llen > map->m_plen) { DBG_BUGON(1); err = -EFSCORRUPTED; diff --git a/man/dump.erofs.1 b/man/dump.erofs.1 index 209e5f9..7316f4b 100644 --- a/man/dump.erofs.1 +++ 
b/man/dump.erofs.1 @@ -9,18 +9,28 @@ or overall disk statistics information from an EROFS-formatted image. .SH DESCRIPTION .B dump.erofs is used to retrieve erofs metadata from \fIIMAGE\fP and demonstrate +.br 1) overall disk statistics, +.br 2) superblock information, +.br 3) file information of the given inode NID, +.br 4) file extent information of the given inode NID. .SH OPTIONS .TP .BI "\-\-device=" path Specify an extra device to be used together. -You may give multiple `--device' options in the correct order. +You may give multiple +.B --device +options in the correct order. .TP .BI "\-\-ls" -List directory contents. An inode should be specified together. +List directory contents. +.I NID +or +.I path +required. .TP .BI "\-\-nid=" NID Specify an inode NID in order to print its file information. @@ -29,16 +39,21 @@ Specify an inode NID in order to print its file information. Specify an inode path in order to print its file information. .TP .BI \-e -Show the file extent information. An inode should be specified together. +Show the file extent information. +.I NID +or +.I path +required. .TP .BI \-V Print the version number and exit. .TP .BI \-s -Show superblock information of the an EROFS-formatted image. +Show superblock information. +This is the default if no options are specified. .TP .BI \-S -Show EROFS disk statistics, including file type/size distribution, number of (un)compressed files, compression ratio of the whole image, etc. +Show image statistics, including file type/size distribution, number of (un)compressed files, compression ratio, etc. .SH AUTHOR Initial code was written by Wang Qi <mpiglet@outlook.com>, Guo Xuenan <guoxuenan@huawei.com>. 
.PP diff --git a/man/fsck.erofs.1 b/man/fsck.erofs.1 index f3e9c3b..364219a 100644 --- a/man/fsck.erofs.1 +++ b/man/fsck.erofs.1 @@ -2,7 +2,7 @@ .\" .TH FSCK.EROFS 1 .SH NAME -fsck.erofs \- tool to check the EROFS filesystem's integrity +fsck.erofs \- tool to check an EROFS filesystem's integrity .SH SYNOPSIS \fBfsck.erofs\fR [\fIOPTIONS\fR] \fIIMAGE\fR .SH DESCRIPTION @@ -22,15 +22,18 @@ Print total compression ratio of all files including compressed and non-compressed files. .TP .BI "\-\-device=" path -Specify an extra device to be used together. -You may give multiple `--device' options in the correct order. +Specify an extra blob device to be used together. +You may give multiple +.B --device +options in the correct order. .TP .B \-\-extract -Check if all files are well encoded. This will induce more I/Os to read -compressed file data, so it might take too much time depending on the image. +Check if all files are well encoded. This reads all compressed files, +and hence creates more I/O load, +so it might take too much time depending on the image. .TP .B \-\-help -Display this help and exit. +Display help string and exit. .SH AUTHOR This version of \fBfsck.erofs\fR is written by Daeho Jeong <daehojeong@google.com>. diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 82ef138..00ac2ac 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -20,25 +20,30 @@ mkfs.erofs is used to create such EROFS filesystem \fIDESTINATION\fR image file from \fISOURCE\fR directory. .SH OPTIONS .TP -.BI "\-z " compression-algorithm " [" ",#" "]" " [:" " ... " "]" +.BI "\-z " compression-algorithm \fR[\fP, # \fR][\fP: ... \fR]\fP Set a primary algorithm for data compression, which can be set with an optional compression level (1 to 12 for LZ4HC, 0 to 9 for LZMA and 100 to 109 for LZMA extreme compression) separated by a comma. Alternative algorithms could be specified and separated by colons. .TP +.BI "\-b " block-size +Set the fundamental block size of the filesystem in bytes. 
In other words, +specify the smallest amount of data that can be accessed at a time. The +default is the system page size. It cannot be less than 512 bytes. +.TP .BI "\-C " max-pcluster-size -Specify the maximum size of compress physical cluster in bytes. It may enable -big pcluster feature if needed (Linux v5.13+). +Specify the maximum size of compress physical cluster in bytes. +This may cause the big pcluster feature to be enabled (Linux v5.13+). .TP .BI "\-d " # Specify the level of debugging messages. The default is 2, which shows basic warning messages. .TP .BI "\-x " # -Specify the upper limit of an xattr which is still inlined. The default is 2. -Disable storing xattrs if < 0. +Limit how many xattrs will be inlined. The default is 2. +Disables storing xattrs if < 0. .TP -.BI "\-E " extended-option " [,...]" +.BI "\-E " extended-option \fR[\fP, ... \fR]\fP Set extended options for the filesystem. Extended options are comma separated, and may take an extra argument using the equals ('=') sign. The following extended options are supported: @@ -51,33 +56,37 @@ it may take an argument as the pcluster size of the packed inode in bytes. .TP .BI dedupe Enable global compressed data deduplication to minimize duplicated data in -the filesystem. It may be used with \fI-Efragments\fR option together to -further reduce image sizes. (Linux v6.1+) +the filesystem. May further reduce image size when used with +.BR -E\ fragments . +(Linux v6.1+) .TP .BI force-inode-compact -Forcely generate compact inodes (32-byte inodes) to output. +Force generation of compact (32-byte) inodes. .TP .BI force-inode-extended -Forcely generate extended inodes (64-byte inodes) to output. +Force generation of extended (64-byte) inodes. .TP .BI force-inode-blockmap -Forcely generate inode chunk format in 4-byte block address array. +Force generation of inode chunk format as a 4-byte block address array. 
.TP .BI force-chunk-indexes -Forcely generate inode chunk format in 8-byte chunk indexes (with device id). +Forcely generate inode chunk format as an 8-byte chunk index (with device ID). .TP -.BI fragments -Pack the tail part (pcluster) of compressed files or the whole files into a +.BI fragments\fR[\fP= size \fR]\fP +Pack the tail part (pcluster) of compressed files, or entire files, into a special inode for smaller image sizes, and it may take an argument as the pcluster size of the packed inode in bytes. (Linux v6.1+) .TP .BI legacy-compress -Drop "inplace decompression" and "compacted indexes" support, which is used -to generate compatible EROFS images for Linux v4.19 - 5.3. +Disable "inplace decompression" and "compacted indexes", +for compatibility with Linux pre-v5.4. .TP .BI noinline_data Don't inline regular files to enable FSDAX for these files (Linux v5.15+). .TP +.B ^xattr-name-filter +Turn off/on xattr name filter to optimize negative xattr lookups (Linux v6.6+). +.TP .BI ztailpacking Pack the tail part (pcluster) of compressed files into its metadata to save more space and the tail part I/O. (Linux v5.17+) @@ -89,8 +98,8 @@ Set the volume label for the filesystem to The maximum length of the volume label is 16 bytes. .TP .BI "\-T " # -Set all files to the given UNIX timestamp. Reproducible builds requires setting -all to a specific one. +Set all files to the given UNIX timestamp. Reproducible builds require setting +all to a specific one. By default, the source file's modification time is used. .TP .BI "\-U " UUID Set the universally unique identifier (UUID) of the filesystem to @@ -102,65 +111,93 @@ like this: "c1b9d5a2-f162-11cf-9ece-0020afc76f16". Make all files owned by root. .TP .BI "\-\-blobdev " file -Specify another extra blob device to store chunk-based data. +Specify an extra blob device to store chunk-based data. .TP .BI "\-\-chunksize " # Generate chunk-based files with #-byte chunks. 
.TP .BI "\-\-compress-hints " file -If the optional -.BI "\-\-compress-hints " file -argument is given, -.B mkfs.erofs -uses it to apply the per-file compression strategy. Each line is defined by +Apply a per-file compression strategy. Each line in +.I file +is defined by tokens separated by spaces in the following form. Optionally, instead of -the given primary algorithm, alternative algorithms could be specified with -\fIalgorithm-index\fR by hand: +the given primary algorithm, alternative algorithms can be specified with +\fIalgorithm-index\fR explicitly: .RS 1.2i -<pcluster-in-bytes> [algorithm-index] <match-pattern> +<pcluster-size-in-bytes> [algorithm-index] <match-pattern> .RE +.IR match-pattern s +are extended regular expressions, matched against absolute paths within +the output filesystem, with no leading /. .TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. -You may give multiple `--exclude-path' options. +You may give multiple +.B --exclude-path +options. .TP .BI "\-\-exclude-regex=" regex -Ignore files that match the given regular expression. -You may give multiple `--exclude-regex` options. +Ignore files that match the given extended regular expression. +You may give multiple +.B --exclude-regex +options. .TP .BI "\-\-file-contexts=" file -Specify a \fIfile_contexts\fR file to setup / override selinux labels. +Read SELinux label configuration/overrides from \fIfile\fR in the +.BR selinux_file (5) +format. .TP .BI "\-\-force-uid=" UID -Set all file uids to \fIUID\fR. +Set all file UIDs to \fIUID\fR. .TP .BI "\-\-force-gid=" GID -Set all file gids to \fIGID\fR. +Set all file GIDs to \fIGID\fR. .TP .BI "\-\-gid-offset=" GIDOFFSET -Add \fIGIDOFFSET\fR to all file gids. -When this option is used together with --force-gid, the final file gids are +Add \fIGIDOFFSET\fR to all file GIDs. +When this option is used together with +.BR --force-gid , +the final file gids are set to \fIGID\fR + \fIGID-OFFSET\fR. 
.TP +.B \-\-gzip +Filter tarball streams through gzip. +.TP .B \-\-help -Display this help and exit. +Display help string and exit. .TP .B "\-\-ignore-mtime" -File modification time is ignored whenever it would cause \fBmkfs.erofs\fR to +Ignore the file modification time whenever it would cause \fBmkfs.erofs\fR to use extended inodes over compact inodes. When not using a fixed timestamp, this -can reduce total metadata size. +can reduce total metadata size. Implied by +.BR "-E force-inode-compact" . .TP .BI "\-\-max-extent-bytes " # -Specify maximum decompressed extent size # in bytes. +Specify maximum decompressed extent size in bytes. .TP .B "\-\-preserve-mtime" -File modification time is preserved whenever \fBmkfs.erofs\fR decides to use -extended inodes over compact inodes. +Use extended inodes instead of compact inodes if the file modification time +would overflow compact inodes. This is the default. Overrides +.BR --ignore-mtime . +.TP +.B "\-\-tar=f" +Generate a full EROFS image from a tarball. +.TP +.B "\-\-tar=i" +Generate an meta-only EROFS image from a tarball. .TP .BI "\-\-uid-offset=" UIDOFFSET -Add \fIUIDOFFSET\fR to all file uids. -When this option is used together with --force-uid, the final file uids are +Add \fIUIDOFFSET\fR to all file UIDs. +When this option is used together with +.BR --force-uid , +the final file uids are set to \fIUID\fR + \fIUIDOFFSET\fR. +.TP +.BI "\-\-xattr-prefix=" PREFIX +Specify a customized extended attribute namespace prefix for space saving, +e.g. "trusted.overlay.". You may give multiple +.B --xattr-prefix +options (Linux v6.4+). 
.SH AUTHOR This version of \fBmkfs.erofs\fR is written by Li Guifu <blucerlee@gmail.com>, Miao Xie <miaoxie@huawei.com> and Gao Xiang <xiang@kernel.org> with diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am index 709d9bf..dd75485 100644 --- a/mkfs/Makefile.am +++ b/mkfs/Makefile.am @@ -2,8 +2,9 @@ AUTOMAKE_OPTIONS = foreign bin_PROGRAMS = mkfs.erofs -AM_CPPFLAGS = ${libuuid_CFLAGS} ${libselinux_CFLAGS} +AM_CPPFLAGS = ${libselinux_CFLAGS} mkfs_erofs_SOURCES = main.c mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include -mkfs_erofs_LDADD = ${libuuid_LIBS} $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${liblz4_LIBS} ${liblzma_LIBS} +mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ + ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} \ + ${libdeflate_LIBS} diff --git a/mkfs/main.c b/mkfs/main.c index 94f51df..6d2b700 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -15,7 +15,9 @@ #include "erofs/config.h" #include "erofs/print.h" #include "erofs/cache.h" +#include "erofs/diskbuf.h" #include "erofs/inode.h" +#include "erofs/tar.h" #include "erofs/io.h" #include "erofs/compress.h" #include "erofs/dedupe.h" @@ -25,11 +27,9 @@ #include "erofs/compress_hints.h" #include "erofs/blobchunk.h" #include "erofs/fragments.h" +#include "erofs/rebuild.h" #include "../lib/liberofs_private.h" - -#ifdef HAVE_LIBUUID -#include <uuid.h> -#endif +#include "../lib/liberofs_uuid.h" #define EROFS_SUPER_END (EROFS_SUPER_OFFSET + sizeof(struct erofs_super_block)) @@ -56,32 +56,42 @@ static struct option long_options[] = { {"preserve-mtime", no_argument, NULL, 15}, {"uid-offset", required_argument, NULL, 16}, {"gid-offset", required_argument, NULL, 17}, + {"tar", optional_argument, NULL, 20}, + {"aufs", no_argument, NULL, 21}, {"mount-point", required_argument, NULL, 512}, + {"xattr-prefix", required_argument, NULL, 19}, #ifdef WITH_ANDROID {"product-out", required_argument, NULL, 513}, {"fs-config-file", required_argument, NULL, 514}, {"block-list-file", 
required_argument, NULL, 515}, #endif + {"ovlfs-strip", optional_argument, NULL, 516}, +#ifdef HAVE_ZLIB + {"gzip", no_argument, NULL, 517}, +#endif {0, 0, 0, 0}, }; static void print_available_compressors(FILE *f, const char *delim) { - unsigned int i = 0; + int i = 0; + bool comma = false; const char *s; - while ((s = z_erofs_list_available_compressors(i)) != NULL) { - if (i++) + while ((s = z_erofs_list_available_compressors(&i)) != NULL) { + if (comma) fputs(delim, f); fputs(s, f); + comma = true; } fputc('\n', f); } static void usage(void) { - fputs("usage: [options] FILE DIRECTORY\n\n" - "Generate erofs image from DIRECTORY to FILE, and [options] are:\n" + fputs("usage: [options] FILE SOURCE(s)\n" + "Generate EROFS image (FILE) from DIRECTORY, TARBALL and/or EROFS images. And [options] are:\n" + " -b# set block size to # (# = page size by default)\n" " -d# set output message level to # (maximum 9)\n" " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -zX[,Y][:..] X=compressor (Y=compression level, optional)\n" @@ -90,9 +100,7 @@ static void usage(void) " -EX[,...] 
X=extended options\n" " -L volume-label set the volume label (maximum 16)\n" " -T# set a fixed UNIX timestamp # to all files\n" -#ifdef HAVE_LIBUUID " -UX use a given filesystem UUID\n" -#endif " --all-root make all files owned by root\n" " --blobdev=X specify an extra device X to store chunked data\n" " --chunksize=# generate chunk-based files with #-byte chunks\n" @@ -106,15 +114,22 @@ static void usage(void) " --force-gid=# set all file gids to # (# = GID)\n" " --uid-offset=# add offset # to all file uids (# = id offset)\n" " --gid-offset=# add offset # to all file gids (# = id offset)\n" +#ifdef HAVE_ZLIB + " --gzip try to filter the tarball stream through gzip\n" +#endif " --help display this help and exit\n" " --ignore-mtime use build time instead of strict per-file modification time\n" " --max-extent-bytes=# set maximum decompressed extent size # in bytes\n" " --preserve-mtime keep per-file modification time strictly\n" + " --aufs replace aufs special files with overlayfs metadata\n" + " --tar=[fi] generate an image from tarball(s)\n" + " --ovlfs-strip=[01] strip overlayfs metadata in the target image (e.g. 
whiteouts)\n" " --quiet quiet execution (do not write anything to standard output.)\n" #ifndef NDEBUG " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n" " --random-algorithms randomize per-file algorithms (debugging only)\n" #endif + " --xattr-prefix=X X=extra xattr name prefix\n" " --mount-point=X X=prefix of target fs path (default: /)\n" #ifdef WITH_ANDROID "\nwith following android-specific options:\n" @@ -126,6 +141,15 @@ static void usage(void) print_available_compressors(stderr, ", "); } +static unsigned int pclustersize_packed, pclustersize_max; +static struct erofs_tarfile erofstar = { + .global.xattrs = LIST_HEAD_INIT(erofstar.global.xattrs) +}; +static bool tar_mode, rebuild_mode, gzip_supported; + +static unsigned int rebuild_src_count; +static LIST_HEAD(rebuild_src_list); + static int parse_extended_opts(const char *opts) { #define MATCH_EXTENTED_OPT(opt, token, keylen) \ @@ -136,6 +160,7 @@ static int parse_extended_opts(const char *opts) value = NULL; for (token = opts; *token != '\0'; token = next) { + bool clear = false; const char *p = strchr(token, ','); next = NULL; @@ -159,62 +184,56 @@ static int parse_extended_opts(const char *opts) vallen = 0; } + if (token[0] == '^') { + if (keylen < 2) + return -EINVAL; + ++token; + --keylen; + clear = true; + } + if (MATCH_EXTENTED_OPT("legacy-compress", token, keylen)) { if (vallen) return -EINVAL; /* disable compacted indexes and 0padding */ cfg.c_legacy_compress = true; - } - - if (MATCH_EXTENTED_OPT("force-inode-compact", token, keylen)) { + } else if (MATCH_EXTENTED_OPT("force-inode-compact", token, keylen)) { if (vallen) return -EINVAL; cfg.c_force_inodeversion = FORCE_INODE_COMPACT; cfg.c_ignore_mtime = true; - } - - if (MATCH_EXTENTED_OPT("force-inode-extended", token, keylen)) { + } else if (MATCH_EXTENTED_OPT("force-inode-extended", token, keylen)) { if (vallen) return -EINVAL; cfg.c_force_inodeversion = FORCE_INODE_EXTENDED; - } - - if 
(MATCH_EXTENTED_OPT("nosbcrc", token, keylen)) { + } else if (MATCH_EXTENTED_OPT("nosbcrc", token, keylen)) { if (vallen) return -EINVAL; - erofs_sb_clear_sb_chksum(); - } - - if (MATCH_EXTENTED_OPT("noinline_data", token, keylen)) { + erofs_sb_clear_sb_chksum(&sbi); + } else if (MATCH_EXTENTED_OPT("noinline_data", token, keylen)) { if (vallen) return -EINVAL; - cfg.c_noinline_data = true; - } - - if (MATCH_EXTENTED_OPT("force-inode-blockmap", token, keylen)) { + cfg.c_inline_data = false; + } else if (MATCH_EXTENTED_OPT("inline_data", token, keylen)) { + if (vallen) + return -EINVAL; + cfg.c_inline_data = !clear; + } else if (MATCH_EXTENTED_OPT("force-inode-blockmap", token, keylen)) { if (vallen) return -EINVAL; cfg.c_force_chunkformat = FORCE_INODE_BLOCK_MAP; - } - - if (MATCH_EXTENTED_OPT("force-chunk-indexes", token, keylen)) { + } else if (MATCH_EXTENTED_OPT("force-chunk-indexes", token, keylen)) { if (vallen) return -EINVAL; cfg.c_force_chunkformat = FORCE_INODE_CHUNK_INDEXES; - } - - if (MATCH_EXTENTED_OPT("ztailpacking", token, keylen)) { + } else if (MATCH_EXTENTED_OPT("ztailpacking", token, keylen)) { if (vallen) return -EINVAL; - cfg.c_ztailpacking = true; - } - - if (MATCH_EXTENTED_OPT("all-fragments", token, keylen)) { + cfg.c_ztailpacking = !clear; + } else if (MATCH_EXTENTED_OPT("all-fragments", token, keylen)) { cfg.c_all_fragments = true; goto handle_fragment; - } - - if (MATCH_EXTENTED_OPT("fragments", token, keylen)) { + } else if (MATCH_EXTENTED_OPT("fragments", token, keylen)) { char *endptr; u64 i; @@ -222,20 +241,25 @@ handle_fragment: cfg.c_fragments = true; if (vallen) { i = strtoull(value, &endptr, 0); - if (endptr - value != vallen || - i < EROFS_BLKSIZ || i % EROFS_BLKSIZ) { + if (endptr - value != vallen) { erofs_err("invalid pcluster size for the packed file %s", next); return -EINVAL; } - cfg.c_pclusterblks_packed = i / EROFS_BLKSIZ; + pclustersize_packed = i; } - } - - if (MATCH_EXTENTED_OPT("dedupe", token, keylen)) { + } else if 
(MATCH_EXTENTED_OPT("dedupe", token, keylen)) { + if (vallen) + return -EINVAL; + cfg.c_dedupe = !clear; + } else if (MATCH_EXTENTED_OPT("xattr-name-filter", token, keylen)) { if (vallen) return -EINVAL; - cfg.c_dedupe = true; + cfg.c_xattr_name_filter = !clear; + } else { + erofs_err("unknown extended option %.*s", + p - token, token); + return -EINVAL; } } return 0; @@ -266,26 +290,43 @@ static int mkfs_parse_compress_algs(char *algs) return 0; } +static void erofs_rebuild_cleanup(void) +{ + struct erofs_sb_info *src, *n; + + list_for_each_entry_safe(src, n, &rebuild_src_list, list) { + list_del(&src->list); + erofs_put_super(src); + dev_close(src); + free(src); + } + rebuild_src_count = 0; +} + static int mkfs_parse_options_cfg(int argc, char *argv[]) { char *endptr; - int opt, i; + int opt, i, err; bool quiet = false; - while ((opt = getopt_long(argc, argv, "C:E:L:T:U:d:x:z:", + while ((opt = getopt_long(argc, argv, "C:E:L:T:U:b:d:x:z:", long_options, NULL)) != -1) { switch (opt) { case 'z': - if (!optarg) { - cfg.c_compr_alg[0] = "(default)"; - cfg.c_compr_level[0] = -1; - break; - } i = mkfs_parse_compress_algs(optarg); if (i) return i; break; + case 'b': + i = atoi(optarg); + if (i < 512 || i > EROFS_MAX_BLOCK_SIZE) { + erofs_err("invalid block size %s", optarg); + return -EINVAL; + } + sbi.blkszbits = ilog2(i); + break; + case 'd': i = atoi(optarg); if (i < EROFS_MSG_MIN || i > EROFS_MSG_MAX) { @@ -328,14 +369,12 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) } cfg.c_timeinherit = TIMESTAMP_FIXED; break; -#ifdef HAVE_LIBUUID case 'U': - if (uuid_parse(optarg, sbi.uuid)) { + if (erofs_uuid_parse(optarg, sbi.uuid)) { erofs_err("invalid UUID %s", optarg); return -EINVAL; } break; -#endif case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { @@ -415,14 +454,12 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) #endif case 'C': i = strtoull(optarg, &endptr, 0); - if (*endptr != '\0' || - i < EROFS_BLKSIZ || i % EROFS_BLKSIZ) { 
+ if (*endptr != '\0') { erofs_err("invalid physical clustersize %s", optarg); return -EINVAL; } - cfg.c_pclusterblks_max = i / EROFS_BLKSIZ; - cfg.c_pclusterblks_def = cfg.c_pclusterblks_max; + pclustersize_max = i; break; case 11: i = strtol(optarg, &endptr, 0); @@ -436,12 +473,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) optarg); return -EINVAL; } - if (i < EROFS_BLKSIZ) { - erofs_err("chunksize %s must be larger than block size", - optarg); - return -EINVAL; - } - erofs_sb_set_chunked_file(); + erofs_sb_set_chunked_file(&sbi); break; case 12: quiet = true; @@ -471,6 +503,37 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 19: + errno = 0; + opt = erofs_xattr_insert_name_prefix(optarg); + if (opt) { + erofs_err("failed to parse xattr name prefix: %s", + erofs_strerror(opt)); + return opt; + } + cfg.c_extra_ea_name_prefixes = true; + break; + case 20: + if (optarg && (!strcmp(optarg, "i") || + !strcmp(optarg, "0") || !memcmp(optarg, "0,", 2))) { + erofstar.index_mode = true; + if (!memcmp(optarg, "0,", 2)) + erofstar.mapfile = strdup(optarg + 2); + } + tar_mode = true; + break; + case 21: + erofstar.aufs = true; + break; + case 516: + if (!optarg || !strcmp(optarg, "1")) + cfg.c_ovlfs_strip = true; + else + cfg.c_ovlfs_strip = false; + break; + case 517: + gzip_supported = true; + break; case 1: usage(); exit(0); @@ -480,7 +543,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) } } - if (cfg.c_blobdev_path && cfg.c_chunkbits < LOG_BLOCK_SIZE) { + if (cfg.c_blobdev_path && cfg.c_chunkbits < sbi.blkszbits) { erofs_err("--blobdev must be used together with --chunksize"); return -EINVAL; } @@ -502,25 +565,114 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -ENOMEM; if (optind >= argc) { - erofs_err("missing argument: DIRECTORY"); - return -EINVAL; - } + if (!tar_mode) { + erofs_err("missing argument: SOURCE(s)"); + return -EINVAL; + } else { + int dupfd; - cfg.c_src_path = 
realpath(argv[optind++], NULL); - if (!cfg.c_src_path) { - erofs_err("failed to parse source directory: %s", - erofs_strerror(-errno)); - return -ENOENT; - } + dupfd = dup(STDIN_FILENO); + if (dupfd < 0) { + erofs_err("failed to duplicate STDIN_FILENO: %s", + strerror(errno)); + return -errno; + } + err = erofs_iostream_open(&erofstar.ios, dupfd, gzip_supported); + if (err) + return err; + } + } else { + struct stat st; - if (optind < argc) { - erofs_err("unexpected argument: %s\n", argv[optind]); - return -EINVAL; + cfg.c_src_path = realpath(argv[optind++], NULL); + if (!cfg.c_src_path) { + erofs_err("failed to parse source directory: %s", + erofs_strerror(-errno)); + return -ENOENT; + } + + if (tar_mode) { + int fd = open(cfg.c_src_path, O_RDONLY); + + if (fd < 0) { + erofs_err("failed to open file: %s", cfg.c_src_path); + return -errno; + } + err = erofs_iostream_open(&erofstar.ios, fd, gzip_supported); + if (err) + return err; + } else { + err = lstat(cfg.c_src_path, &st); + if (err) + return -errno; + if (S_ISDIR(st.st_mode)) + erofs_set_fs_root(cfg.c_src_path); + else + rebuild_mode = true; + } + + if (rebuild_mode) { + char *srcpath = cfg.c_src_path; + struct erofs_sb_info *src; + + do { + src = calloc(1, sizeof(struct erofs_sb_info)); + if (!src) { + erofs_rebuild_cleanup(); + return -ENOMEM; + } + + err = dev_open_ro(src, srcpath); + if (err) { + free(src); + erofs_rebuild_cleanup(); + return err; + } + + /* extra device index starts from 1 */ + src->dev = ++rebuild_src_count; + list_add(&src->list, &rebuild_src_list); + } while (optind < argc && (srcpath = argv[optind++])); + } else if (optind < argc) { + erofs_err("unexpected argument: %s\n", argv[optind]); + return -EINVAL; + } } if (quiet) { cfg.c_dbg_lvl = EROFS_ERR; cfg.c_showprogress = false; } + + if (cfg.c_compr_alg[0] && erofs_blksiz(&sbi) != getpagesize()) + erofs_warn("Please note that subpage blocksize with compression isn't yet supported in kernel. 
" + "This compressed image will only work with bs = ps = %u bytes", + erofs_blksiz(&sbi)); + + if (pclustersize_max) { + if (pclustersize_max < erofs_blksiz(&sbi) || + pclustersize_max % erofs_blksiz(&sbi)) { + erofs_err("invalid physical clustersize %u", + pclustersize_max); + return -EINVAL; + } + cfg.c_pclusterblks_max = pclustersize_max >> sbi.blkszbits; + cfg.c_pclusterblks_def = cfg.c_pclusterblks_max; + } + if (cfg.c_chunkbits && cfg.c_chunkbits < sbi.blkszbits) { + erofs_err("chunksize %u must be larger than block size", + 1u << cfg.c_chunkbits); + return -EINVAL; + } + + if (pclustersize_packed) { + if (pclustersize_max < erofs_blksiz(&sbi) || + pclustersize_max % erofs_blksiz(&sbi)) { + erofs_err("invalid pcluster size for the packed file %u", + pclustersize_packed); + return -EINVAL; + } + cfg.c_pclusterblks_packed = pclustersize_packed >> sbi.blkszbits; + } return 0; } @@ -531,22 +683,24 @@ int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh, { struct erofs_super_block sb = { .magic = cpu_to_le32(EROFS_SUPER_MAGIC_V1), - .blkszbits = LOG_BLOCK_SIZE, + .blkszbits = sbi.blkszbits, .inos = cpu_to_le64(sbi.inos), .build_time = cpu_to_le64(sbi.build_time), .build_time_nsec = cpu_to_le32(sbi.build_time_nsec), .blocks = 0, - .meta_blkaddr = sbi.meta_blkaddr, - .xattr_blkaddr = sbi.xattr_blkaddr, + .meta_blkaddr = cpu_to_le32(sbi.meta_blkaddr), + .xattr_blkaddr = cpu_to_le32(sbi.xattr_blkaddr), + .xattr_prefix_count = sbi.xattr_prefix_count, + .xattr_prefix_start = cpu_to_le32(sbi.xattr_prefix_start), .feature_incompat = cpu_to_le32(sbi.feature_incompat), .feature_compat = cpu_to_le32(sbi.feature_compat & ~EROFS_FEATURE_COMPAT_SB_CHKSUM), .extra_devices = cpu_to_le16(sbi.extra_devices), .devt_slotoff = cpu_to_le16(sbi.devt_slotoff), }; - const unsigned int sb_blksize = - round_up(EROFS_SUPER_END, EROFS_BLKSIZ); + const u32 sb_blksize = round_up(EROFS_SUPER_END, erofs_blksiz(&sbi)); char *buf; + int ret; *blocks = erofs_mapbh(NULL); sb.blocks = 
cpu_to_le32(*blocks); @@ -555,8 +709,8 @@ int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh, memcpy(sb.uuid, sbi.uuid, sizeof(sb.uuid)); memcpy(sb.volume_name, sbi.volume_name, sizeof(sb.volume_name)); - if (erofs_sb_has_compr_cfgs()) - sb.u1.available_compr_algs = sbi.available_compr_algs; + if (erofs_sb_has_compr_cfgs(&sbi)) + sb.u1.available_compr_algs = cpu_to_le16(sbi.available_compr_algs); else sb.u1.lz4_max_distance = cpu_to_le16(sbi.lz4_max_distance); @@ -568,19 +722,21 @@ int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh, } memcpy(buf + EROFS_SUPER_OFFSET, &sb, sizeof(sb)); - bh->fsprivate = buf; - bh->op = &erofs_buf_write_bhops; - return 0; + ret = dev_write(&sbi, buf, erofs_btell(bh, false), EROFS_SUPER_END); + free(buf); + erofs_bdrop(bh, false); + return ret; } static int erofs_mkfs_superblock_csum_set(void) { int ret; - u8 buf[EROFS_BLKSIZ]; + u8 buf[EROFS_MAX_BLOCK_SIZE]; u32 crc; + unsigned int len; struct erofs_super_block *sb; - ret = blk_read(0, buf, 0, 1); + ret = blk_read(&sbi, 0, buf, 0, erofs_blknr(&sbi, EROFS_SUPER_END) + 1); if (ret) { erofs_err("failed to read superblock to set checksum: %s", erofs_strerror(ret)); @@ -601,12 +757,16 @@ static int erofs_mkfs_superblock_csum_set(void) /* turn on checksum feature */ sb->feature_compat = cpu_to_le32(le32_to_cpu(sb->feature_compat) | EROFS_FEATURE_COMPAT_SB_CHKSUM); - crc = erofs_crc32c(~0, (u8 *)sb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); + if (erofs_blksiz(&sbi) > EROFS_SUPER_OFFSET) + len = erofs_blksiz(&sbi) - EROFS_SUPER_OFFSET; + else + len = erofs_blksiz(&sbi); + crc = erofs_crc32c(~0, (u8 *)sb, len); /* set up checksum field to erofs_super_block */ sb->checksum = cpu_to_le32(crc); - ret = blk_write(buf, 0, 1); + ret = blk_write(&sbi, buf, 0, 1); if (ret) { erofs_err("failed to write checksummed superblock: %s", erofs_strerror(ret)); @@ -621,16 +781,15 @@ static void erofs_mkfs_default_options(void) { cfg.c_showprogress = true; cfg.c_legacy_compress = false; - 
sbi.feature_incompat = EROFS_FEATURE_INCOMPAT_LZ4_0PADDING; + cfg.c_inline_data = true; + cfg.c_xattr_name_filter = true; + sbi.blkszbits = ilog2(min_t(u32, getpagesize(), EROFS_MAX_BLOCK_SIZE)); + sbi.feature_incompat = EROFS_FEATURE_INCOMPAT_ZERO_PADDING; sbi.feature_compat = EROFS_FEATURE_COMPAT_SB_CHKSUM | EROFS_FEATURE_COMPAT_MTIME; /* generate a default uuid first */ -#ifdef HAVE_LIBUUID - do { - uuid_generate(sbi.uuid); - } while (uuid_is_null(sbi.uuid)); -#endif + erofs_uuid_generate(sbi.uuid); } /* https://reproducible-builds.org/specs/source-date-epoch/ for more details */ @@ -665,6 +824,101 @@ void erofs_show_progs(int argc, char *argv[]) if (cfg.c_dbg_lvl >= EROFS_WARN) printf("%s %s\n", basename(argv[0]), cfg.c_version); } +static struct erofs_inode *erofs_alloc_root_inode(void) +{ + struct erofs_inode *root; + + root = erofs_new_inode(); + if (IS_ERR(root)) + return root; + root->i_srcpath = strdup("/"); + root->i_mode = S_IFDIR | 0777; + root->i_parent = root; + root->i_mtime = root->sbi->build_time; + root->i_mtime_nsec = root->sbi->build_time_nsec; + erofs_init_empty_dir(root); + return root; +} + +static int erofs_rebuild_load_trees(struct erofs_inode *root) +{ + struct erofs_sb_info *src; + unsigned int extra_devices = 0; + erofs_blk_t nblocks; + int ret, idx; + + list_for_each_entry(src, &rebuild_src_list, list) { + ret = erofs_rebuild_load_tree(root, src); + if (ret) { + erofs_err("failed to load %s", src->devname); + return ret; + } + if (src->extra_devices > 1) { + erofs_err("%s: unsupported number of extra devices", + src->devname, src->extra_devices); + return -EOPNOTSUPP; + } + extra_devices += src->extra_devices; + } + + if (extra_devices && extra_devices != rebuild_src_count) { + erofs_err("extra_devices(%u) is mismatched with source images(%u)", + extra_devices, rebuild_src_count); + return -EOPNOTSUPP; + } + + ret = erofs_mkfs_init_devices(&sbi, rebuild_src_count); + if (ret) + return ret; + + list_for_each_entry(src, 
&rebuild_src_list, list) { + u8 *tag = NULL; + + if (extra_devices) { + nblocks = src->devs[0].blocks; + tag = src->devs[0].tag; + } else { + nblocks = src->primarydevice_blocks; + } + DBG_BUGON(src->dev < 1); + idx = src->dev - 1; + sbi.devs[idx].blocks = nblocks; + if (tag && *tag) + memcpy(sbi.devs[idx].tag, tag, sizeof(sbi.devs[0].tag)); + else + /* convert UUID of the source image to a hex string */ + sprintf((char *)sbi.devs[idx].tag, + "%04x%04x%04x%04x%04x%04x%04x%04x", + (src->uuid[0] << 8) | src->uuid[1], + (src->uuid[2] << 8) | src->uuid[3], + (src->uuid[4] << 8) | src->uuid[5], + (src->uuid[6] << 8) | src->uuid[7], + (src->uuid[8] << 8) | src->uuid[9], + (src->uuid[10] << 8) | src->uuid[11], + (src->uuid[12] << 8) | src->uuid[13], + (src->uuid[14] << 8) | src->uuid[15]); + } + return 0; +} + +static void erofs_mkfs_showsummaries(erofs_blk_t nblocks) +{ + char uuid_str[37] = {}; + + if (!(cfg.c_dbg_lvl > EROFS_ERR && cfg.c_showprogress)) + return; + + erofs_uuid_unparse_lower(sbi.uuid, uuid_str); + + fprintf(stdout, "------\nFilesystem UUID: %s\n" + "Filesystem total blocks: %u (of %u-byte blocks)\n" + "Filesystem total inodes: %llu\n" + "Filesystem total metadata blocks: %u\n" + "Filesystem total deduplicated bytes (of source files): %llu\n", + uuid_str, nblocks, 1U << sbi.blkszbits, sbi.inos | 0ULL, + erofs_total_metablocks(), + sbi.saved_by_deduplication | 0ULL); +} int main(int argc, char **argv) { @@ -672,10 +926,9 @@ int main(int argc, char **argv) struct erofs_buffer_head *sb_bh; struct erofs_inode *root_inode, *packed_inode; erofs_nid_t root_nid, packed_nid; - struct stat st; erofs_blk_t nblocks; struct timeval t; - char uuid_str[37] = "not available"; + FILE *packedfile = NULL; erofs_init_configure(); erofs_mkfs_default_options(); @@ -694,16 +947,6 @@ int main(int argc, char **argv) return 1; } - err = lstat(cfg.c_src_path, &st); - if (err) - return 1; - if (!S_ISDIR(st.st_mode)) { - erofs_err("root of the filesystem is not a directory - %s", - 
cfg.c_src_path); - usage(); - return 1; - } - if (cfg.c_unix_timestamp != -1) { sbi.build_time = cfg.c_unix_timestamp; sbi.build_time_nsec = 0; @@ -712,12 +955,20 @@ int main(int argc, char **argv) sbi.build_time_nsec = t.tv_usec; } - err = dev_open(cfg.c_img_path); + err = dev_open(&sbi, cfg.c_img_path); if (err) { usage(); return 1; } + if (tar_mode && !erofstar.index_mode) { + err = erofs_diskbuf_init(1); + if (err) { + erofs_err("failed to initialize diskbuf: %s", + strerror(-err)); + goto exit; + } + } #ifdef WITH_ANDROID if (cfg.fs_config_file && load_canned_fs_config(cfg.fs_config_file) < 0) { @@ -725,30 +976,64 @@ int main(int argc, char **argv) return 1; } - if (cfg.block_list_file && erofs_droid_blocklist_fopen() < 0) { + if (cfg.block_list_file && + erofs_blocklist_open(cfg.block_list_file, false)) { erofs_err("failed to open %s", cfg.block_list_file); return 1; } #endif erofs_show_config(); - if (cfg.c_fragments) { + if (cfg.c_fragments || cfg.c_extra_ea_name_prefixes) { if (!cfg.c_pclusterblks_packed) cfg.c_pclusterblks_packed = cfg.c_pclusterblks_def; - err = erofs_fragments_init(); + packedfile = erofs_packedfile_init(); + if (IS_ERR(packedfile)) { + erofs_err("failed to initialize packedfile"); + return 1; + } + } + + if (cfg.c_fragments) { + err = z_erofs_fragments_init(); if (err) { erofs_err("failed to initialize fragments"); return 1; } - erofs_warn("EXPERIMENTAL compressed fragments feature in use. Use at your own risk!"); } - if (cfg.c_dedupe) - erofs_warn("EXPERIMENTAL data deduplication feature in use. 
Use at your own risk!"); - erofs_set_fs_root(cfg.c_src_path); + #ifndef NDEBUG if (cfg.c_random_pclusterblks) srand(time(NULL)); #endif + if (tar_mode && erofstar.index_mode) { + if (erofstar.mapfile) { + err = erofs_blocklist_open(erofstar.mapfile, true); + if (err) { + erofs_err("failed to open %s", erofstar.mapfile); + goto exit; + } + } else { + sbi.blkszbits = 9; + } + } + + if (rebuild_mode) { + struct erofs_sb_info *src; + + erofs_warn("EXPERIMENTAL rebuild mode in use. Use at your own risk!"); + + src = list_first_entry(&rebuild_src_list, struct erofs_sb_info, list); + if (!src) + goto exit; + err = erofs_read_superblock(src); + if (err) { + erofs_err("failed to read superblock of %s", src->devname); + goto exit; + } + sbi.blkszbits = src->blkszbits; + } + sb_bh = erofs_buffer_init(); if (IS_ERR(sb_bh)) { err = PTR_ERR(sb_bh); @@ -763,14 +1048,21 @@ int main(int argc, char **argv) goto exit; } - err = erofs_load_compress_hints(); + /* make sure that the super block should be the very first blocks */ + (void)erofs_mapbh(sb_bh->block); + if (erofs_btell(sb_bh, false) != 0) { + erofs_err("failed to reserve erofs_super_block"); + goto exit; + } + + err = erofs_load_compress_hints(&sbi); if (err) { erofs_err("failed to load compress hints %s", cfg.c_compress_hints_file); goto exit; } - err = z_erofs_compress_init(sb_bh); + err = z_erofs_compress_init(&sbi, sb_bh); if (err) { erofs_err("failed to initialize compressor: %s", erofs_strerror(err)); @@ -780,9 +1072,9 @@ int main(int argc, char **argv) if (cfg.c_dedupe) { if (!cfg.c_compr_alg[0]) { erofs_err("Compression is not enabled. 
Turn on chunk-based data deduplication instead."); - cfg.c_chunkbits = LOG_BLOCK_SIZE; + cfg.c_chunkbits = sbi.blkszbits; } else { - err = z_erofs_dedupe_init(EROFS_BLKSIZ); + err = z_erofs_dedupe_init(erofs_blksiz(&sbi)); if (err) { erofs_err("failed to initialize deduplication: %s", erofs_strerror(err)); @@ -797,46 +1089,78 @@ int main(int argc, char **argv) return 1; } - err = erofs_generate_devtable(); + if ((erofstar.index_mode && !erofstar.mapfile) || cfg.c_blobdev_path) + err = erofs_mkfs_init_devices(&sbi, 1); if (err) { erofs_err("failed to generate device table: %s", erofs_strerror(err)); goto exit; } -#ifdef HAVE_LIBUUID - uuid_unparse_lower(sbi.uuid, uuid_str); -#endif - erofs_info("filesystem UUID: %s", uuid_str); erofs_inode_manager_init(); - err = erofs_build_shared_xattrs_from_path(cfg.c_src_path); - if (err) { - erofs_err("failed to build shared xattrs: %s", - erofs_strerror(err)); - goto exit; - } + if (tar_mode) { + root_inode = erofs_alloc_root_inode(); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } - root_inode = erofs_mkfs_build_tree_from_path(NULL, cfg.c_src_path); - if (IS_ERR(root_inode)) { - err = PTR_ERR(root_inode); - goto exit; - } + while (!(err = tarerofs_parse_tar(root_inode, &erofstar))); + + if (err < 0) + goto exit; + err = erofs_rebuild_dump_tree(root_inode); + if (err < 0) + goto exit; + } else if (rebuild_mode) { + root_inode = erofs_alloc_root_inode(); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } + + err = erofs_rebuild_load_trees(root_inode); + if (err) + goto exit; + err = erofs_rebuild_dump_tree(root_inode); + if (err) + goto exit; + } else { + err = erofs_build_shared_xattrs_from_path(&sbi, cfg.c_src_path); + if (err) { + erofs_err("failed to build shared xattrs: %s", + erofs_strerror(err)); + goto exit; + } + + if (cfg.c_extra_ea_name_prefixes) + erofs_xattr_write_name_prefixes(&sbi, packedfile); + + root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path); + if 
(IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } + } root_nid = erofs_lookupnid(root_inode); erofs_iput(root_inode); - if (cfg.c_chunkbits) { - erofs_info("total metadata: %u blocks", erofs_mapbh(NULL)); - err = erofs_blob_remap(); + if (erofstar.index_mode || cfg.c_chunkbits || sbi.extra_devices) { + if (erofstar.index_mode && !erofstar.mapfile) + sbi.devs[0].blocks = + BLK_ROUND_UP(&sbi, erofstar.offset); + err = erofs_mkfs_dump_blobs(&sbi); if (err) goto exit; } packed_nid = 0; - if (cfg.c_fragments && erofs_sb_has_fragments()) { + if ((cfg.c_fragments || cfg.c_extra_ea_name_prefixes) && + erofs_sb_has_fragments(&sbi)) { erofs_update_progressinfo("Handling packed_file ..."); - packed_inode = erofs_mkfs_build_fragments(); + packed_inode = erofs_mkfs_build_packedfile(); if (IS_ERR(packed_inode)) { err = PTR_ERR(packed_inode); goto exit; @@ -845,6 +1169,12 @@ int main(int argc, char **argv) erofs_iput(packed_inode); } + /* flush all buffers except for the superblock */ + if (!erofs_bflush(NULL)) { + err = -EIO; + goto exit; + } + err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks, packed_nid); if (err) @@ -854,31 +1184,35 @@ int main(int argc, char **argv) if (!erofs_bflush(NULL)) err = -EIO; else - err = dev_resize(nblocks); + err = dev_resize(&sbi, nblocks); - if (!err && erofs_sb_has_sb_chksum()) + if (!err && erofs_sb_has_sb_chksum(&sbi)) err = erofs_mkfs_superblock_csum_set(); exit: z_erofs_compress_exit(); z_erofs_dedupe_exit(); -#ifdef WITH_ANDROID - erofs_droid_blocklist_fclose(); -#endif - dev_close(); + erofs_blocklist_close(); + dev_close(&sbi); erofs_cleanup_compress_hints(); erofs_cleanup_exclude_rules(); if (cfg.c_chunkbits) erofs_blob_exit(); if (cfg.c_fragments) - erofs_fragments_exit(); + z_erofs_fragments_exit(); + erofs_packedfile_exit(); + erofs_xattr_cleanup_name_prefixes(); + erofs_rebuild_cleanup(); + erofs_diskbuf_exit(); erofs_exit_configure(); + if (tar_mode) + erofs_iostream_close(&erofstar.ios); if (err) { 
erofs_err("\tCould not format the device : %s\n", erofs_strerror(err)); return 1; - } else { - erofs_update_progressinfo("Build completed.\n"); } + erofs_update_progressinfo("Build completed.\n"); + erofs_mkfs_showsummaries(nblocks); return 0; } |