aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2021-07-14 00:44:43 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2021-07-14 00:44:43 +0000
commit3909e4b74c9a9f870178cd9a0d31e6f14932cba8 (patch)
tree01d6e4802ff19d73bbebbf0d2c0a95ba61bf223b
parentde75e8b2a7c1067984140c819dba3ad8e2125274 (diff)
parent3d45c6f550274b2331982316704fb01eaef82b24 (diff)
downloadlibcap-android12-mainline-permission-release.tar.gz
Change-Id: I97fc5acd8fed1399241206a5f110ed90e4f38bc9
-rw-r--r--Android.bp100
-rw-r--r--CHANGELOG14
-rw-r--r--METADATA9
-rw-r--r--Make.Rules127
-rw-r--r--Makefile66
-rw-r--r--README40
-rw-r--r--cap/LICENSE385
-rw-r--r--cap/README10
-rw-r--r--cap/cap.go461
-rw-r--r--cap/cap_test.go214
-rw-r--r--cap/convenience.go292
-rw-r--r--cap/file.go386
-rw-r--r--cap/flags.go161
-rw-r--r--cap/go.mod5
-rw-r--r--cap/iab.go287
-rw-r--r--cap/launch.go252
-rw-r--r--cap/legacy.go7
-rw-r--r--cap/modern.go8
-rw-r--r--cap/names.go428
-rw-r--r--cap/oslockluster.go33
-rw-r--r--cap/oslocks.go51
-rw-r--r--cap/syscalls.go27
-rw-r--r--cap/text.go325
-rw-r--r--contrib/seccomp/explore.go277
-rw-r--r--contrib/seccomp/go.mod5
-rwxr-xr-xdistcheck.sh13
-rw-r--r--doc/Makefile10
-rw-r--r--doc/cap_clear.32
-rw-r--r--doc/cap_copy_ext.38
-rw-r--r--doc/cap_from_text.354
-rw-r--r--doc/cap_get_ambient.31
-rw-r--r--doc/cap_get_file.330
-rw-r--r--doc/cap_get_mode.31
-rw-r--r--doc/cap_get_proc.3268
-rw-r--r--doc/cap_get_secbits.31
-rw-r--r--doc/cap_init.34
-rw-r--r--doc/cap_mode_name.31
-rw-r--r--doc/cap_reset_ambient.31
-rw-r--r--doc/cap_set_ambient.31
-rw-r--r--doc/cap_set_mode.31
-rw-r--r--doc/cap_set_secbits.31
-rw-r--r--doc/cap_setgroups.31
-rw-r--r--doc/cap_setuid.31
-rw-r--r--doc/capsh.1234
-rw-r--r--doc/getcap.832
-rw-r--r--doc/getpcaps.845
-rw-r--r--doc/libcap.39
-rw-r--r--doc/libpsx.389
-rw-r--r--doc/md2html.lua6
-rwxr-xr-xdoc/mkmd.sh79
-rw-r--r--doc/psx_syscall.31
-rw-r--r--doc/psx_syscall3.31
-rw-r--r--doc/psx_syscall6.31
-rw-r--r--doc/setcap.844
-rw-r--r--doc/values/0.txt2
-rw-r--r--doc/values/1.txt5
-rw-r--r--doc/values/10.txt3
-rw-r--r--doc/values/11.txt2
-rw-r--r--doc/values/12.txt17
-rw-r--r--doc/values/13.txt5
-rw-r--r--doc/values/14.txt3
-rw-r--r--doc/values/15.txt1
-rw-r--r--doc/values/16.txt3
-rw-r--r--doc/values/17.txt4
-rw-r--r--doc/values/18.txt3
-rw-r--r--doc/values/19.txt2
-rw-r--r--doc/values/2.txt4
-rw-r--r--doc/values/20.txt1
-rw-r--r--doc/values/21.txt43
-rw-r--r--doc/values/22.txt1
-rw-r--r--doc/values/23.txt6
-rw-r--r--doc/values/24.txt16
-rw-r--r--doc/values/25.txt4
-rw-r--r--doc/values/26.txt3
-rw-r--r--doc/values/27.txt2
-rw-r--r--doc/values/28.txt1
-rw-r--r--doc/values/29.txt2
-rw-r--r--doc/values/3.txt4
-rw-r--r--doc/values/30.txt2
-rw-r--r--doc/values/31.txt1
-rw-r--r--doc/values/32.txt4
-rw-r--r--doc/values/33.txt4
-rw-r--r--doc/values/34.txt2
-rw-r--r--doc/values/35.txt2
-rw-r--r--doc/values/36.txt2
-rw-r--r--doc/values/37.txt2
-rw-r--r--doc/values/38.txt4
-rw-r--r--doc/values/39.txt33
-rw-r--r--doc/values/4.txt4
-rw-r--r--doc/values/40.txt4
-rw-r--r--doc/values/5.txt3
-rw-r--r--doc/values/6.txt5
-rw-r--r--doc/values/7.txt5
-rw-r--r--doc/values/8.txt19
-rw-r--r--doc/values/9.txt2
-rw-r--r--generate_cap_names_list.awk1
-rw-r--r--go/.gitignore14
-rw-r--r--go/Makefile128
-rw-r--r--go/b210613.go21
-rwxr-xr-xgo/cgo-required.sh17
-rw-r--r--go/compare-cap.go384
-rw-r--r--go/go-mod-index.html16
-rw-r--r--go/mknames.go123
-rw-r--r--go/ok.go9
-rw-r--r--go/psx-signals.go46
-rw-r--r--go/try-launching.go114
-rw-r--r--goapps/gowns/go.mod5
-rw-r--r--goapps/gowns/gowns.go249
-rw-r--r--goapps/setid/go.mod8
-rw-r--r--goapps/setid/setid.go151
-rw-r--r--goapps/web/README18
-rw-r--r--goapps/web/go.mod5
-rw-r--r--goapps/web/web.go139
-rwxr-xr-xkdebug/test-kernel.sh17
-rw-r--r--kdebug/test-passwd1
-rw-r--r--libcap/.gitignore5
-rw-r--r--libcap/Makefile164
-rw-r--r--libcap/_makenames.c53
-rw-r--r--libcap/cap_alloc.c78
-rw-r--r--libcap/cap_file.c97
-rw-r--r--libcap/cap_flag.c128
-rw-r--r--libcap/cap_names.h50
-rw-r--r--libcap/cap_proc.c850
-rw-r--r--libcap/cap_test.c39
-rw-r--r--libcap/cap_text.c319
-rw-r--r--libcap/include/sys/.gitignore1
-rw-r--r--libcap/include/sys/capability.h112
-rw-r--r--libcap/include/uapi/linux/capability.h83
-rw-r--r--libcap/include/uapi/linux/prctl.h48
-rw-r--r--libcap/include/uapi/linux/securebits.h4
-rw-r--r--libcap/libcap.h104
-rw-r--r--libcap/libcap.pc.in2
-rw-r--r--libcap/libpsx.pc.in11
-rw-r--r--pam_cap/.gitignore3
-rw-r--r--pam_cap/Makefile36
-rw-r--r--pam_cap/pam_cap.c258
-rw-r--r--pam_cap/sudotest.conf23
-rw-r--r--pam_cap/test_pam_cap.c202
-rw-r--r--progs/.gitignore2
-rw-r--r--progs/Makefile31
-rw-r--r--progs/capsh.c758
-rw-r--r--progs/getcap.c42
-rw-r--r--progs/getpcaps.c43
-rwxr-xr-xprogs/quicktest.sh149
-rw-r--r--progs/setcap.c80
-rw-r--r--psx/LICENSE385
-rw-r--r--psx/README28
-rw-r--r--psx/doc.go60
-rw-r--r--psx/go.mod3
-rw-r--r--psx/psx.c640
-rw-r--r--psx/psx.go15
-rw-r--r--psx/psx_cgo.go65
-rw-r--r--psx/psx_cgo_test.go40
-rw-r--r--psx/psx_syscall.h74
-rw-r--r--psx/psx_test.go68
-rw-r--r--tests/.gitignore7
-rw-r--r--tests/Makefile116
-rw-r--r--tests/exploit.c154
-rw-r--r--tests/libcap_launch_test.c173
-rw-r--r--tests/libcap_psx_test.c50
-rw-r--r--tests/noop.c5
-rw-r--r--tests/psx_test.c150
162 files changed, 11562 insertions, 857 deletions
diff --git a/Android.bp b/Android.bp
index fb54b86..4644987 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,25 +1,47 @@
-// Copyright (C) 2015 The Android Open Source Project
+package {
+ default_applicable_licenses: ["external_libcap_license"],
+}
+
+// Added automatically by a large-scale-change that took the approach of
+// 'apply every license found to every target'. While this makes sure we respect
+// every license restriction, it may not be entirely correct.
+//
+// e.g. GPL in an MIT project might only apply to the contrib/ directory.
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// Please consider splitting the single license below into multiple licenses,
+// taking care not to lose any license_kind information, and overriding the
+// default license using the 'licenses: [...]' property on targets as needed.
//
-// http://www.apache.org/licenses/LICENSE-2.0
+// For unused files, consider creating a 'fileGroup' with "//visibility:private"
+// to attach the license to, and including a comment whether the files may be
+// used in the current project.
//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// large-scale-change filtered out the below license kinds as false-positives:
+// SPDX-license-identifier-GPL
+// SPDX-license-identifier-GPL-2.0
+// SPDX-license-identifier-LGPL
+// See: http://go/android-license-faq
+license {
+ name: "external_libcap_license",
+ visibility: [":__subpackages__"],
+ license_kinds: [
+ "SPDX-license-identifier-Apache-2.0",
+ "SPDX-license-identifier-BSD",
+ "SPDX-license-identifier-BSD-3-Clause",
+ ],
+ license_text: [
+ "NOTICE",
+ ],
+}
cc_defaults {
name: "libcap_defaults",
cflags: [
- "-Wall",
- "-Werror",
+ "-Wno-pointer-arith",
+ "-Wno-tautological-compare",
"-Wno-unused-parameter",
"-Wno-unused-result",
- "-Wno-tautological-compare",
+ "-Wno-unused-variable",
],
}
@@ -27,6 +49,7 @@ cc_library {
name: "libcap",
host_supported: true,
vendor_available: true,
+ product_available: true,
recovery_available: true,
vndk: {
enabled: true,
@@ -41,6 +64,7 @@ cc_library {
"libcap/cap_proc.c",
"libcap/cap_text.c",
],
+ generated_headers: ["cap_names.h"],
local_include_dirs: ["libcap/include"],
export_include_dirs: ["libcap/include"],
@@ -52,22 +76,23 @@ cc_library {
linux_bionic: {
enabled: true,
},
+ linux_glibc: {
+ local_include_dirs: ["libcap/include/uapi"],
+ },
},
apex_available: [
"//apex_available:platform",
"com.android.adbd",
"com.android.media.swcodec",
+ "com.android.virt",
],
min_sdk_version: "29",
}
cc_binary {
name: "getcap",
-
- srcs: ["progs/getcap.c"],
-
defaults: ["libcap_defaults"],
-
+ srcs: ["progs/getcap.c"],
static_libs: [
"libcap",
],
@@ -75,12 +100,45 @@ cc_binary {
cc_binary {
name: "setcap",
-
- srcs: ["progs/setcap.c"],
-
defaults: ["libcap_defaults"],
-
+ srcs: ["progs/setcap.c"],
static_libs: [
"libcap",
],
}
+
+//
+// Generate cap_names.list.h from the <linux/capability.h> header (using awk).
+//
+
+genrule {
+ name: "cap_names.list.h",
+ srcs: ["libcap/include/uapi/linux/capability.h"],
+ out: ["cap_names.list.h"],
+ tool_files: [":generate_cap_names_list.awk"],
+ cmd: "awk -f $(location :generate_cap_names_list.awk) $(in) > $(out)",
+}
+
+filegroup {
+ name: "generate_cap_names_list.awk",
+ srcs: ["generate_cap_names_list.awk"],
+}
+
+//
+// Generate cap_names.h from cap_names.list.h (using _makenames).
+//
+
+genrule {
+ name: "cap_names.h",
+ out: ["cap_names.h"],
+ tools: [":_makenames"],
+ cmd: "$(location :_makenames) > $(out)",
+}
+
+cc_binary {
+ name: "_makenames",
+ defaults: ["libcap_defaults"],
+ srcs: ["libcap/_makenames.c"],
+ generated_headers: ["cap_names.list.h"],
+ host_supported: true,
+}
diff --git a/CHANGELOG b/CHANGELOG
index 70e5faf..5489c48 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,17 +1,15 @@
-For release notes and other info pointers:
+For release notes and other info pointers for submitting patches etc.:
- http://sites.google.com/site/fullycapable/
+ http://sites.google.com/site/fullycapable/
-See GIT repository for detailed source history
+See GIT repository for detailed source history:
- https://git.kernel.org/cgit/linux/kernel/git/morgan/libcap.git/
+ https://git.kernel.org/pub/scm/libs/libcap/libcap.git/
Or simply download the source:
- git clone git://git.kernel.org/pub/scm/linux/kernel/git/morgan/libcap.git
+ git clone git://git.kernel.org/pub/scm/libs/libcap/libcap.git
The license for this library is here:
- https://git.kernel.org/cgit/linux/kernel/git/morgan/libcap.git/tree/License
-
-please submit patches compatible with this to morgan at kernel.org.
+ https://git.kernel.org/pub/scm/libs/libcap/libcap.git/tree/License
diff --git a/METADATA b/METADATA
index bd26017..076bc04 100644
--- a/METADATA
+++ b/METADATA
@@ -5,10 +5,11 @@ third_party {
type: GIT
value: "https://git.kernel.org/pub/scm/linux/kernel/git/morgan/libcap.git"
}
- version: "dce069b617cf5e42fde707196eaf2ee8d62bc96c"
+ version: "libcap-2.48"
+ license_type: NOTICE
last_upgrade_date {
- year: 2016
- month: 11
- day: 2
+ year: 2021
+ month: 2
+ day: 5
}
}
diff --git a/Make.Rules b/Make.Rules
index 8347b26..ded9014 100644
--- a/Make.Rules
+++ b/Make.Rules
@@ -1,8 +1,13 @@
+# Common version number defines for libcap
+LIBTITLE=libcap
+VERSION=2
+MINOR=48
+
#
## Optional prefixes:
#
-# common 'packaging' directoty
+# common 'packaging' directory
FAKEROOT=$(DESTDIR)
@@ -35,51 +40,127 @@ MANDIR=$(man_prefix)/man
SBINDIR=$(exec_prefix)/sbin
INCDIR=$(inc_prefix)/include
LIBDIR=$(lib_prefix)/$(lib)
-PKGCONFIGDIR=$(prefix)/$(lib)/pkgconfig
+PKGCONFIGDIR=$(LIBDIR)/pkgconfig
+GOPKGDIR=$(prefix)/share/gocode/src
-# common defines for libcap
-LIBTITLE=libcap
-VERSION=2
-MINOR=25
-#
+# Once go1.16 is released, I plan to set this value to 1 and keep it
+# there. The Go packages should always remain backwardly compatible,
+# but I may have to up it if Go's syntax dramatically changes in a
+# backwards incompatible manner. (Let's hope not.)
+GOMAJOR=0
# Compilation specifics
KERNEL_HEADERS := $(topdir)/libcap/include/uapi
IPATH += -fPIC -I$(KERNEL_HEADERS) -I$(topdir)/libcap/include
-CC := gcc
-CFLAGS := -O2 -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-BUILD_CC := $(CC)
-BUILD_CFLAGS := $(CFLAGS) $(IPATH)
-AR := ar
-RANLIB := ranlib
+CC := $(CROSS_COMPILE)gcc
+DEFINES := -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+COPTS ?= -O2
+CFLAGS ?= $(COPTS) $(DEFINES)
+BUILD_CC ?= $(CC)
+BUILD_COPTS ?= -O2
+BUILD_CFLAGS ?= $(BUILD_COPTS) $(DEFINES) $(IPATH)
+AR := $(CROSS_COMPILE)ar
+RANLIB := $(CROSS_COMPILE)ranlib
DEBUG = -g #-DDEBUG
WARNINGS=-Wall -Wwrite-strings \
-Wpointer-arith -Wcast-qual -Wcast-align \
-Wstrict-prototypes -Wmissing-prototypes \
-Wnested-externs -Winline -Wshadow
LD=$(CC) -Wl,-x -shared
-LDFLAGS := #-g
+LDFLAGS ?= #-g
+LIBCAPLIB := -L$(topdir)/libcap -lcap
+PSXLINKFLAGS := -lpthread -Wl,-wrap,pthread_create
+LIBPSXLIB := -L$(topdir)/libcap -lpsx $(PSXLINKFLAGS)
+
BUILD_GPERF := $(shell which gperf >/dev/null 2>/dev/null && echo yes)
SYSTEM_HEADERS = /usr/include
INCS=$(topdir)/libcap/include/sys/capability.h
LDFLAGS += -L$(topdir)/libcap
CFLAGS += -Dlinux $(WARNINGS) $(DEBUG)
-PAM_CAP := $(shell if [ -f /usr/include/security/pam_modules.h ]; then echo yes ; else echo no ; fi)
INDENT := $(shell if [ -n "$$(which indent 2>/dev/null)" ]; then echo "| indent -kr" ; fi)
-DYNAMIC := $(shell if [ ! -d "$(topdir)/.git" ]; then echo yes; fi)
-# When installing setcap, set its inheritable bit to be able to place
-# capabilities on files. It can be used in conjunction with pam_cap
-# (associated with su and certain users say) to make it useful for
-# specially blessed users. If you wish to drop this install feature,
-# use this command when running install
+# SHARED tracks whether or not the SHARED libraries (libcap.so,
+# libpsx.so and pam_cap.so) are built. (Some environments don't
+# support shared libraries.)
+SHARED ?= yes
+# DYNAMIC controls how capsh etc are linked - to shared or static libraries
+DYNAMIC := $(shell if [ ! -d "$(topdir)/.git" ]; then echo $(SHARED); else echo no ; fi)
+
+PAM_CAP ?= $(shell if [ -f /usr/include/security/pam_modules.h ]; then echo $(SHARED) ; else echo no ; fi)
+
+# If your system does not support pthreads, override this as "no".
+#
+# make PTHREADS=no ...
+#
+# This implies no Go support and no C/C++ libpsx build. Why might you
+# need libpsx for non-Go use? Tl;dr for POSIX semantics security:
+#
+# https://sites.google.com/site/fullycapable/who-ordered-libpsx
+#
+PTHREADS ?= yes
+
+ifeq ($(PTHREADS),yes)
+GO ?= go
+GOLANG ?= $(shell if [ -n "$(shell $(GO) version 2>/dev/null)" ]; then echo yes ; else echo no ; fi)
+ifeq ($(GOLANG),yes)
+GOROOT ?= $(shell $(GO) env GOROOT)
+GOCGO ?= $(shell if [ "$(shell $(GO) env CGO_ENABLED)" = 1 ]; then echo yes ; else echo no ; fi)
+GOOSARCH ?= $(shell $(GO) env GOHOSTOS)_$(shell $(GO) env GOHOSTARCH)
+CGO_REQUIRED=$(shell $(topdir)/go/cgo-required.sh $(GO))
+ifeq ($(CGO_REQUIRED),1)
+# Strictly speaking go1.15 doesn't need this, but 1.16 is when the
+# real golang support arrives for non-cgo support, so drop the last
+# vestige of legacy workarounds then.
+CGO_LDFLAGS_ALLOW := -Wl,-?-wrap[=,][^-.@][^,]*
+endif
+CGO_CFLAGS := -I$(topdir)/libcap/include
+CGO_LDFLAGS := -L$(topdir)/libcap
+endif
+endif
+
+# If you want capsh to launch with something other than /bin/bash
+# build like this:
+#
+# make CAPSH_SHELL='-DSHELL=\"/bin/sh\"'
+#
+# or undefine the following:
+#CAPSH_SHELL := '-DSHELL="/bin/sh"'
+
+# When installing setcap, you can arrange for the installation process
+# to set its inheritable bit to be able to place capabilities on files.
+# It can be used in conjunction with pam_cap (associated with su and
+# certain users say) to make it useful for specially blessed users.
#
-# make RAISE_SETFCAP=no install
+# make RAISE_SETFCAP=yes install
#
-RAISE_SETFCAP := yes
+# This is now defaulted to no because some distributions have started
+# shipping with all users blessed with full inheritable sets which makes
+# no sense whatsoever!
+#
+# Indeed, it looks alarmingly like these distributions are recreating
+# the environment for what became known as the sendmail-capabilities
+# bug from 2000:
+#
+# https://sites.google.com/site/fullycapable/Home/thesendmailcapabilitiesissue
+#
+# they are also nullifying the difference between a p-bit and an i-bit.
+#
+# Folk really should read this document, which explains there is a really
+# important difference being lost here:
+#
+# https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/33528.pdf
+#
+# In the context of this tree, on such systems, a yes setting will
+# guarantee that every user, by default, is able to bless any binary with
+# any capability - a ready made local exploit mechanism.
+RAISE_SETFCAP := no
+
+# If set to yes, this will cause the go "web" demo app to force the needed p
+# bit to be able to bind to port 80 without running as root.
+RAISE_GO_FILECAP := no
# Global cleanup stuff
diff --git a/Makefile b/Makefile
index ad58c3a..7150b9b 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,11 @@ all install clean kdebug: %: %-here
ifneq ($(PAM_CAP),no)
$(MAKE) -C pam_cap $@
endif
+ifeq ($(GOLANG),yes)
+ $(MAKE) -C go $@
+ rm -f cap/go.sum
+endif
+ $(MAKE) -C tests $@
$(MAKE) -C progs $@
$(MAKE) -C doc $@
$(MAKE) -C kdebug $@
@@ -26,13 +31,60 @@ clean-here:
distclean: clean
$(DISTCLEAN)
+ @echo "CONFIRM Go package cap has right version dependency on cap/psx:"
+ for x in $$(find . -name go.mod); do grep -F -v "module" $$x | fgrep "kernel.org/pub/linux/libs/security/libcap" > /dev/null || continue ; grep -F "v$(GOMAJOR).$(VERSION).$(MINOR)" $$x > /dev/null && continue ; echo "$$x is not updated to v$(GOMAJOR).$(VERSION).$(MINOR)" ; exit 1 ; done
+ @echo "ALL go.mod files updated"
+ @echo "Now validate that everything is checked in to a clean tree.."
+ test -z "$$(git status --ignored -s)"
+ @echo "All good!"
release: distclean
- cd .. && ln -s libcap libcap-$(VERSION).$(MINOR) && tar cvf libcap-$(VERSION).$(MINOR).tar libcap-$(VERSION).$(MINOR)/* && rm libcap-$(VERSION).$(MINOR)
- cd .. && gpg -sba -u E2CCF3F4 libcap-$(VERSION).$(MINOR).tar
+ cd .. && ln -s libcap libcap-$(VERSION).$(MINOR) && tar cvf libcap-$(VERSION).$(MINOR).tar --exclude patches libcap-$(VERSION).$(MINOR)/* && rm libcap-$(VERSION).$(MINOR)
+
+test: all
+ $(MAKE) -C libcap $@
+ $(MAKE) -C tests $@
+ifneq ($(PAM_CAP),no)
+ $(MAKE) -C pam_cap $@
+endif
+ifeq ($(GOLANG),yes)
+ $(MAKE) -C go $@
+endif
+ $(MAKE) -C progs $@
-tagrelease: distclean
- @echo "sign the tag twice: older DSA key; and newer RSA kernel.org key"
- git tag -u D41A6DF2 -s libcap-$(VERSION).$(MINOR)
- git tag -u E2CCF3F4 -s libcap-korg-$(VERSION).$(MINOR)
- make release
+sudotest: all
+ $(MAKE) -C tests $@
+ifneq ($(PAM_CAP),no)
+ $(MAKE) -C pam_cap $@
+endif
+ifeq ($(GOLANG),yes)
+ $(MAKE) -C go $@
+endif
+ $(MAKE) -C progs $@
+
+distcheck:
+ ./distcheck.sh
+ $(MAKE) DYNAMIC=yes clean all test sudotest
+ $(MAKE) CC=/usr/local/musl/bin/musl-gcc clean all test sudotest
+ $(MAKE) clean all test sudotest
+ $(MAKE) distclean
+
+morgangodoc:
+ @echo "Now the release is made, you want to remember to run:"
+ @echo
+ @echo "GOPROXY=https://proxy.golang.org GO111MODULE=on go get kernel.org/pub/linux/libs/security/libcap/cap@v$(GOMAJOR).$(VERSION).$(MINOR)"
+ @echo
+ @echo "This will cause a go.dev documentation update."
+
+morganrelease: distcheck
+ @echo "sign the main library tag twice: older DSA key; and newer RSA (kernel.org) key"
+ git tag -u D41A6DF2 -s libcap-$(VERSION).$(MINOR) -m "This is libcap-$(VERSION).$(MINOR)"
+ git tag -u E2CCF3F4 -s libcap-korg-$(VERSION).$(MINOR) -m "This is libcap-$(VERSION).$(MINOR)"
+ @echo "The following are for the Go module tracking."
+ git tag -u D41A6DF2 -s v$(GOMAJOR).$(VERSION).$(MINOR) -m "This is the version tag for the 'libcap' Go base directory associated with libcap-$(VERSION).$(MINOR)."
+ git tag -u D41A6DF2 -s psx/v$(GOMAJOR).$(VERSION).$(MINOR) -m "This is the version tag for the 'psx' Go package associated with libcap-$(VERSION).$(MINOR)."
+ git tag -u D41A6DF2 -s cap/v$(GOMAJOR).$(VERSION).$(MINOR) -m "This is the version tag for the 'cap' Go package associated with libcap-$(VERSION).$(MINOR)."
+ $(MAKE) release
+ @echo "sign the tar file using korg key"
+ cd .. && gpg -sba -u E2CCF3F4 libcap-$(VERSION).$(MINOR).tar
+ $(MAKE) morgangodoc
diff --git a/README b/README
index 7356eac..6ba482c 100644
--- a/README
+++ b/README
@@ -1,28 +1,48 @@
This is a library for getting and setting POSIX.1e (formerly POSIX 6)
draft 15 capabilities.
+Natively supported languages are C/C++ and Go.
+
This library would not have been possible without the help of
Aleph1, Roland Buresund and Andrew Main, Alexander Kjeldaas.
-More information on capabilities in the Linux kernel can be found at
+More information on capabilities in the Linux kernel, links to the
+official git repository for libcap, release notes and how to report
+bugs can be found at:
http://sites.google.com/site/fullycapable/
-# INSTALLATION
+# BUILDING AND INSTALLATION
+
+ $ make
+
+ builds the library and the programs that are expected
+ to work on your system. For example, if you have
+ Linux-PAM installed, pam_cap is built. A golang
+ installation is required to build the Go packages.
+
+ $ make test
+
+ runs all of the tests not requiring privilege
+
+ $ make sudotest
- Linux-Caps % make
+ runs all of the tests including those that require privilege.
- builds the library and the programs
+ $ sudo make install
- Linux-Caps % make install
+ default installs the library libcap.XX.Y in /lib[64]/
+ the binaries in /sbin/
+ the header files in /usr/include
+ the {libcap,libpsx}.pc files in /usr/lib[64]/pkgconfig
+ the Go packages (if built) under /usr/share/gocode/src
- installs the library libcap.XX.Y in /lib[64]/
- the binaries in /sbin/
- the <sys/capability.h> file in /usr/include
- the libcap.pc file in /usr/lib[64]/pkgconfig
+For some example C programs look in the progs/ directory. Specifically,
+capsh, getpcaps, setcap and getcap.
-* for some example programs look in progs.
+Go example programs are to be found in the goapps/ directory. There
+are also some more complicated integration tests in the go/ directory.
Cheers
diff --git a/cap/LICENSE b/cap/LICENSE
new file mode 100644
index 0000000..1c65641
--- /dev/null
+++ b/cap/LICENSE
@@ -0,0 +1,385 @@
+Unless otherwise *explicitly* stated, the following text describes the
+licensed conditions under which the contents of this libcap/cap release
+may be used and distributed:
+
+-------------------------------------------------------------------------
+Redistribution and use in source and binary forms of libcap/cap, with
+or without modification, are permitted provided that the following
+conditions are met:
+
+1. Redistributions of source code must retain any existing copyright
+ notice, and this entire permission notice in its entirety,
+ including the disclaimer of warranties.
+
+2. Redistributions in binary form must reproduce all prior and current
+ copyright notices, this list of conditions, and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+3. The name of any author may not be used to endorse or promote
+ products derived from this software without their specific prior
+ written permission.
+
+ALTERNATIVELY, this product may be distributed under the terms of the
+GNU General Public License (v2.0 - see below), in which case the
+provisions of the GNU GPL are required INSTEAD OF the above
+restrictions. (This clause is necessary due to a potential conflict
+between the GNU GPL and the restrictions contained in a BSD-style
+copyright.)
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+-------------------------------------------------------------------------
+
+-------------------------
+Full text of gpl-2.0.txt:
+-------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/cap/README b/cap/README
new file mode 100644
index 0000000..3ac8433
--- /dev/null
+++ b/cap/README
@@ -0,0 +1,10 @@
+Package cap is the libcap API for Linux Capabilities written in
+Go. The official release announcement site for libcap is:
+
+ https://sites.google.com/site/fullycapable/
+
+Like libcap, the cap package is distributed with a "you choose"
+License. Specifically: BSD three clause, or GPL2. See the LICENSE
+file.
+
+Andrew G. Morgan <morgan@kernel.org>
diff --git a/cap/cap.go b/cap/cap.go
new file mode 100644
index 0000000..5ccef59
--- /dev/null
+++ b/cap/cap.go
@@ -0,0 +1,461 @@
+// Package cap provides all the Linux Capabilities userspace library API
+// bindings in native Go.
+//
+// Capabilities are a feature of the Linux kernel that allow fine
+// grain permissions to perform privileged operations. Privileged
+// operations are required to do irregular system level operations
+// from code. You can read more about how Capabilities are intended to
+// work here:
+//
+// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/33528.pdf
+//
+// This package supports native Go bindings for all the features
+// described in that paper as well as supporting subsequent changes to
+// the kernel for other styles of inheritable Capability.
+//
+// Some simple things you can do with this package are:
+//
+// // Read and display the capabilities of the running process
+// c := cap.GetProc()
+// log.Printf("this process has these caps: %q", c)
+//
+// // Drop any privilege a process might have (including for root,
+// // but note root 'owns' a lot of system files so a cap-limited
+// // root can still do considerable damage to a running system).
+// old := cap.GetProc()
+// empty := cap.NewSet()
+// if err := empty.SetProc(); err != nil {
+// log.Fatalf("failed to drop privilege: %q -> %q: %v", old, empty, err)
+// }
+// now := cap.GetProc()
+// if cap.Differs(now.Compare(empty)) {
+// log.Fatalf("failed to fully drop privilege: have=%q, wanted=%q", now, empty)
+// }
+//
+// See https://sites.google.com/site/fullycapable/ for recent updates,
+// some more complete walk-through examples of ways of using
+// 'cap.Set's etc and information on how to file bugs.
+//
+// For CGo linked binaries, behind the scenes, the package
+// "kernel.org/pub/linux/libs/security/libcap/psx" is used to perform
+// POSIX semantics system calls that manipulate thread state
+// uniformly over the whole Go (and CGo linked) process runtime.
+//
+// Note, if the Go runtime syscall interface contains the Linux
+// variant syscall.AllThreadsSyscall() API (it debuted in go1.16 see
+// https://github.com/golang/go/issues/1435 for its history) then
+// the "psx" package will use that to invoke Capability setting system
+// calls in pure Go binaries. In such an enhanced Go runtime, to force
+// this behavior, use the CGO_ENABLED=0 environment variable.
+//
+//
+// Copyright (c) 2019-21 Andrew G. Morgan <morgan@kernel.org>
+//
+// The cap and psx packages are licensed with a (you choose) BSD
+// 3-clause or GPL2. See LICENSE file for details.
+package cap // import "kernel.org/pub/linux/libs/security/libcap/cap"
+
+import (
+ "errors"
+ "sort"
+ "sync"
+ "syscall"
+ "unsafe"
+)
+
+// Value is the type of a single capability (or permission) bit.
+type Value uint
+
+// Flag is the type of one of the three Value dimensions held in a
+// Set. It is also used in the (*IAB).Fill() method for changing the
+// Bounding and Ambient Vectors.
+type Flag uint
+
+// Effective, Permitted, Inheritable are the three Flags of Values
+// held in a Set. Their numeric order is significant: the data type
+// is sized as [Inheritable + 1]uint32, so Inheritable must remain
+// the largest Flag value.
+const (
+ Effective Flag = iota
+ Permitted
+ Inheritable
+)
+
+// String returns the conventional one letter abbreviation for a
+// Flag: "e" (Effective), "p" (Permitted) or "i" (Inheritable). Any
+// other Flag value yields "<Error>".
+func (f Flag) String() string {
+	names := [...]string{
+		Effective:   "e",
+		Permitted:   "p",
+		Inheritable: "i",
+	}
+	if f > Inheritable {
+		return "<Error>"
+	}
+	return names[f]
+}
+
+// data holds a 32-bit slice of the compressed bitmaps of capability
+// sets as understood by the kernel. It contains one uint32 word per
+// Flag value (Effective, Permitted and Inheritable).
+type data [Inheritable + 1]uint32
+
+// Set is an opaque capabilities container for a set of system
+// capabilities. It holds individually addressable capability Value's
+// for the three capability Flag's. See GetFlag() and SetFlag() for
+// how to adjust them individually, and Clear() and ClearFlag() for
+// how to do bulk operations.
+//
+// For admin tasks associated with managing namespace specific file
+// capabilities, Set can also support a namespace-root-UID value which
+// defaults to zero. See GetNSOwner() and SetNSOwner().
+type Set struct {
+ // mu protects all other members of a Set.
+ mu sync.RWMutex
+
+ // flat holds Flag Value bitmaps for all capabilities
+ // associated with this Set.
+ flat []data
+
+ // Linux specific
+ // nsRoot is the namespace-root-UID value described above; the
+ // zero value is the default.
+ nsRoot int
+}
+
+// Various known kernel magic values. cInit() offers kv3 to the kernel
+// and then selects the number of 32-bit words per Flag based on the
+// magic value found in the header afterwards.
+const (
+ kv1 = 0x19980330 // First iteration of process capabilities (32 bits).
+ kv2 = 0x20071026 // First iteration of process and file capabilities (64 bits) - deprecated.
+ kv3 = 0x20080522 // Most recently supported process and file capabilities (64 bits).
+)
+
+var (
+ // startUp protects setting of the following values: magic,
+ // words, maxValues. They are initialized by running cInit()
+ // through startUp.Do().
+ startUp sync.Once
+
+ // magic holds the preferred magic number for the kernel ABI.
+ magic uint32
+
+ // words holds the number of uint32's associated with each
+ // capability Flag for this session.
+ words int
+
+ // maxValues holds the number of bit values that are named by
+ // the running kernel. This is generally expected to match
+ // ValueCount which is autogenerated at packaging time.
+ maxValues uint
+)
+
+// header is the first argument handed (by pointer) to the capability
+// get and set system calls by caprcall() and capwcall(). It carries
+// the capability ABI magic number and the id of the target process;
+// GetPID() documents pid=0 as an alias for the current process.
+type header struct {
+ magic uint32
+ pid int32
+}
+
+// scwMu is used to fully serialize the write system calls. Note, this
+// is generally not necessary, but in the case of Launch we get into a
+// situation where the launching thread is temporarily allowed to
+// deviate from the kernel state of the rest of the runtime and
+// allowing other threads to perform w* syscalls will potentially
+// interfere with the launching process.
+var scwMu sync.Mutex
+
+// syscaller is a type for abstracting syscalls. The r* variants are
+// for reading state, and can be parallelized, the w* variants need to
+// be serialized so all OS threads can share state. The digit in each
+// field name is the number of arguments passed after the trap number:
+// r3/w3 take three, r6/w6 take six. Each returns two result values
+// and a syscall.Errno, which is zero on success.
+type syscaller struct {
+ r3 func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
+ w3 func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
+ r6 func(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+ w6 func(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+}
+
+// caprcall provides a pointer etc wrapper for the system calls
+// associated with getcap. The header h and the first element of d
+// are passed to sc.r3 as raw addresses; a nil or empty d is passed
+// as a zero (NULL) data pointer. It returns nil on success and the
+// syscall.Errno reported by sc.r3 otherwise.
+//go:uintptrescapes
+func (sc *syscaller) caprcall(call uintptr, h *header, d []data) error {
+	x := uintptr(0)
+	// Test the length rather than nil-ness: an empty but non-nil
+	// slice has no d[0] and indexing it would panic.
+	if len(d) != 0 {
+		x = uintptr(unsafe.Pointer(&d[0]))
+	}
+	_, _, err := sc.r3(call, uintptr(unsafe.Pointer(h)), x, 0)
+	if err != 0 {
+		// Only a non-zero Errno is converted to an error; a zero
+		// Errno stored in an error interface would compare
+		// non-nil.
+		return err
+	}
+	return nil
+}
+
+// capwcall provides a pointer etc wrapper for the system calls
+// associated with setcap. The header h and the first element of d
+// are passed to sc.w3 as raw addresses; a nil or empty d is passed
+// as a zero (NULL) data pointer. It returns nil on success and the
+// syscall.Errno reported by sc.w3 otherwise.
+//go:uintptrescapes
+func (sc *syscaller) capwcall(call uintptr, h *header, d []data) error {
+	x := uintptr(0)
+	// Test the length rather than nil-ness: an empty but non-nil
+	// slice has no d[0] and indexing it would panic.
+	if len(d) != 0 {
+		x = uintptr(unsafe.Pointer(&d[0]))
+	}
+	_, _, err := sc.w3(call, uintptr(unsafe.Pointer(h)), x, 0)
+	if err != 0 {
+		// Only a non-zero Errno is converted to an error; a zero
+		// Errno stored in an error interface would compare
+		// non-nil.
+		return err
+	}
+	return nil
+}
+
+// prctlrcall issues a prctl system call that only reads kernel
+// state, via sc.r3. prVal selects the prctl operation; v1 and v2 are
+// its arguments and should be 0 when unused. The raw result is
+// returned as an int, together with a non-nil error when the call
+// reports a non-zero errno.
+func (sc *syscaller) prctlrcall(prVal, v1, v2 uintptr) (int, error) {
+	ret, _, errno := sc.r3(syscall.SYS_PRCTL, prVal, v1, v2)
+	if errno == 0 {
+		// Return an untyped nil: a zero Errno held in an error
+		// interface would not compare equal to nil.
+		return int(ret), nil
+	}
+	return int(ret), errno
+}
+
+// prctlrcall6 is the six argument form of prctlrcall, needed by the
+// ambient capability API. It issues a prctl system call that only
+// reads kernel state, via sc.r6. Unused arguments should be 0. The
+// raw result is returned as an int, together with a non-nil error
+// when the call reports a non-zero errno.
+func (sc *syscaller) prctlrcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
+	ret, _, errno := sc.r6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
+	if errno == 0 {
+		// Return an untyped nil: a zero Errno held in an error
+		// interface would not compare equal to nil.
+		return int(ret), nil
+	}
+	return int(ret), errno
+}
+
+// prctlwcall issues a prctl system call that writes/modifies kernel
+// state, via sc.w3. Where available, these will use the POSIX
+// semantics fixup system calls. prVal selects the prctl operation;
+// v1 and v2 are its arguments and should be 0 when unused. The raw
+// result is returned as an int, together with a non-nil error when
+// the call reports a non-zero errno.
+func (sc *syscaller) prctlwcall(prVal, v1, v2 uintptr) (int, error) {
+	ret, _, errno := sc.w3(syscall.SYS_PRCTL, prVal, v1, v2)
+	if errno == 0 {
+		// Return an untyped nil: a zero Errno held in an error
+		// interface would not compare equal to nil.
+		return int(ret), nil
+	}
+	return int(ret), errno
+}
+
+// prctlwcall6 is the six argument form of prctlwcall, needed by the
+// ambient capability API. It issues a prctl system call that
+// writes/modifies kernel state, via sc.w6. (Where available, these
+// will use the POSIX semantics fixup system calls). Unused arguments
+// should be 0. The raw result is returned as an int, together with a
+// non-nil error when the call reports a non-zero errno.
+func (sc *syscaller) prctlwcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
+	ret, _, errno := sc.w6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
+	if errno == 0 {
+		// Return an untyped nil: a zero Errno held in an error
+		// interface would not compare equal to nil.
+		return int(ret), nil
+	}
+	return int(ret), errno
+}
+
+// cInit performs the lazy identification of the capability vintage of
+// the running system. It sets the package-level magic, words and
+// maxValues variables; callers in this file run it through
+// startUp.Do() so that it executes once.
+func (sc *syscaller) cInit() {
+ h := &header{
+ magic: kv3,
+ }
+ // NOTE(review): the result of this probe is ignored. Presumably
+ // the kernel rewrites h.magic with its preferred ABI version even
+ // when it rejects the request - confirm.
+ sc.caprcall(syscall.SYS_CAPGET, h, nil)
+ magic = h.magic
+ switch magic {
+ case kv1:
+ words = 1
+ case kv2, kv3:
+ words = 2
+ default:
+ // Fall back to a known good version.
+ magic = kv3
+ words = 2
+ }
+ // Use the bounding set to evaluate which capabilities exist.
+ // sort.Search binary-searches [0, 32*words) for the smallest n for
+ // which GetBound() fails, which relies on the valid Values being
+ // contiguous from zero.
+ maxValues = uint(sort.Search(32*words, func(n int) bool {
+ _, err := GetBound(Value(n))
+ return err != nil
+ }))
+ if maxValues == 0 {
+ // Fall back to using the largest value defined at build time.
+ maxValues = NamedCount
+ }
+}
+
+// MaxBits reports how many kernel-named capabilities were discovered
+// at runtime on the current system. The discovery is performed on
+// first use.
+func MaxBits() Value {
+	startUp.Do(multisc.cInit)
+	count := Value(maxValues)
+	return count
+}
+
+// NewSet allocates and returns an empty capability Set, sized for
+// the number of words per Flag determined for the running kernel.
+func NewSet() *Set {
+	// words is only valid once cInit has run.
+	startUp.Do(multisc.cInit)
+	s := new(Set)
+	s.flat = make([]data, words)
+	return s
+}
+
+// ErrBadSet indicates a nil pointer was used for a *Set, or the
+// request of the Set is invalid in some way. For example, it is
+// returned when a Set holds no capability data at all.
+var ErrBadSet = errors.New("bad capability set")
+
+// Dup makes an independent copy of the capability set c, including
+// its namespace root value. It returns ErrBadSet if c is nil or
+// holds no capability data.
+func (c *Set) Dup() (*Set, error) {
+	if c == nil || len(c.flat) == 0 {
+		return nil, ErrBadSet
+	}
+	dup := NewSet()
+
+	// Hold the read lock only while the source is being read.
+	c.mu.RLock()
+	copy(dup.flat, c.flat)
+	dup.nsRoot = c.nsRoot
+	c.mu.RUnlock()
+
+	return dup, nil
+}
+
+// GetPID reads the capability set of the process with the given
+// process id; pid=0 is an alias for the current process. On failure
+// a nil Set and the system call error are returned.
+func GetPID(pid int) (*Set, error) {
+	// NewSet() must come first: it runs the lazy start up code that
+	// establishes the value of magic used in the header below.
+	set := NewSet()
+	hdr := &header{magic: magic, pid: int32(pid)}
+	err := multisc.caprcall(syscall.SYS_CAPGET, hdr, set.flat)
+	if err != nil {
+		return nil, err
+	}
+	return set, nil
+}
+
+// GetProc returns the capability Set of the current process. It is
+// GetPID(0) without the error return: should the kernel be unable to
+// supply the Set, the function panic()s with the error.
+func GetProc() *Set {
+	c, err := GetPID(0)
+	if err == nil {
+		return c
+	}
+	panic(err)
+}
+
+// setProc asks the kernel, via sc.capwcall, to make c the capability
+// Set of the current process. It returns ErrBadSet if c is nil or
+// holds no capability data, otherwise the result of the system call.
+func (sc *syscaller) setProc(c *Set) error {
+	if c != nil && len(c.flat) != 0 {
+		hdr := &header{magic: magic}
+		return sc.capwcall(syscall.SYS_CAPSET, hdr, c.flat)
+	}
+	return ErrBadSet
+}
+
+// SetProc attempts to set the capability Set of the current
+// process. The kernel will perform permission checks and an error
+// will be returned if the attempt fails. Should the attempt fail
+// no process capabilities will have been modified.
+func (c *Set) SetProc() error {
+ // Serialize with every other state-writing system call; see the
+ // comment on scwMu.
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setProc(c)
+}
+
+// defines from uapi/linux/prctl.h
+const (
+ prCapBSetRead = 23 // PR_CAPBSET_READ, used by GetBound()
+ prCapBSetDrop = 24 // PR_CAPBSET_DROP, used by DropBound()
+)
+
+// GetBound reports whether the capability val is currently present
+// in the local bounding set. On systems where the bounding set Value
+// is not present, this function returns false and an error.
+func GetBound(val Value) (bool, error) {
+	r, err := multisc.prctlrcall(prCapBSetRead, uintptr(val), 0)
+	if err == nil {
+		return r > 0, nil
+	}
+	return false, err
+}
+
+// dropBound drops each of the listed Values from the bounding set,
+// in the order given, using sc.prctlwcall. It stops at, and returns,
+// the first error encountered.
+//go:uintptrescapes
+func (sc *syscaller) dropBound(val ...Value) error {
+	for i := range val {
+		_, err := sc.prctlwcall(prCapBSetDrop, uintptr(val[i]), 0)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// DropBound attempts to suppress bounding set Values. The kernel will
+// never allow a bounding set Value bit to be raised once successfully
+// dropped. However, dropping requires the current process is
+// sufficiently capable (usually via cap.SETPCAP being raised in the
+// Effective flag of the process' Set). Note, the drops are performed
+// in order and if one bounding value cannot be dropped, the function
+// returns immediately with an error which may leave the system in an
+// ill-defined state. The caller can determine where things went wrong
+// using GetBound().
+func DropBound(val ...Value) error {
+ // Serialize with every other state-writing system call; see the
+ // comment on scwMu.
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.dropBound(val...)
+}
+
+// defines from uapi/linux/prctl.h
+const (
+ // prCapAmbient is the prctl operation (PR_CAP_AMBIENT); the
+ // values below are the sub-commands passed as its first argument.
+ prCapAmbient = 47
+
+ prCapAmbientIsSet = 1 // PR_CAP_AMBIENT_IS_SET
+ prCapAmbientRaise = 2 // PR_CAP_AMBIENT_RAISE
+ prCapAmbientLower = 3 // PR_CAP_AMBIENT_LOWER
+ prCapAmbientClearAll = 4 // PR_CAP_AMBIENT_CLEAR_ALL
+)
+
+// GetAmbient determines if a specific capability is currently part of
+// the local ambient set. On systems where the ambient set Value is
+// not present, this function returns false and an error.
+func GetAmbient(val Value) (bool, error) {
+	r, err := multisc.prctlrcall6(prCapAmbient, prCapAmbientIsSet, uintptr(val), 0, 0, 0)
+	if err != nil {
+		// Match GetBound(): never report a raised bit based on the
+		// raw return value of a failed system call.
+		return false, err
+	}
+	return r > 0, nil
+}
+
+//go:uintptrescapes
+func (sc *syscaller) setAmbient(enable bool, val ...Value) error {
+ dir := uintptr(prCapAmbientLower)
+ if enable {
+ dir = prCapAmbientRaise
+ }
+ for _, v := range val {
+ _, err := sc.prctlwcall6(prCapAmbient, dir, uintptr(v), 0, 0, 0)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// SetAmbient attempts to set a specific Value bit to the state,
+// enable. This function will return an error if insufficient
+// permission is available to perform this task. The settings are
+// performed in order and the function returns as soon as an error is
+// detected. Use GetAmbient() to unravel where things went
+// wrong. Note, the cap package manages an abstraction IAB that
+// captures all three inheritable vectors in a single type. Consider
+// using that.
+func SetAmbient(enable bool, val ...Value) error {
+ // Serialize with every other state-writing system call; see the
+ // comment on scwMu.
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setAmbient(enable, val...)
+}
+
+func (sc *syscaller) resetAmbient() error {
+ var v bool
+ var err error
+
+ for c := Value(0); !v; c++ {
+ if v, err = GetAmbient(c); err != nil {
+ // no non-zero values found.
+ return nil
+ }
+ }
+ _, err = sc.prctlwcall6(prCapAmbient, prCapAmbientClearAll, 0, 0, 0, 0)
+ return err
+}
+
+// ResetAmbient attempts to ensure the Ambient set is fully
+// cleared. It works by first reading the set and if it finds any bits
+// raised it will attempt a reset. The test before attempting a reset
+// behavior is a workaround for situations where the Ambient API is
+// locked, but a reset is not actually needed. No Ambient bit not
+// already raised in both the Permitted and Inheritable Set is allowed
+// to be raised by the kernel.
+func ResetAmbient() error {
+ // Serialize with every other state-writing system call; see the
+ // comment on scwMu.
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.resetAmbient()
+}
diff --git a/cap/cap_test.go b/cap/cap_test.go
new file mode 100644
index 0000000..017c565
--- /dev/null
+++ b/cap/cap_test.go
@@ -0,0 +1,214 @@
+package cap
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestAllMask(t *testing.T) { // checks bitOf() and allMask() against hand-computed values
+ oldMask := maxValues
+ oldWords := words
+ defer func() { // restore the package globals overridden below
+ maxValues = oldMask
+ words = oldWords
+ }()
+
+ maxValues = 35 // word 0 is fully valid, word 1 keeps only its low 3 bits
+ words = 3 // three 32-bit words, so word 2 holds no valid bits at all
+
+ vs := []struct {
+ val Value
+ index uint
+ bit uint32
+ mask uint32
+ }{
+ {val: CHOWN, index: 0, bit: 0x1, mask: ^uint32(0)},
+ {val: 38, index: 1, bit: (1 << 6), mask: 0x7}, // 38 = 32 + 6
+ {val: 34, index: 1, bit: (1 << 2), mask: 0x7}, // 34 = 32 + 2
+ {val: 65, index: 2, bit: (1 << 1), mask: 0}, // 65 = 64 + 1, beyond maxValues
+ }
+ for i, v := range vs {
+ index, bit, err := bitOf(Inheritable, v.val)
+ if err != nil {
+ t.Fatalf("[%d] %v(%d) - not bitOf: %v", i, v.val, v.val, err)
+ } else if index != v.index {
+ t.Errorf("[%d] %v(%d) - index: got=%d want=%d", i, v.val, v.val, index, v.index)
+ }
+ if bit != v.bit {
+ t.Errorf("[%d] %v(%d) - bit: got=%b want=%b", i, v.val, v.val, bit, v.bit)
+ }
+ if mask := allMask(index); mask != v.mask { // the mask is looked up with the index bitOf returned
+ t.Errorf("[%d] %v(%d) - mask: got=%b want=%b", i, v.val, v.val, mask, v.mask)
+ }
+ }
+}
+
+func TestString(t *testing.T) {
+ a := CHOWN
+ if got, want := a.String(), "cap_chown"; got != want {
+ t.Fatalf("pretty basic failure: got=%q, want=%q", got, want)
+ }
+}
+
+func TestText(t *testing.T) { // round-trips text through FromText() and String()
+ vs := []struct {
+ from, to string // from is parsed; to is the expected String() rendering
+ err error // the exact error value FromText(from) must return
+ }{
+ {"", "", ErrBadText},
+ {"=", "=", nil},
+ {"= cap_chown+iep cap_chown-i", "cap_chown=ep", nil},
+ {"= cap_setfcap,cap_chown+iep cap_chown-i", "cap_setfcap=eip cap_chown+ep", nil},
+ {"cap_setfcap,cap_chown=iep cap_chown-i", "cap_setfcap=eip cap_chown+ep", nil},
+ {"=i =p", "=p", nil},
+ {"all+pie", "=eip", nil},
+ {"all=p+ie-e", "=ip", nil},
+ }
+ for i, v := range vs {
+ c, err := FromText(v.from)
+ if err != v.err {
+ t.Errorf("[%d] parsing %q failed: got=%v, want=%v", i, v.from, err, v.err)
+ continue
+ }
+ if err != nil { // an expected parse failure: there is no Set to render
+ continue
+ }
+ to := c.String()
+ if to != v.to {
+ t.Errorf("[%d] failed to stringify cap: %q -> got=%q, want=%q", i, v.from, to, v.to)
+ }
+ if d, err := FromText(to); err != nil { // the rendering must itself parse...
+ t.Errorf("[%d] failed to reparse %q: %v", i, to, err)
+ } else if got := d.String(); got != to { // ...and render back to the same text
+ t.Errorf("[%d] failed to stringify %q getting %q", i, to, got)
+ }
+ }
+}
+
+// same describes, as an error, the first difference found between a
+// and b. It returns nil when both are nil, or when their nsRoot and
+// every flag word of every flat entry agree.
+func same(a, b *Set) error {
+ switch {
+ case (a == nil) != (b == nil):
+ return fmt.Errorf("nil-ness miscompare: %q vs %v", a, b)
+ case a == nil:
+ return nil
+ case a.nsRoot != b.nsRoot:
+ return fmt.Errorf("capabilities differ in nsRoot: a=%d b=%d", a.nsRoot, b.nsRoot)
+ }
+ for i := range a.flat {
+ x, y := a.flat[i], b.flat[i]
+ for s := Effective; s <= Inheritable; s++ {
+ got, want := x[s], y[s]
+ if got == want {
+ continue
+ }
+ return fmt.Errorf("capabilities differ: a[%d].flat[%v]=0x%08x b[%d].flat[%v]=0x%08x", i, s, got, i, s, want)
+ }
+ }
+ return nil
+}
+
+func TestImportExport(t *testing.T) { // Export -> Import -> String -> FromText must all agree
+ wantQ := "=ep cap_chown-e 63+ip"
+ if q, err := FromText(wantQ); err != nil {
+ t.Fatalf("failed to parse %q: %v", wantQ, err)
+ } else if gotQ := q.String(); gotQ != wantQ {
+ t.Fatalf("static test failed %q -> q -> %q", wantQ, gotQ)
+ }
+
+ // Sanity check empty import/export.
+ c := NewSet()
+ if ex, err := c.Export(); err != nil {
+ t.Fatalf("failed to export empty set: %v", err)
+ } else if len(ex) != 5 { // 4 bytes of ExtMagic plus the single length byte
+ t.Fatalf("wrong length: got=%d want=%d", len(ex), 5)
+ } else if im, err := Import(ex); err != nil {
+ t.Fatalf("failed to import empty set: %v", err)
+ } else if got, want := im.String(), c.String(); got != want {
+ t.Fatalf("import != export: got=%q want=%q", got, want)
+ }
+ // Now keep flipping bits on and off and validate that all
+ // forms of import/export work.
+ for i := uint(0); i < 7000; i += 13 {
+ s := Flag(i % 3)
+ v := Value(i % (maxValues + 3))
+ c.SetFlag(s, i&17 < 8, v) // enable whenever bit 4 (value 16) of i is clear; the result is not checked
+ if ex, err := c.Export(); err != nil {
+ t.Fatalf("[%d] failed to export (%q): %v", i, c, err)
+ } else if im, err := Import(ex); err != nil {
+ t.Fatalf("[%d] failed to import (%q) set: %v", i, c, err)
+ } else if got, want := im.String(), c.String(); got != want {
+ t.Fatalf("[%d] import != export: got=%q want=%q [%02x]", i, got, want, ex)
+ } else if parsed, err := FromText(got); err != nil {
+ t.Fatalf("[%d] failed to parse %q: %v", i, got, err)
+ } else if err := same(c, parsed); err != nil { // word-by-word comparison, not just text
+ t.Fatalf("[%d] miscompare (%q vs. %q): %v", i, got, parsed, err)
+ }
+ }
+}
+
+// TestIAB exercises the IAB text form and the Fill/SetVector helpers:
+// a table of texts is parsed and re-rendered, init's capabilities are
+// used to check that Fill keeps the i and a vectors equal, and a
+// series of SetVector changes is replayed through String/IABFromText.
+func TestIAB(t *testing.T) {
+ vs := []struct {
+ text string
+ bad bool // true when IABFromText is expected to reject text
+ }{
+ {text: "cup_full", bad: true},
+ {text: ""},
+ {text: "!%cap_chown"},
+ {text: "!cap_chown,^cap_setuid"},
+ {text: "cap_chown,cap_setuid"},
+ {text: "^cap_chown,cap_setuid"},
+ {text: "^cap_chown,!cap_setuid"},
+ }
+ for i, v := range vs {
+ want := v.text
+ iab, err := IABFromText(want)
+ if err != nil {
+ if v.bad {
+ continue
+ }
+ // Report the parse error itself; no usable IAB exists here.
+ t.Errorf("[%d] failed to parse %q: %v", i, want, err)
+ continue
+ }
+ if v.bad {
+ // A text marked bad must not be accepted silently.
+ t.Errorf("[%d] parsing %q unexpectedly succeeded: got=%q", i, want, iab)
+ continue
+ }
+ if got := iab.String(); got != want {
+ t.Errorf("[%d] got=%q want=%q", i, got, want)
+ }
+ }
+
+ one, err := GetPID(1)
+ if err != nil {
+ t.Fatalf("failed to get init's capabilities: %v", err)
+ }
+ iab := IABInit()
+ iab.Fill(Amb, one, Permitted)
+ for i := 0; i < words; i++ {
+ if iab.i[i] != iab.a[i] {
+ t.Errorf("[%d] i=0x%08x != a=0x%08x", i, iab.i[i], iab.a[i])
+ }
+ }
+ one.ClearFlag(Inheritable)
+ iab.Fill(Inh, one, Inheritable)
+ for i := 0; i < words; i++ {
+ if iab.i[i] != iab.a[i] {
+ t.Errorf("[%d] i=0x%08x != a=0x%08x", i, iab.i[i], iab.a[i])
+ }
+ }
+
+ for n := uint(0); n < 1000; n += 13 {
+ enabled := ((n % 5) & 2) != 0
+ vec := Vector(n % 3)
+ c := Value(n % maxValues)
+ if err := iab.SetVector(vec, enabled, c); err != nil {
+ t.Errorf("[%d] failed to set vec=%v enabled=%v %q in %q", n, vec, enabled, c, iab)
+ continue
+ }
+ replay, err := IABFromText(iab.String())
+ if err != nil {
+ t.Errorf("failed to replay: %v", err)
+ continue
+ }
+ for i := 0; i < words; i++ {
+ if replay.i[i] != iab.i[i] || replay.a[i] != iab.a[i] || replay.nb[i] != iab.nb[i] {
+ t.Errorf("[%d,%d] got=%q want=%q", n, i, replay, iab)
+ }
+ }
+ }
+}
diff --git a/cap/convenience.go b/cap/convenience.go
new file mode 100644
index 0000000..9580903
--- /dev/null
+++ b/cap/convenience.go
@@ -0,0 +1,292 @@
+package cap
+
+import (
+ "errors"
+ "syscall"
+ "unsafe"
+)
+
+// This file contains convenience functions for libcap, to help
+// users do the right thing with respect to capabilities for
+// common actions.
+
+// Secbits capture the prctl settable secure-bits of a process.
+type Secbits uint
+
+// SecbitNoRoot etc are the bitmasks associated with the supported
+// Secbit masks. Source: uapi/linux/securebits.h
+const (
+ SecbitNoRoot Secbits = 1 << iota // 0x01
+ SecbitNoRootLocked // 0x02
+ SecbitNoSetUIDFixup // 0x04
+ SecbitNoSetUIDFixupLocked // 0x08
+ SecbitKeepCaps // 0x10
+ SecbitKeepCapsLocked // 0x20
+ SecbitNoCapAmbientRaise // 0x40
+ SecbitNoCapAmbientRaiseLocked // 0x80
+)
+
+const (
+ securedBasicBits = SecbitNoRoot | SecbitNoRootLocked | SecbitNoSetUIDFixup | SecbitNoSetUIDFixupLocked | SecbitKeepCapsLocked // note: SecbitKeepCaps itself is left clear, only its lock bit is set
+ securedAmbientBits = securedBasicBits | SecbitNoCapAmbientRaise | SecbitNoCapAmbientRaiseLocked // the basic bits plus both Ambient-raise bits
+)
+
+// defines from uapi/linux/prctl.h
+const (
+ prSetKeepCaps = 8 // PR_SET_KEEPCAPS
+ prGetSecureBits = 27 // PR_GET_SECUREBITS
+ prSetSecureBits = 28 // PR_SET_SECUREBITS
+ prSetNoNewPrivs = 38 // PR_SET_NO_NEW_PRIVS
+)
+
+// GetSecbits returns the current setting of the process' Secbits. It panics if the read fails.
+func GetSecbits() Secbits {
+ v, err := multisc.prctlrcall(prGetSecureBits, 0, 0)
+ if err != nil {
+ panic(err) // the signature has no error return, so a failed read is fatal
+ }
+ return Secbits(v)
+}
+
+func (sc *syscaller) setSecbits(s Secbits) error { // issues the prSetSecureBits request with value s
+ _, err := sc.prctlwcall(prSetSecureBits, uintptr(s), 0)
+ return err
+}
+
+// Set attempts to force the process Secbits to a value. Unlike
+// Mode.Set, this code path does not itself raise cap.SETPCAP or lower
+// the Effective vector: it only issues the prSetSecureBits request.
+func (s Secbits) Set() error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setSecbits(s)
+}
+
+// Mode summarizes a complicated secure-bits and capability mode in a
+// libcap preferred way.
+type Mode uint
+
+// ModeUncertain etc are how libcap summarizes security modes
+// involving capabilities and secure-bits.
+const (
+ ModeUncertain Mode = iota // what GetMode() reports for any state it does not recognize
+ ModeNoPriv // rendered as "NOPRIV"
+ ModePure1EInit // rendered as "PURE1E_INIT"
+ ModePure1E // rendered as "PURE1E"
+)
+
+// GetMode assesses the current process state and summarizes it as
+// a Mode. Unfamiliar modes are declared ModeUncertain. No error is
+// returned, but GetSecbits() panics if the secure-bits cannot be read.
+func GetMode() Mode {
+ b := GetSecbits()
+ if b&securedBasicBits != securedBasicBits { // every basic secured bit must be set
+ return ModeUncertain
+ }
+
+ for c := Value(0); ; c++ { // walk the Ambient bits until GetAmbient reports an error
+ v, err := GetAmbient(c)
+ if err != nil {
+ if c != 0 && b != securedAmbientBits { // some Ambient bit was readable, so the secbits must match exactly
+ return ModeUncertain
+ }
+ break
+ }
+ if v { // any raised Ambient bit is an unfamiliar state
+ return ModeUncertain
+ }
+ }
+
+ w := GetProc()
+ e := NewSet()
+ cf, _ := w.Compare(e) // compare the process Set with a freshly created one
+
+ if Differs(cf, Inheritable) {
+ return ModePure1E
+ }
+ if Differs(cf, Permitted) || Differs(cf, Effective) {
+ return ModePure1EInit
+ }
+
+ for c := Value(0); ; c++ { // walk the Bounding values until GetBound reports an error
+ v, err := GetBound(c)
+ if err != nil {
+ break
+ }
+ if v { // a value still in the Bounding set rules out ModeNoPriv
+ return ModePure1EInit
+ }
+ }
+
+ return ModeNoPriv
+}
+
+// ErrBadMode is the error returned when an attempt is made to set an
+// unrecognized libcap security mode.
+var ErrBadMode = errors.New("unsupported mode")
+
+func (sc *syscaller) setMode(m Mode) error { // worker for Mode.Set(); the caller holds scwMu
+ w := GetProc()
+ defer func() { // on every return path: lower Effective and apply w (that result is ignored)
+ w.ClearFlag(Effective)
+ sc.setProc(w)
+ }()
+
+ if err := w.SetFlag(Effective, true, SETPCAP); err != nil {
+ return err
+ }
+ if err := sc.setProc(w); err != nil { // SETPCAP must actually be raised before continuing
+ return err
+ }
+
+ if m == ModeNoPriv || m == ModePure1EInit {
+ w.ClearFlag(Inheritable) // takes effect when the deferred setProc(w) runs
+ } else if m != ModePure1E { // only the three modes named here are supported
+ return ErrBadMode
+ }
+
+ sb := securedAmbientBits
+ if _, err := GetAmbient(0); err != nil { // Ambient bit 0 cannot be read: use the basic bits only
+ sb = securedBasicBits
+ } else if err := sc.resetAmbient(); err != nil {
+ return err
+ }
+
+ if err := sc.setSecbits(sb); err != nil {
+ return err
+ }
+
+ if m != ModeNoPriv { // the remaining steps apply to ModeNoPriv only
+ return nil
+ }
+
+ for c := Value(0); sc.dropBound(c) == nil; c++ { // drop Bounding values until the first failure
+ }
+ w.ClearFlag(Permitted) // applied by the deferred setProc(w)
+
+ // For good measure.
+ sc.prctlwcall6(prSetNoNewPrivs, 1, 0, 0, 0, 0) // the result of this call is ignored
+
+ return nil
+}
+
+// Set attempts to enter the specified mode. An attempt is made to
+// enter the mode, so if you prefer this operation to be a no-op if
+// entering the same mode, call only if GetMode() disagrees with
+// the desired mode.
+//
+// This function will raise cap.SETPCAP in order to achieve this
+// operation, and will completely lower the Effective Flag of the
+// process' Set before returning. This function may fail for lack of
+// permission or because (some of) the Secbits are already locked for
+// the current process.
+func (m Mode) Set() error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setMode(m)
+}
+
+// String renders m using the name libcap conventionally gives the
+// mode; any value outside the four known modes yields "UNKNOWN".
+func (m Mode) String() string {
+ if m == ModeUncertain {
+ return "UNCERTAIN"
+ }
+ if m == ModeNoPriv {
+ return "NOPRIV"
+ }
+ if m == ModePure1EInit {
+ return "PURE1E_INIT"
+ }
+ if m == ModePure1E {
+ return "PURE1E"
+ }
+ return "UNKNOWN"
+}
+
+func (sc *syscaller) setUID(uid int) error { // worker for SetUID(); the caller holds scwMu
+ w := GetProc()
+ defer func() { // on every return path: lower Effective and apply w (that result is ignored)
+ w.ClearFlag(Effective)
+ sc.setProc(w)
+ }()
+
+ if err := w.SetFlag(Effective, true, SETUID); err != nil {
+ return err
+ }
+
+ // these may or may not work depending on whether or not they
+ // are locked. We try them just in case.
+ sc.prctlwcall(prSetKeepCaps, 1, 0)
+ defer sc.prctlwcall(prSetKeepCaps, 0, 0) // deferred later, so it runs before the Effective-lowering defer above
+
+ if err := sc.setProc(w); err != nil { // raise SETUID in the process before the setuid call
+ return err
+ }
+
+ if _, _, err := sc.w3(syscall.SYS_SETUID, uintptr(uid), 0, 0); err != 0 { // err is an errno; zero means success
+ return err
+ }
+ return nil
+}
+
+// SetUID is a convenience function for robustly setting the UID and
+// all other variants of UID (EUID etc) to the specified value without
+// dropping the privilege of the current process. This function will
+// raise cap.SETUID in order to achieve this operation, and will
+// completely lower the Effective vector of the process before
+// returning. Unlike the traditional method of dropping privilege when
+// changing from [E]UID=0 to some other UID, this function only
+// performs a change of UID if cap.SETUID is available, and the action
+// does not alter the Permitted Flag of the process' Set.
+func SetUID(uid int) error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setUID(uid)
+}
+
+//go:uintptrescapes
+func (sc *syscaller) setGroups(gid int, suppl []int) error { // worker for SetGroups(); the caller holds scwMu
+ w := GetProc()
+ defer func() { // on every return path: lower Effective and apply w (that result is ignored)
+ w.ClearFlag(Effective)
+ sc.setProc(w)
+ }()
+
+ if err := w.SetFlag(Effective, true, SETGID); err != nil {
+ return err
+ }
+ if err := sc.setProc(w); err != nil {
+ return err
+ }
+
+ if _, _, err := sc.w3(syscall.SYS_SETGID, uintptr(gid), 0, 0); err != 0 { // err is an errno; zero means success
+ return err
+ }
+ if len(suppl) == 0 { // no supplementary groups: pass a zero count and a zero pointer
+ if _, _, err := sc.w3(sysSetGroupsVariant, 0, 0, 0); err != 0 {
+ return err
+ }
+ return nil
+ }
+
+ // On linux gid values are 32-bits.
+ gs := make([]uint32, len(suppl))
+ for i, g := range suppl {
+ gs[i] = uint32(g) // narrowing conversion; no range check is made
+ }
+ if _, _, err := sc.w3(sysSetGroupsVariant, uintptr(len(suppl)), uintptr(unsafe.Pointer(&gs[0])), 0); err != 0 {
+ return err
+ }
+ return nil
+}
+
+// SetGroups is a convenience function for robustly setting the GID
+// and all other variants of GID (EGID etc) to the specified value, as
+// well as setting all of the supplementary groups. This function will
+// raise cap.SETGID in order to achieve this operation, and will
+// completely lower the Effective Flag of the process Set before
+// returning. Calling with no suppl values requests an empty list.
+func SetGroups(gid int, suppl ...int) error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setGroups(gid, suppl)
+}
diff --git a/cap/file.go b/cap/file.go
new file mode 100644
index 0000000..6658f1b
--- /dev/null
+++ b/cap/file.go
@@ -0,0 +1,386 @@
+package cap
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "io"
+ "os"
+ "syscall"
+ "unsafe"
+)
+
+// uapi/linux/xattr.h defined.
+var (
+ xattrNameCaps, _ = syscall.BytePtrFromString("security.capability") // NUL-terminated name handed to the xattr syscalls
+)
+
+// uapi/linux/capability.h defined.
+const (
+ vfsCapRevisionMask = uint32(0xff000000) // the top byte of MagicEtc selects the revision
+ vfsCapFlagsMask = ^vfsCapRevisionMask // 0x00ffffff: the remaining, flag, bits
+ vfsCapFlagsEffective = uint32(1) // the one Effective flag kept in the file metadata
+
+ vfsCapRevision1 = uint32(0x01000000)
+ vfsCapRevision2 = uint32(0x02000000)
+ vfsCapRevision3 = uint32(0x03000000)
+)
+
+// Data types stored in little-endian order.
+
+type vfsCaps1 struct { // 12 bytes encoded: MagicEtc plus one (Permitted, Inheritable) pair
+ MagicEtc uint32
+ Data [1]struct {
+ Permitted, Inheritable uint32
+ }
+}
+
+type vfsCaps2 struct { // 20 bytes encoded: MagicEtc plus two pairs
+ MagicEtc uint32
+ Data [2]struct {
+ Permitted, Inheritable uint32
+ }
+}
+
+type vfsCaps3 struct { // 24 bytes encoded: the vfsCaps2 layout followed by RootID
+ MagicEtc uint32
+ Data [2]struct {
+ Permitted, Inheritable uint32
+ }
+ RootID uint32 // digestFileCap() stores this in the Set's nsRoot
+}
+
+// ErrBadSize indicates that the loaded file capability has
+// an invalid number of bytes in it.
+var ErrBadSize = errors.New("filecap bad size")
+
+// ErrBadMagic indicates that the kernel preferred magic number for
+// capability Set values is not supported by this package. This
+// generally implies you are using an exceptionally old
+// "../libcap/cap" package. An upgrade is needed, or failing that see
+// https://sites.google.com/site/fullycapable/ for how to file a bug.
+var ErrBadMagic = errors.New("unsupported magic")
+
+// ErrBadPath indicates a failed attempt to set a file capability on
+// an irregular (non-executable) file.
+var ErrBadPath = errors.New("file is not a regular executable")
+
+// digestFileCap unpacks the first sz bytes of d as a file capability
+// and returns it in *Set form. A non-nil err is returned unchanged.
+func digestFileCap(d []byte, sz int, err error) (*Set, error) {
+ if err != nil {
+ return nil, err
+ }
+ var raw1 vfsCaps1
+ var raw2 vfsCaps2
+ var raw3 vfsCaps3
+ if sz < binary.Size(raw1) || sz > binary.Size(raw3) { // only lengths between the smallest and largest layouts are accepted
+ return nil, ErrBadSize
+ }
+ b := bytes.NewReader(d[:sz])
+ var magicEtc uint32
+ if err = binary.Read(b, binary.LittleEndian, &magicEtc); err != nil { // peek at the header to learn the revision
+ return nil, err
+ }
+
+ c := NewSet()
+ b.Seek(0, io.SeekStart) // rewind: each case below re-reads the header as part of its struct
+ switch magicEtc & vfsCapRevisionMask {
+ case vfsCapRevision1:
+ if err = binary.Read(b, binary.LittleEndian, &raw1); err != nil {
+ return nil, err
+ }
+ data := raw1.Data[0]
+ c.flat[0][Permitted] = data.Permitted
+ c.flat[0][Inheritable] = data.Inheritable
+ if raw1.MagicEtc&vfsCapFlagsMask == vfsCapFlagsEffective {
+ c.flat[0][Effective] = data.Inheritable | data.Permitted // the single stored flag expands to every P or I bit
+ }
+ case vfsCapRevision2:
+ if err = binary.Read(b, binary.LittleEndian, &raw2); err != nil { // fails if sz is too short for this layout
+ return nil, err
+ }
+ for i, data := range raw2.Data {
+ c.flat[i][Permitted] = data.Permitted
+ c.flat[i][Inheritable] = data.Inheritable
+ if raw2.MagicEtc&vfsCapFlagsMask == vfsCapFlagsEffective {
+ c.flat[i][Effective] = data.Inheritable | data.Permitted
+ }
+ }
+ case vfsCapRevision3:
+ if err = binary.Read(b, binary.LittleEndian, &raw3); err != nil { // fails if sz is too short for this layout
+ return nil, err
+ }
+ for i, data := range raw3.Data {
+ c.flat[i][Permitted] = data.Permitted
+ c.flat[i][Inheritable] = data.Inheritable
+ if raw3.MagicEtc&vfsCapFlagsMask == vfsCapFlagsEffective {
+ c.flat[i][Effective] = data.Inheritable | data.Permitted
+ }
+ }
+ c.nsRoot = int(raw3.RootID) // only revision 3 carries a namespace root
+ default:
+ return nil, ErrBadMagic
+ }
+ return c, nil
+}
+
+//go:uintptrescapes
+
+// GetFd returns the file capabilities of an open (*os.File).Fd().
+func GetFd(file *os.File) (*Set, error) {
+ var raw3 vfsCaps3
+ d := make([]byte, binary.Size(raw3)) // sized for the largest (revision 3) layout
+ sz, _, oErr := multisc.r6(syscall.SYS_FGETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+ var err error
+ if oErr != 0 { // only a non-zero errno becomes a non-nil error
+ err = oErr
+ }
+ return digestFileCap(d, int(sz), err)
+}
+
+//go:uintptrescapes
+
+// GetFile returns the file capabilities of a named file.
+func GetFile(path string) (*Set, error) {
+ p, err := syscall.BytePtrFromString(path)
+ if err != nil {
+ return nil, err
+ }
+ var raw3 vfsCaps3
+ d := make([]byte, binary.Size(raw3)) // sized for the largest (revision 3) layout
+ sz, _, oErr := multisc.r6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+ if oErr != 0 { // only a non-zero errno becomes a non-nil error
+ err = oErr
+ }
+ return digestFileCap(d, int(sz), err)
+}
+
+// GetNSOwner returns the namespace owner UID of the capability Set.
+// It returns ErrBadMagic when the package-level magic is older than
+// kv3, and ErrBadSet (rather than panicking) when c is nil, matching
+// the nil handling of GetFlag() and Export().
+//
+// NOTE(review): nsRoot is read without taking c.mu, as in the
+// original; confirm whether callers rely on that before adding a lock.
+func (c *Set) GetNSOwner() (int, error) {
+ if magic < kv3 {
+ return 0, ErrBadMagic
+ }
+ if c == nil {
+ return 0, ErrBadSet
+ }
+ return c.nsRoot, nil
+}
+
+// SetNSOwner adds an explicit namespace owner UID to the capability
+// Set. This is only honored when generating file capabilities, and is
+// generally for use by a setup process when installing binaries that
+// use file capabilities to become capable inside a namespace to be
+// administered by that UID. If capability aware code within that
+// namespace writes file capabilities without explicitly setting such
+// a UID, the kernel will fix-up the capabilities to be specific to
+// that owner. In this way, the kernel prevents filesystem
+// capabilities from leaking out of that restricted namespace.
+func (c *Set) SetNSOwner(uid int) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ c.nsRoot = uid // a non-zero value makes packFileCap() choose the revision 3 layout
+}
+
+// packFileCap transforms a system capability into a VFS form. Because
+// of the way Linux stores capabilities in the file extended
+// attributes, the process is a little lossy with respect to effective
+// bits.
+//
+// The layout is chosen from the package-level words count and from
+// c.nsRoot: one word gives revision 1 (and rejects a non-zero nsRoot
+// with ErrBadSet), two words give revision 2, or revision 3 when
+// nsRoot is non-zero. Any other word count returns ErrBadSize. The
+// result is the little-endian header, the (Permitted, Inheritable)
+// pairs and, for revision 3 only, the 32-bit root ID. Callers in this
+// file hold c.mu while calling.
+func (c *Set) packFileCap() ([]byte, error) {
+ var magic uint32
+ switch words {
+ case 1:
+ if c.nsRoot != 0 {
+ return nil, ErrBadSet // nsRoot not supported for single DWORD caps.
+ }
+ magic = vfsCapRevision1
+ case 2:
+ if c.nsRoot == 0 {
+ magic = vfsCapRevision2
+ break
+ }
+ magic = vfsCapRevision3
+ }
+ if magic == 0 {
+ return nil, ErrBadSize
+ }
+ // A single flag records whether any Effective bit overlaps the
+ // Permitted or Inheritable bits.
+ eff := uint32(0)
+ for _, f := range c.flat {
+ eff |= (f[Permitted] | f[Inheritable]) & f[Effective]
+ }
+ if eff != 0 {
+ magic |= vfsCapFlagsEffective
+ }
+ // Collect every field as a uint32 before encoding. encoding/binary
+ // only encodes fixed-size values, so nsRoot (an int) has to be
+ // converted: writing it directly fails and leaves a revision 3
+ // header without its root ID.
+ out := make([]uint32, 0, 2+2*len(c.flat))
+ out = append(out, magic)
+ for _, f := range c.flat {
+ out = append(out, f[Permitted], f[Inheritable])
+ }
+ if c.nsRoot != 0 {
+ out = append(out, uint32(c.nsRoot))
+ }
+ b := new(bytes.Buffer)
+ if err := binary.Write(b, binary.LittleEndian, out); err != nil {
+ return nil, err
+ }
+ return b.Bytes(), nil
+}
+
+//go:uintptrescapes
+
+// SetFd attempts to set the file capabilities of an open
+// (*os.File).Fd(). This function can also be used to delete a file's
+// capabilities, by calling with c = nil.
+//
+// Note, Linux does not store the full Effective Value Flag in the
+// metadata for the file. Only a single Effective bit is stored in
+// this metadata. This single bit is non-zero if the Effective vector
+// has any overlapping bits with the Permitted or Inheritable vector
+// of c. This may appear suboptimal, but the reasoning behind it is
+// sound. Namely, the purpose of the Effective bit is to support
+// capability unaware binaries that will only work if they magically
+// launch with the needed bits already raised (this bit is sometimes
+// referred to simply as the 'legacy' bit). Without *full* support for
+// capability manipulation, as it is provided in this "../libcap/cap"
+// package, this was the only way for Go programs to make use of
+// file capabilities.
+//
+// The preferred way a binary will actually manipulate its
+// file-acquired capabilities is to carefully and deliberately use
+// this package (or libcap, assisted by libpsx, for threaded C/C++
+// family code).
+func (c *Set) SetFd(file *os.File) error {
+ if c == nil { // a nil Set requests removal of the attribute
+ if _, _, err := multisc.r6(syscall.SYS_FREMOVEXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+ return err
+ }
+ return nil
+ }
+ c.mu.Lock() // held while the Set is packed and written
+ defer c.mu.Unlock()
+ d, err := c.packFileCap()
+ if err != nil {
+ return err
+ }
+ if _, _, err := multisc.r6(syscall.SYS_FSETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+ return err
+ }
+ return nil
+}
+
+//go:uintptrescapes
+
+// SetFile attempts to set the file capabilities of the specified
+// filename. This function can also be used to delete a file's
+// capabilities, by calling with c = nil.
+//
+// Note, see the comment for SetFd() for some non-obvious behavior of
+// Linux for the Effective Value vector on the modified file.
+func (c *Set) SetFile(path string) error {
+ fi, err := os.Stat(path)
+ if err != nil {
+ return err
+ }
+ mode := fi.Mode()
+ if mode&os.ModeType != 0 { // any type bit set: not a regular file
+ return ErrBadPath
+ }
+ if mode&os.FileMode(0111) == 0 { // no execute bit for user, group or other
+ return ErrBadPath
+ }
+ p, err := syscall.BytePtrFromString(path)
+ if err != nil {
+ return err
+ }
+ if c == nil { // a nil Set requests removal; the checks above still applied
+ if _, _, err := multisc.r6(syscall.SYS_REMOVEXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+ return err
+ }
+ return nil
+ }
+ c.mu.Lock() // held while the Set is packed and written
+ defer c.mu.Unlock()
+ d, err := c.packFileCap()
+ if err != nil {
+ return err
+ }
+ if _, _, err := multisc.r6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+ return err
+ }
+ return nil
+}
+
+// ExtMagic is the 32-bit (little endian) magic for an external
+// capability set. It can be used to transmit capabilities in binary
+// format in a Linux portable way. The format is:
+// <ExtMagic><byte:length><length-bytes*3-of-cap-data>.
+const ExtMagic = uint32(0x5101c290)
+
+// Import imports a Set from a byte array where it has been stored in
+// a portable (lossless) way. See Export() for the producer.
+func Import(d []byte) (*Set, error) {
+ b := bytes.NewBuffer(d)
+ var m uint32
+ if err := binary.Read(b, binary.LittleEndian, &m); err != nil { // too short to hold the magic
+ return nil, ErrBadSize
+ } else if m != ExtMagic {
+ return nil, ErrBadMagic
+ }
+ var n byte
+ if err := binary.Read(b, binary.LittleEndian, &n); err != nil { // n counts the 3-byte groups that follow
+ return nil, ErrBadSize
+ }
+ c := NewSet()
+ if int(n) > 4*words { // at most four groups (one per byte) for each 32-bit word
+ return nil, ErrBadSize
+ }
+ f := make([]byte, 3) // one group: an Effective, a Permitted and an Inheritable byte
+ for i := 0; i < words; i++ {
+ for j := uint(0); n > 0 && j < 4; j++ {
+ n--
+ if x, err := b.Read(f); err != nil || x != 3 { // a truncated group is rejected
+ return nil, ErrBadSize
+ }
+ sh := 8 * j // groups arrive least significant byte first
+ c.flat[i][Effective] |= uint32(f[0]) << sh
+ c.flat[i][Permitted] |= uint32(f[1]) << sh
+ c.flat[i][Inheritable] |= uint32(f[2]) << sh
+ }
+ }
+ return c, nil
+}
+
+// Export exports a Set into a lossless byte array format where it is
+// stored in a portable way. Note, any namespace owner in the Set
+// content is not exported by this function.
+func (c *Set) Export() ([]byte, error) {
+ if c == nil {
+ return nil, ErrBadSet
+ }
+ b := new(bytes.Buffer)
+ binary.Write(b, binary.LittleEndian, ExtMagic)
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ var n = byte(0) // number of 3-byte groups needed to reach the highest raised bit
+ for i, f := range c.flat {
+ if u := f[Effective] | f[Permitted] | f[Inheritable]; u != 0 {
+ n = 4 * byte(i) // every lower word is emitted in full
+ for ; u != 0; u >>= 8 { // plus one group per byte up to this word's highest non-zero byte
+ n++
+ }
+ }
+ }
+ b.Write([]byte{n})
+ for _, f := range c.flat {
+ if n == 0 { // all required groups have been written
+ break
+ }
+ eff, per, inh := f[Effective], f[Permitted], f[Inheritable]
+ for i := 0; n > 0 && i < 4; i++ {
+ n--
+ b.Write([]byte{ // one group, least significant byte of each vector first
+ byte(eff & 0xff),
+ byte(per & 0xff),
+ byte(inh & 0xff),
+ })
+ eff >>= 8
+ per >>= 8
+ inh >>= 8
+ }
+ }
+ return b.Bytes(), nil
+}
diff --git a/cap/flags.go b/cap/flags.go
new file mode 100644
index 0000000..b800a2d
--- /dev/null
+++ b/cap/flags.go
@@ -0,0 +1,161 @@
+package cap
+
+import "errors"
+
+// GetFlag determines if the requested bit is enabled in the Flag
+// vector of the capability Set.
+func (c *Set) GetFlag(vec Flag, val Value) (bool, error) {
+ if c == nil || len(c.flat) == 0 {
+ // Checked this first, because otherwise we are sure
+ // cInit has been called.
+ return false, ErrBadSet
+ }
+ offset, mask, err := bitOf(vec, val) // word index and single-bit mask for val
+ if err != nil {
+ return false, err
+ }
+ c.mu.RLock() // a read lock suffices: nothing is modified
+ defer c.mu.RUnlock()
+ return c.flat[offset][vec]&mask != 0, nil
+}
+
+// SetFlag sets the requested bits to the indicated enable state. This
+// function does not perform any security checks, so values can be set
+// out-of-order. Only when the Set is used to SetProc() etc., will the
+// bits be checked for validity and permission by the kernel. If the
+// function returns an error, the Set will not be modified.
+func (c *Set) SetFlag(vec Flag, enable bool, val ...Value) error {
+ if c == nil || len(c.flat) == 0 {
+ // Checked this first, because otherwise we are sure
+ // cInit has been called.
+ return ErrBadSet
+ }
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ // Make a backup.
+ replace := make([]uint32, words)
+ for i := range replace {
+ replace[i] = c.flat[i][vec] // NOTE(review): vec is only validated later, inside bitOf()
+ }
+ var err error
+ for _, v := range val {
+ offset, mask, err2 := bitOf(vec, v)
+ if err2 != nil { // stop at the first bad value; earlier changes are undone below
+ err = err2
+ break
+ }
+ if enable {
+ c.flat[offset][vec] |= mask
+ } else {
+ c.flat[offset][vec] &= ^mask
+ }
+ }
+ if err == nil {
+ return nil
+ }
+ // Clean up.
+ for i, bits := range replace { // restore the backup so a failed call leaves c untouched
+ c.flat[i][vec] = bits
+ }
+ return err
+}
+
+// Clear fully clears a capability set, including its namespace owner.
+func (c *Set) Clear() error {
+ if c == nil || len(c.flat) == 0 {
+ return ErrBadSet
+ }
+ // startUp.Do(cInit) is not called here because c cannot be
+ // initialized except via this package and doing that will
+ // perform that call at least once (sic).
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ c.flat = make([]data, words) // a fresh, zero-valued replacement for every vector
+ c.nsRoot = 0
+ return nil
+}
+
+// ErrBadValue indicates a bad capability value was specified.
+var ErrBadValue = errors.New("bad capability value")
+
+// bitOf converts from a Value into the offset and mask for a
+// specific Value bit in the compressed (kernel ABI) representation of
+// a capability vector. If the requested bit is unsupported, an error
+// is returned.
+//
+// The returned offset is always less than words: val == words*32
+// would select a word one past the end of a Set's flat slice, so it
+// is rejected along with every larger value.
+func bitOf(vec Flag, val Value) (uint, uint32, error) {
+ if vec > Inheritable || val >= Value(words*32) {
+ return 0, 0, ErrBadValue
+ }
+ u := uint(val)
+ return u / 32, uint32(1) << (u % 32), nil
+}
+
+// allMask reports which bits of the 32-bit word at index correspond
+// to values below maxValues: none, all of them, or just the low
+// maxValues%32 bits for the word that straddles the limit. It panics
+// if maxValues has not been set.
+func allMask(index uint) (mask uint32) {
+ if maxValues == 0 {
+ panic("uninitialized package")
+ }
+ switch lo := 32 * uint(index); {
+ case maxValues <= lo:
+ return 0
+ case maxValues >= 32+lo:
+ return ^uint32(0)
+ default:
+ return uint32((uint64(1) << (maxValues % 32)) - 1)
+ }
+}
+
+// forceFlag sets 'all' capability values (supported by the kernel) of
+// a flag vector to enable.
+func (c *Set) forceFlag(vec Flag, enable bool) error {
+ if c == nil || len(c.flat) == 0 || vec > Inheritable {
+ return ErrBadSet
+ }
+ m := uint32(0)
+ if enable {
+ m = ^m
+ }
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ for i := range c.flat {
+ c.flat[i][vec] = m & allMask(uint(i))
+ }
+ return nil
+}
+
+// ClearFlag clears a specific vector of Values associated with the
+// specified Flag.
+func (c *Set) ClearFlag(vec Flag) error {
+ return c.forceFlag(vec, false)
+}
+
+// Compare returns 0 if c and d are identical in content. Otherwise,
+// the result has one bit raised for each Flag whose vector differs:
+// (differE ? 1:0) | (differP ? 2:0) | (differI ? 4:0). Use Differs()
+// to test the result for a specific Flag. ErrBadSet is returned if
+// either Set is nil or empty.
+func (c *Set) Compare(d *Set) (uint, error) {
+ if c == nil || len(c.flat) == 0 || d == nil || len(d.flat) == 0 {
+ return 0, ErrBadSet
+ }
+ var cf uint
+ for i := 0; i < words; i++ {
+ x, y := c.flat[i], d.flat[i]
+ for _, vec := range []Flag{Effective, Permitted, Inheritable} {
+ if x[vec] != y[vec] {
+ cf |= 1 << vec
+ }
+ }
+ }
+ return cf, nil
+}
+
+// Differs inspects a Compare() result and reports whether the bit
+// for the vec Flag is raised in it.
+func Differs(cf uint, vec Flag) bool {
+ bit := uint(1) << vec
+ return cf&bit != 0
+}
diff --git a/cap/go.mod b/cap/go.mod
new file mode 100644
index 0000000..45e38fa
--- /dev/null
+++ b/cap/go.mod
@@ -0,0 +1,5 @@
+module kernel.org/pub/linux/libs/security/libcap/cap
+
+go 1.11
+
+require kernel.org/pub/linux/libs/security/libcap/psx v0.2.48
diff --git a/cap/iab.go b/cap/iab.go
new file mode 100644
index 0000000..877ed12
--- /dev/null
+++ b/cap/iab.go
@@ -0,0 +1,287 @@
+package cap
+
+import "strings"
+
+// omask locates capability c within an array of 32-bit words: it
+// returns the index of the word holding c and the single-bit mask
+// that selects c inside that word.
+func omask(c Value) (uint, uint32) {
+	bit := uint(c)
+	return bit / 32, uint32(1) << (bit % 32)
+}
+
+// IAB holds a summary of all of the inheritable capability vectors:
+// Inh, Amb and Bound. The Bound vector is the logical inverse (bitwise
+// complement) of the process' Bounding set. That is, raising a Value
+// in the Bound (think blocked) vector is equivalent to dropping that
+// Value from the process' Bounding set. This convention is used to
+// support the empty IAB as being mostly harmless.
+type IAB struct {
+ // a, i and nb hold the Amb, Inh and Bound vectors respectively, as
+ // arrays of 32-bit words indexed by the offset omask() returns.
+ a, i, nb []uint32
+}
+
+// Vector enumerates which of the inheritable IAB capability vectors
+// is being manipulated.
+type Vector uint
+
+// Inh, Amb, Bound enumerate the IAB vector components. (Vector) Inh
+// is equivalent to (Flag) Inheritable. They are named differently for
+// syntax/type checking reasons.
+const (
+ // Inh selects the inheritable vector.
+ Inh Vector = iota
+ // Amb selects the ambient vector.
+ Amb
+ // Bound selects the blocked vector, the inverse of the Bounding set.
+ Bound
+)
+
+// String returns the conventional one letter abbreviation of a
+// Vector: "I", "A" or "B". Any other value yields "<Error>".
+func (v Vector) String() string {
+	abbrev := [...]string{Inh: "I", Amb: "A", Bound: "B"}
+	if v > Bound {
+		return "<Error>"
+	}
+	return abbrev[v]
+}
+
+// IABInit returns an empty IAB: one with every Inh, Amb and Bound
+// bit lowered.
+func IABInit() *IAB {
+	// Run the package's one-time start up before words is read.
+	startUp.Do(multisc.cInit)
+	fresh := new(IAB)
+	fresh.i = make([]uint32, words)
+	fresh.a = make([]uint32, words)
+	fresh.nb = make([]uint32, words)
+	return fresh
+}
+
+// IABGetProc summarizes the Inh, Amb and Bound capability vectors of
+// the current process.
+func IABGetProc() *IAB {
+	iab := IABInit()
+	// Any error from Fill is discarded.
+	iab.Fill(Inh, GetProc(), Inheritable)
+	// Visit every capability value below MaxBits(), highest first.
+	for n := MaxBits(); n > 0; n-- {
+		c := n - 1
+		offset, mask := omask(c)
+		if raised, _ := GetAmbient(c); raised {
+			iab.a[offset] |= mask
+		}
+		// A value that is not in the Bounding set is a raised
+		// bit in the (inverted) Bound vector.
+		if inBound, err := GetBound(c); err == nil && !inBound {
+			iab.nb[offset] |= mask
+		}
+	}
+	return iab
+}
+
+// IABFromText parses a string representing an IAB, as generated
+// by IAB.String(), to generate an IAB. The text is a comma separated
+// list of capability names (or numbers), each optionally prefixed
+// with any of: "!" (raise in Bound), "^" (raise in Inh and Amb) and
+// "%" (raise in Inh). A value with no "!" prefix is raised in Inh.
+// The empty string yields an empty IAB.
+func IABFromText(text string) (*IAB, error) {
+	iab := IABInit()
+	if text == "" {
+		return iab, nil
+	}
+	for _, field := range strings.Split(text, ",") {
+		var inh, amb, blocked bool
+		start := 0
+	prefix:
+		// Consume the leading modifier characters; start ends up
+		// indexing the first character of the capability name.
+		for ; start < len(field); start++ {
+			switch field[start] {
+			case '!':
+				blocked = true
+			case '^':
+				inh, amb = true, true
+			case '%':
+				inh = true
+			default:
+				break prefix
+			}
+		}
+		c, err := FromName(field[start:])
+		if err != nil {
+			return nil, err
+		}
+		offset, mask := omask(c)
+		if inh || !blocked {
+			iab.i[offset] |= mask
+		}
+		if amb {
+			iab.a[offset] |= mask
+		}
+		if blocked {
+			iab.nb[offset] |= mask
+		}
+	}
+	return iab, nil
+}
+
+// String serializes an IAB to a string format: a comma separated
+// list with one entry for each capability raised in any of the three
+// vectors. An entry is the capability's name prefixed with "!" when
+// it is raised in Bound, followed by "^" when it is raised in Amb or,
+// failing that, by "%" when it is raised in both Bound and Inh.
+func (iab *IAB) String() string {
+	var parts []string
+	for c := Value(0); c < Value(maxValues); c++ {
+		offset, mask := omask(c)
+		inh := iab.i[offset]&mask != 0
+		amb := iab.a[offset]&mask != 0
+		blocked := iab.nb[offset]&mask != 0
+		if !(inh || amb || blocked) {
+			continue
+		}
+		prefix := ""
+		if blocked {
+			prefix = "!"
+		}
+		switch {
+		case amb:
+			prefix += "^"
+		case blocked && inh:
+			prefix += "%"
+		}
+		parts = append(parts, prefix+c.String())
+	}
+	return strings.Join(parts, ",")
+}
+
+// iabSetProc applies iab through the sc syscaller. It replaces the
+// Inheritable flag of the prevailing capability Set with iab's Inh
+// vector, resets the ambient vector and then, for each capability
+// value, calls setAmbient for those raised in Amb and dropBound for
+// those raised in Bound. The first error encountered is returned.
+func (sc *syscaller) iabSetProc(iab *IAB) (err error) {
+ temp := GetProc()
+ // raising becomes non-zero if the new Inh vector holds a bit that is
+ // in neither the prevailing Inheritable nor Permitted flags, or if
+ // any Amb or Bound bit is requested.
+ var raising uint32
+ for i := 0; i < words; i++ {
+ newI := iab.i[i]
+ oldIP := temp.flat[i][Inheritable] | temp.flat[i][Permitted]
+ raising |= (newI & ^oldIP) | iab.a[i] | iab.nb[i]
+ temp.flat[i][Inheritable] = newI
+ }
+ // working is a copy of temp (new Inheritable included) that may
+ // additionally get SETPCAP raised in its Effective flag.
+ working, err2 := temp.Dup()
+ if err2 != nil {
+ err = err2
+ return
+ }
+ if raising != 0 {
+ if err = working.SetFlag(Effective, true, SETPCAP); err != nil {
+ return
+ }
+ if err = sc.setProc(working); err != nil {
+ return
+ }
+ }
+ // From here on, always finish by installing temp. The error from
+ // that final setProc is only reported if nothing failed earlier.
+ defer func() {
+ if err2 := sc.setProc(temp); err == nil {
+ err = err2
+ }
+ }()
+ if err = sc.resetAmbient(); err != nil {
+ return
+ }
+ // Walk the capability values from the highest down to 0, stopping
+ // at the first failure.
+ for c := Value(maxValues); c > 0; {
+ c--
+ offset, mask := omask(c)
+ if iab.a[offset]&mask != 0 {
+ err = sc.setAmbient(true, c)
+ }
+ if err == nil && iab.nb[offset]&mask != 0 {
+ err = sc.dropBound(c)
+ }
+ if err != nil {
+ return
+ }
+ }
+ return
+}
+
+// SetProc attempts to change the Inheritable, Ambient and Bounding
+// capability vectors of the current process using the content,
+// iab. The Bounding vector strongly affects the potential for setting
+// other bits, so this function carefully performs the combined
+// operation in the most flexible manner. The work is done by
+// iabSetProc on the multisc syscaller while scwMu is held.
+func (iab *IAB) SetProc() error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.iabSetProc(iab)
+}
+
+// GetVector returns the raised state of the specific capability bit
+// of the indicated vector.
+func (iab *IAB) GetVector(vec Vector, val Value) (bool, error) {
+ if val >= MaxBits() {
+ return false, ErrBadValue
+ }
+ offset, mask := omask(val)
+ switch vec {
+ case Inh:
+ return (iab.i[offset] & mask) != 0, nil
+ case Amb:
+ return (iab.a[offset] & mask) != 0, nil
+ case Bound:
+ return (iab.nb[offset] & mask) != 0, nil
+ default:
+ return false, ErrBadValue
+ }
+}
+
+// SetVector sets all of the vals in the specified vector to the
+// raised value. Note, the Ambient vector cannot contain values not
+// raised in the Inh vector, so setting values directly in one vector
+// may have the side effect of mirroring the value in the other vector
+// to maintain this constraint: lowering an Inh value also lowers it
+// in Amb, and raising an Amb value also raises it in Inh. Note,
+// raising a Bound vector bit is equivalent to lowering the Bounding
+// vector of the process (when successfully applied with
+// (*IAB).SetProc()).
+//
+// The vals are processed in order and ErrBadValue is returned at the
+// first val that is out of range, or if vec is not a known Vector;
+// vals processed before that point remain changed.
+func (iab *IAB) SetVector(vec Vector, raised bool, vals ...Value) error {
+	for _, val := range vals {
+		if val >= Value(maxValues) {
+			return ErrBadValue
+		}
+		offset, mask := omask(val)
+		switch {
+		case vec == Inh && raised:
+			iab.i[offset] |= mask
+		case vec == Inh:
+			iab.i[offset] &^= mask
+			iab.a[offset] &^= mask
+		case vec == Amb && raised:
+			iab.a[offset] |= mask
+			iab.i[offset] |= mask
+		case vec == Amb:
+			iab.a[offset] &^= mask
+		case vec == Bound && raised:
+			iab.nb[offset] |= mask
+		case vec == Bound:
+			iab.nb[offset] &^= mask
+		default:
+			return ErrBadValue
+		}
+	}
+	return nil
+}
+
+// Fill fills one of the Inh, Amb and Bound capability vectors from
+// one of the flag vectors of a Set. The Amb vector is kept a subset
+// of the Inh vector: filling the Inh vector lowers any Amb bits that
+// are not raised in the new Inh value, and filling the Amb vector
+// raises the same bits in the Inh vector. Further, when filling the
+// Bound vector, the bits are inverted from what you might expect -
+// that is lowered bits from the Set will be raised in the Bound
+// vector.
+//
+// ErrBadSet is returned if c is nil or uninitialized, if flag is
+// beyond Inheritable, or if vec is not one of Inh, Amb and Bound.
+func (iab *IAB) Fill(vec Vector, c *Set, flag Flag) error {
+	// Only an initialized Set has flag words to copy from.
+	if c == nil || len(c.flat) == 0 || flag > Inheritable {
+		return ErrBadSet
+	}
+	for i := 0; i < words; i++ {
+		flat := c.flat[i][flag]
+		switch vec {
+		case Inh:
+			iab.i[i] = flat
+			// Amb may only keep bits still raised in Inh.
+			iab.a[i] &= flat
+		case Amb:
+			iab.a[i] = flat
+			// Every raised Amb bit must also be raised in Inh.
+			iab.i[i] |= flat
+		case Bound:
+			iab.nb[i] = ^flat
+		default:
+			return ErrBadSet
+		}
+	}
+	return nil
+}
diff --git a/cap/launch.go b/cap/launch.go
new file mode 100644
index 0000000..4ae449c
--- /dev/null
+++ b/cap/launch.go
@@ -0,0 +1,252 @@
+package cap
+
+import (
+ "errors"
+ "os"
+ "runtime"
+ "syscall"
+ "unsafe"
+)
+
+// Launcher holds a configuration for launching a child process with
+// capability state different from (generally more restricted than)
+// the parent.
+//
+// Note, go1.10 is the earliest version of the Go toolchain that can
+// support this abstraction.
+type Launcher struct {
+ // path and args are passed to syscall.ForkExec. env becomes the
+ // child's environment; when it is empty os.Environ() is used.
+ path string
+ args []string
+ env []string
+
+ // callbackFn, when non-nil, is called at launch time before any of
+ // the changes below are applied.
+ callbackFn func(pa *syscall.ProcAttr, data interface{}) error
+
+ // changeUIDs records that SetUID() was called with uid.
+ changeUIDs bool
+ uid int
+
+ // changeGIDs records that SetGroups() was called with gid and
+ // groups.
+ changeGIDs bool
+ gid int
+ groups []int
+
+ // changeMode records that SetMode() was called with mode.
+ changeMode bool
+ mode Mode
+
+ // iab, when non-nil, is applied with iabSetProc at launch time.
+ iab *IAB
+
+ // chroot, when non-empty, is the root directory requested for the
+ // launched command.
+ chroot string
+}
+
+// NewLauncher returns a new launcher for the specified program path
+// and args with the specified environment. The args and env slices
+// are retained by the Launcher, not copied.
+func NewLauncher(path string, args []string, env []string) *Launcher {
+	attr := new(Launcher)
+	attr.path, attr.args, attr.env = path, args, env
+	return attr
+}
+
+// Callback specifies a callback for Launch() to call before changing
+// privilege. The only thing that is assumed is that the OS thread in
+// use to call this callback function at launch time will be the one
+// that ultimately calls fork. Any returned error value of said
+// function will terminate the launch process. A nil callback (the
+// default) is ignored. The specified callback fn should not call any
+// "cap" package functions since this may deadlock or generate
+// undefined behavior for the parent process.
+//
+// At launch time fn is called with the *syscall.ProcAttr that is
+// subsequently passed to syscall.ForkExec and with the data value
+// that was supplied to Launch().
+func (attr *Launcher) Callback(fn func(*syscall.ProcAttr, interface{}) error) {
+ attr.callbackFn = fn
+}
+
+// SetUID specifies the UID to be used by the launched command. The
+// value is only recorded here; it is applied at launch time, after
+// any callback has run.
+func (attr *Launcher) SetUID(uid int) {
+ attr.changeUIDs = true
+ attr.uid = uid
+}
+
+// SetGroups specifies the GID and supplementary groups for the
+// launched command. The groups slice is retained, not copied, and the
+// values are applied at launch time.
+func (attr *Launcher) SetGroups(gid int, groups []int) {
+ attr.changeGIDs = true
+ attr.gid = gid
+ attr.groups = groups
+}
+
+// SetMode specifies the libcap Mode to be used by the launched
+// command. The value is only recorded here; it is applied at launch
+// time.
+func (attr *Launcher) SetMode(mode Mode) {
+ attr.changeMode = true
+ attr.mode = mode
+}
+
+// SetIAB specifies the IAB capability vectors to be inherited by the
+// launched command. A nil value means the prevailing vectors of the
+// parent will be inherited. The iab pointer is retained, not copied.
+func (attr *Launcher) SetIAB(iab *IAB) {
+ attr.iab = iab
+}
+
+// SetChroot specifies the chroot value to be used by the launched
+// command. An empty value means no-change from the prevailing value.
+// The value is only recorded here; at launch time it is passed to
+// validatePA together with the ProcAttr.
+func (attr *Launcher) SetChroot(root string) {
+ attr.chroot = root
+}
+
+// lResult is used to get the result from the doomed launcher thread.
+type lResult struct {
+ // pid is the value returned by syscall.ForkExec, or -1 when err is
+ // non-nil.
+ pid int
+ // err is the first error encountered while launching, if any.
+ err error
+}
+
+// ErrLaunchFailed is returned if a launch was aborted with no more
+// specific error.
+var ErrLaunchFailed = errors.New("launch failed")
+
+// ErrNoLaunch indicates the go runtime available to this binary does
+// not reliably support launching. See cap.LaunchSupported.
+var ErrNoLaunch = errors.New("launch not supported")
+
+// ErrAmbiguousChroot indicates that the Launcher is being used in
+// addition to a callback supplied Chroot. The former should be used
+// exclusively for this.
+var ErrAmbiguousChroot = errors.New("use Launcher for chroot")
+
+// ErrAmbiguousIDs indicates that the Launcher is being used in
+// addition to a callback supplied Credentials. The former should be
+// used exclusively for this.
+var ErrAmbiguousIDs = errors.New("use Launcher for uids and gids")
+
+// ErrAmbiguousAmbient indicates that the Launcher is being used in
+// addition to a callback supplied ambient set and the former should
+// be used exclusively in a Launch call.
+var ErrAmbiguousAmbient = errors.New("use Launcher for ambient caps")
+
+// lName is the name we temporarily give to the launcher thread. Note,
+// this will likely stick around in the process tree if the Go runtime
+// is not cleaning up locked launcher OS threads.
+var lName = []byte("cap-launcher\000")
+
+// <uapi/linux/prctl.h>
+const prSetName = 15
+
+// launch performs the work of (*Launcher).Launch on an OS thread that
+// is locked to the calling goroutine. If that thread turns out to be
+// the one whose thread id equals the process id, launch starts another
+// goroutine to try again and waits for it. Otherwise it runs the
+// callback, validates the ProcAttr, applies the uid, groups, mode and
+// IAB settings of attr (in that order) with the singlesc syscaller,
+// raises SYS_CHROOT in the Effective flag if a chroot is needed, and
+// calls syscall.ForkExec. The outcome is sent as a single lResult on
+// result, which is then closed. quit, when non-nil, is closed when
+// this function returns.
+//
+//go:uintptrescapes
+func launch(result chan<- lResult, attr *Launcher, data interface{}, quit chan<- struct{}) {
+ if quit != nil {
+ defer close(quit)
+ }
+
+ pid := syscall.Getpid()
+ // Wait until we are not scheduled on the parent thread. We
+ // will exit this thread once the child has launched, and
+ // don't want other goroutines to use this thread afterwards.
+ runtime.LockOSThread()
+ tid := syscall.Gettid()
+ if tid == pid {
+ // Force the go runtime to find a new thread to run on.
+ quit := make(chan struct{})
+ go launch(result, attr, data, quit)
+
+ // Wait for that go routine to complete.
+ <-quit
+ runtime.UnlockOSThread()
+ return
+ }
+
+ // By never releasing the LockOSThread here, we guarantee that
+ // the runtime will terminate the current OS thread once this
+ // function returns.
+
+ // Name the launcher thread - transient, but helps to debug if
+ // the callbackFn or something else hangs up.
+ singlesc.prctlrcall(prSetName, uintptr(unsafe.Pointer(&lName[0])), 0)
+
+ // Provide a way to serialize the caller on the thread
+ // completing.
+ defer close(result)
+
+ // The child is given file descriptors 0, 1 and 2 unless the
+ // callback changes pa.Files.
+ pa := &syscall.ProcAttr{
+ Files: []uintptr{0, 1, 2},
+ }
+ var err error
+ var needChroot bool
+
+ if len(attr.env) != 0 {
+ pa.Env = attr.env
+ } else {
+ pa.Env = os.Environ()
+ }
+
+ if attr.callbackFn != nil {
+ if err = attr.callbackFn(pa, data); err != nil {
+ goto abort
+ }
+ }
+
+ // Reject pa.Sys settings that conflict with the Launcher and find
+ // out whether a chroot has been requested.
+ if needChroot, err = validatePA(pa, attr.chroot); err != nil {
+ goto abort
+ }
+ if attr.changeUIDs {
+ if err = singlesc.setUID(attr.uid); err != nil {
+ goto abort
+ }
+ }
+ if attr.changeGIDs {
+ if err = singlesc.setGroups(attr.gid, attr.groups); err != nil {
+ goto abort
+ }
+ }
+ if attr.changeMode {
+ if err = singlesc.setMode(attr.mode); err != nil {
+ goto abort
+ }
+ }
+ if attr.iab != nil {
+ if err = singlesc.iabSetProc(attr.iab); err != nil {
+ goto abort
+ }
+ }
+
+ if needChroot {
+ // Raise SYS_CHROOT in this thread's Effective flag before
+ // the ForkExec.
+ c := GetProc()
+ if err = c.SetFlag(Effective, true, SYS_CHROOT); err != nil {
+ goto abort
+ }
+ if err = singlesc.setProc(c); err != nil {
+ goto abort
+ }
+ }
+ pid, err = syscall.ForkExec(attr.path, attr.args, pa)
+
+abort:
+ // Every path, success included, reports through here.
+ if err != nil {
+ pid = -1
+ }
+ result <- lResult{pid: pid, err: err}
+}
+
+// Launch performs a new program launch with security state specified
+// in the supplied attr settings. It returns the pid and error
+// reported by the launcher goroutine. ErrLaunchFailed (with a pid of
+// -1) is returned when attr has an empty path or no args, or when the
+// launcher goroutine finishes without reporting a result, and
+// ErrNoLaunch when LaunchSupported is false. The data value is passed
+// through to the callback, if any.
+func (attr *Launcher) Launch(data interface{}) (int, error) {
+	if attr.path == "" || len(attr.args) == 0 {
+		return -1, ErrLaunchFailed
+	}
+	if !LaunchSupported {
+		return -1, ErrNoLaunch
+	}
+
+	scwMu.Lock()
+	defer scwMu.Unlock()
+
+	outcome := make(chan lResult)
+	go launch(outcome, attr, data, nil)
+	// Poll for the result, yielding the processor between attempts.
+	for {
+		select {
+		case r, open := <-outcome:
+			if open {
+				return r.pid, r.err
+			}
+			return -1, ErrLaunchFailed
+		default:
+		}
+		runtime.Gosched()
+	}
+}
diff --git a/cap/legacy.go b/cap/legacy.go
new file mode 100644
index 0000000..0bfd93e
--- /dev/null
+++ b/cap/legacy.go
@@ -0,0 +1,7 @@
+// +build linux,arm linux,386
+
+package cap
+
+import "syscall"
+
+var sysSetGroupsVariant = uintptr(syscall.SYS_SETGROUPS32)
diff --git a/cap/modern.go b/cap/modern.go
new file mode 100644
index 0000000..ad89416
--- /dev/null
+++ b/cap/modern.go
@@ -0,0 +1,8 @@
+// +build linux,!arm
+// +build linux,!386
+
+package cap
+
+import "syscall"
+
+var sysSetGroupsVariant = uintptr(syscall.SYS_SETGROUPS)
diff --git a/cap/names.go b/cap/names.go
new file mode 100644
index 0000000..9e02cd1
--- /dev/null
+++ b/cap/names.go
@@ -0,0 +1,428 @@
+package cap
+
+/* ** DO NOT EDIT THIS FILE. IT WAS AUTO-GENERATED BY LIBCAP'S GO BUILDER (mknames.go) ** */
+
+// NamedCount holds the number of capability values with official
+// names known at the time this libcap/cap version was released. The
+// "../libcap/cap" package is fully able to manipulate higher numbered
+// capability values by numerical value. However, if you find
+// cap.NamedCount < cap.MaxBits(), it is probably time to upgrade this
+// package on your system.
+//
+// FWIW the userspace tool '/sbin/capsh' also contains a runtime check
+// for the condition that libcap is behind the running kernel in this
+// way.
+const NamedCount = 41
+
+// CHOWN etc., are the named capability values of the Linux
+// kernel. The canonical source for each name is the
+// "uapi/linux/capability.h" file. Some values may not be available
+// (yet) where the kernel is older. The actual number of capabilities
+// supported by the running kernel can be obtained using the
+// cap.MaxBits() function.
+const (
+ // CHOWN allows a process to arbitrarily change the user and
+ // group ownership of a file.
+ CHOWN Value = iota
+
+ // DAC_OVERRIDE allows a process to override all Discretionary
+ // Access Control (DAC) access, including ACL execute
+ // access. That is read, write or execute files that the
+ // process would otherwise not have access to. This
+ // excludes DAC access covered by cap.LINUX_IMMUTABLE.
+ DAC_OVERRIDE
+
+ // DAC_READ_SEARCH allows a process to override all DAC restrictions
+ // limiting the read and search of files and
+ // directories. This excludes DAC access covered by
+ // cap.LINUX_IMMUTABLE.
+ DAC_READ_SEARCH
+
+ // FOWNER allows a process to perform operations on files, even
+ // where file owner ID should otherwise need be equal to
+ // the UID, except where cap.FSETID is applicable. It
+ // doesn't override MAC and DAC restrictions.
+ FOWNER
+
+ // FSETID allows a process to set the S_ISUID and S_ISGID bits of
+ // the file permissions, even when the process' effective
+ // UID or GID/supplementary GIDs do not match that of the
+ // file.
+ FSETID
+
+ // KILL allows a process to send a kill(2) signal to any other
+ // process - overriding the limitation that there be a
+ // [E]UID match between source and target process.
+ KILL
+
+ // SETGID allows a process to freely manipulate its own GIDs:
+ // - arbitrarily set the GID, EGID, REGID, RESGID values
+ // - arbitrarily set the supplementary GIDs
+ // - allows the forging of GID credentials passed over a
+ // socket
+ SETGID
+
+ // SETUID allows a process to freely manipulate its own UIDs:
+ // - arbitrarily set the UID, EUID, REUID and RESUID
+ // values
+ // - allows the forging of UID credentials passed over a
+ // socket
+ SETUID
+
+ // SETPCAP allows a process to freely manipulate its inheritable
+ // capabilities. Linux supports the POSIX.1e Inheritable
+ // set, as well as Bounding and Ambient Linux extension
+ // vectors. This capability permits dropping bits from the
+ // Bounding vector. It also permits the process to raise
+ // Ambient vector bits that are both raised in the
+ // Permitted and Inheritable sets of the process. This
+ // capability cannot be used to raise Permitted bits, or
+ // Effective bits beyond those already present in the
+ // process' permitted set.
+ //
+ // [Historical note: prior to the advent of file
+ // capabilities (2008), this capability was suppressed by
+ // default, as its unsuppressed behavior was not
+ // auditable: it could asynchronously grant its own
+ // Permitted capabilities to and remove capabilities from
+ // other processes arbitrarily. The former leads to
+ // undefined behavior, and the latter is better served by
+ // the kill system call.]
+ SETPCAP
+
+ // LINUX_IMMUTABLE allows a process to modify the S_IMMUTABLE and
+ // S_APPEND file attributes.
+ LINUX_IMMUTABLE
+
+ // NET_BIND_SERVICE allows a process to bind to privileged ports:
+ // - TCP/UDP sockets below 1024
+ // - ATM VCIs below 32
+ NET_BIND_SERVICE
+
+ // NET_BROADCAST allows a process to broadcast to the network and to
+ // listen to multicast.
+ NET_BROADCAST
+
+ // NET_ADMIN allows a process to perform network configuration
+ // operations:
+ // - interface configuration
+ // - administration of IP firewall, masquerading and
+ // accounting
+ // - setting debug options on sockets
+ // - modification of routing tables
+ // - setting arbitrary process, and process group
+ // ownership on sockets
+ // - binding to any address for transparent proxying
+ // (this is also allowed via cap.NET_RAW)
+ // - setting TOS (Type of service)
+ // - setting promiscuous mode
+ // - clearing driver statistics
+ // - multicasting
+ // - read/write of device-specific registers
+ // - activation of ATM control sockets
+ NET_ADMIN
+
+ // NET_RAW allows a process to use raw networking:
+ // - RAW sockets
+ // - PACKET sockets
+ // - binding to any address for transparent proxying
+ // (also permitted via cap.NET_ADMIN)
+ NET_RAW
+
+ // IPC_LOCK allows a process to lock shared memory segments for IPC
+ // purposes. Also enables mlock and mlockall system
+ // calls.
+ IPC_LOCK
+
+ // IPC_OWNER allows a process to override IPC ownership checks.
+ IPC_OWNER
+
+ // SYS_MODULE allows a process to initiate the loading and unloading
+ // of kernel modules. This capability can effectively
+ // modify kernel without limit.
+ SYS_MODULE
+
+ // SYS_RAWIO allows a process to perform raw IO:
+ // - permit ioperm/iopl access
+ // - permit sending USB messages to any device via
+ // /dev/bus/usb
+ SYS_RAWIO
+
+ // SYS_CHROOT allows a process to perform a chroot syscall to change
+ // the effective root of the process' file system:
+ // redirect to directory "/" to some other location.
+ SYS_CHROOT
+
+ // SYS_PTRACE allows a process to perform a ptrace() of any other
+ // process.
+ SYS_PTRACE
+
+ // SYS_PACCT allows a process to configure process accounting.
+ SYS_PACCT
+
+ // SYS_ADMIN allows a process to perform a somewhat arbitrary
+ // grab-bag of privileged operations. Over time, this
+ // capability should weaken as specific capabilities are
+ // created for subsets of cap.SYS_ADMINs functionality:
+ // - configuration of the secure attention key
+ // - administration of the random device
+ // - examination and configuration of disk quotas
+ // - setting the domainname
+ // - setting the hostname
+ // - calling bdflush()
+ // - mount() and umount(), setting up new SMB connection
+ // - some autofs root ioctls
+ // - nfsservctl
+ // - VM86_REQUEST_IRQ
+ // - to read/write pci config on alpha
+ // - irix_prctl on mips (setstacksize)
+ // - flushing all cache on m68k (sys_cacheflush)
+ // - removing semaphores
+ // - Used instead of cap.CHOWN to "chown" IPC message
+ // queues, semaphores and shared memory
+ // - locking/unlocking of shared memory segment
+ // - turning swap on/off
+ // - forged pids on socket credentials passing
+ // - setting readahead and flushing buffers on block
+ // devices
+ // - setting geometry in floppy driver
+ // - turning DMA on/off in xd driver
+ // - administration of md devices (mostly the above, but
+ // some extra ioctls)
+ // - tuning the ide driver
+ // - access to the nvram device
+ // - administration of apm_bios, serial and bttv (TV)
+ // device
+ // - manufacturer commands in isdn CAPI support driver
+ // - reading non-standardized portions of PCI
+ // configuration space
+ // - DDI debug ioctl on sbpcd driver
+ // - setting up serial ports
+ // - sending raw qic-117 commands
+ // - enabling/disabling tagged queuing on SCSI
+ // controllers and sending arbitrary SCSI commands
+ // - setting encryption key on loopback filesystem
+ // - setting zone reclaim policy
+ SYS_ADMIN
+
+ // SYS_BOOT allows a process to initiate a reboot of the system.
+ SYS_BOOT
+
+ // SYS_NICE allows a process to manipulate the execution priorities
+ // of arbitrary processes:
+ // - those involving different UIDs
+ // - setting their CPU affinity
+ // - alter the FIFO vs. round-robin (realtime)
+ // scheduling for itself and other processes.
+ SYS_NICE
+
+ // SYS_RESOURCE allows a process to adjust resource related parameters
+ // of processes and the system:
+ // - set and override resource limits
+ // - override quota limits
+ // - override the reserved space on ext2 filesystem
+ // (this can also be achieved via cap.FSETID)
+ // - modify the data journaling mode on ext3 filesystem,
+ // which uses journaling resources
+ // - override size restrictions on IPC message queues
+ // - configure more than 64Hz interrupts from the
+ // real-time clock
+ // - override the maximum number of consoles for console
+ // allocation
+ // - override the maximum number of keymaps
+ //
+ //
+ SYS_RESOURCE
+
+ // SYS_TIME allows a process to perform time manipulation of clocks:
+ // - alter the system clock
+ // - enable irix_stime on MIPS
+ // - set the real-time clock
+ SYS_TIME
+
+ // SYS_TTY_CONFIG allows a process to manipulate tty devices:
+ // - configure tty devices
+ // - perform vhangup() of a tty
+ SYS_TTY_CONFIG
+
+ // MKNOD allows a process to perform privileged operations with
+ // the mknod() system call.
+ MKNOD
+
+ // LEASE allows a process to take leases on files.
+ LEASE
+
+ // AUDIT_WRITE allows a process to write to the audit log via a
+ // unicast netlink socket.
+ AUDIT_WRITE
+
+ // AUDIT_CONTROL allows a process to configure audit logging via a
+ // unicast netlink socket.
+ AUDIT_CONTROL
+
+ // SETFCAP allows a process to set capabilities on files.
+ SETFCAP
+
+ // MAC_OVERRIDE allows a process to override Mandatory Access Control
+ // (MAC) access. Not all kernels are configured with a MAC
+ // mechanism, but this is the capability reserved for
+ // overriding them.
+ MAC_OVERRIDE
+
+ // MAC_ADMIN allows a process to configure the Mandatory Access
+ // Control (MAC) policy. Not all kernels are configured
+ // with a MAC enabled, but if they are this capability is
+ // reserved for code to perform administration tasks.
+ MAC_ADMIN
+
+ // SYSLOG allows a process to configure the kernel's syslog
+ // (printk) behavior.
+ SYSLOG
+
+ // WAKE_ALARM allows a process to trigger something that can wake the
+ // system up.
+ WAKE_ALARM
+
+ // BLOCK_SUSPEND allows a process to block system suspends - prevent the
+ // system from entering a lower power state.
+ BLOCK_SUSPEND
+
+ // AUDIT_READ allows a process to read the audit log via a multicast
+ // netlink socket.
+ AUDIT_READ
+
+ // PERFMON allows a process to enable observability of privileged
+ // operations related to performance. The mechanisms
+ // include perf_events, i915_perf and other kernel
+ // subsystems.
+ PERFMON
+
+ // BPF allows a process to manipulate aspects of the kernel
+ // enhanced Berkeley Packet Filter (BPF) system. This is
+ // an execution subsystem of the kernel, that manages BPF
+ // programs. cap.BPF permits a process to:
+ // - create all types of BPF maps
+ // - advanced verifier features:
+ // - indirect variable access
+ // - bounded loops
+ // - BPF to BPF function calls
+ // - scalar precision tracking
+ // - larger complexity limits
+ // - dead code elimination
+ // - potentially other features
+ //
+ // Other capabilities can be used together with cap.BPF to
+ // further manipulate the BPF system:
+ // - cap.PERFMON relaxes the verifier checks as follows:
+ // - BPF programs can use pointer-to-integer
+ // conversions
+ // - speculation attack hardening measures can be
+ // bypassed
+ // - bpf_probe_read to read arbitrary kernel memory is
+ // permitted
+ // - bpf_trace_printk to print the content of kernel
+ // memory
+ // - cap.SYS_ADMIN permits the following:
+ // - use of bpf_probe_write_user
+ // - iteration over the system-wide loaded programs,
+ // maps, links BTFs and convert their IDs to file
+ // descriptors.
+ // - cap.PERFMON is required to load tracing programs.
+ // - cap.NET_ADMIN is required to load networking
+ // programs.
+ BPF
+
+ // CHECKPOINT_RESTORE allows a process to perform checkpoint
+ // and restore operations. Also permits
+ // explicit PID control via clone3() and
+ // also writing to ns_last_pid.
+ CHECKPOINT_RESTORE
+)
+
+var names = map[Value]string{
+ CHOWN: "cap_chown",
+ DAC_OVERRIDE: "cap_dac_override",
+ DAC_READ_SEARCH: "cap_dac_read_search",
+ FOWNER: "cap_fowner",
+ FSETID: "cap_fsetid",
+ KILL: "cap_kill",
+ SETGID: "cap_setgid",
+ SETUID: "cap_setuid",
+ SETPCAP: "cap_setpcap",
+ LINUX_IMMUTABLE: "cap_linux_immutable",
+ NET_BIND_SERVICE: "cap_net_bind_service",
+ NET_BROADCAST: "cap_net_broadcast",
+ NET_ADMIN: "cap_net_admin",
+ NET_RAW: "cap_net_raw",
+ IPC_LOCK: "cap_ipc_lock",
+ IPC_OWNER: "cap_ipc_owner",
+ SYS_MODULE: "cap_sys_module",
+ SYS_RAWIO: "cap_sys_rawio",
+ SYS_CHROOT: "cap_sys_chroot",
+ SYS_PTRACE: "cap_sys_ptrace",
+ SYS_PACCT: "cap_sys_pacct",
+ SYS_ADMIN: "cap_sys_admin",
+ SYS_BOOT: "cap_sys_boot",
+ SYS_NICE: "cap_sys_nice",
+ SYS_RESOURCE: "cap_sys_resource",
+ SYS_TIME: "cap_sys_time",
+ SYS_TTY_CONFIG: "cap_sys_tty_config",
+ MKNOD: "cap_mknod",
+ LEASE: "cap_lease",
+ AUDIT_WRITE: "cap_audit_write",
+ AUDIT_CONTROL: "cap_audit_control",
+ SETFCAP: "cap_setfcap",
+ MAC_OVERRIDE: "cap_mac_override",
+ MAC_ADMIN: "cap_mac_admin",
+ SYSLOG: "cap_syslog",
+ WAKE_ALARM: "cap_wake_alarm",
+ BLOCK_SUSPEND: "cap_block_suspend",
+ AUDIT_READ: "cap_audit_read",
+ PERFMON: "cap_perfmon",
+ BPF: "cap_bpf",
+ CHECKPOINT_RESTORE: "cap_checkpoint_restore",
+}
+
+var bits = map[string]Value{
+ "cap_chown": CHOWN,
+ "cap_dac_override": DAC_OVERRIDE,
+ "cap_dac_read_search": DAC_READ_SEARCH,
+ "cap_fowner": FOWNER,
+ "cap_fsetid": FSETID,
+ "cap_kill": KILL,
+ "cap_setgid": SETGID,
+ "cap_setuid": SETUID,
+ "cap_setpcap": SETPCAP,
+ "cap_linux_immutable": LINUX_IMMUTABLE,
+ "cap_net_bind_service": NET_BIND_SERVICE,
+ "cap_net_broadcast": NET_BROADCAST,
+ "cap_net_admin": NET_ADMIN,
+ "cap_net_raw": NET_RAW,
+ "cap_ipc_lock": IPC_LOCK,
+ "cap_ipc_owner": IPC_OWNER,
+ "cap_sys_module": SYS_MODULE,
+ "cap_sys_rawio": SYS_RAWIO,
+ "cap_sys_chroot": SYS_CHROOT,
+ "cap_sys_ptrace": SYS_PTRACE,
+ "cap_sys_pacct": SYS_PACCT,
+ "cap_sys_admin": SYS_ADMIN,
+ "cap_sys_boot": SYS_BOOT,
+ "cap_sys_nice": SYS_NICE,
+ "cap_sys_resource": SYS_RESOURCE,
+ "cap_sys_time": SYS_TIME,
+ "cap_sys_tty_config": SYS_TTY_CONFIG,
+ "cap_mknod": MKNOD,
+ "cap_lease": LEASE,
+ "cap_audit_write": AUDIT_WRITE,
+ "cap_audit_control": AUDIT_CONTROL,
+ "cap_setfcap": SETFCAP,
+ "cap_mac_override": MAC_OVERRIDE,
+ "cap_mac_admin": MAC_ADMIN,
+ "cap_syslog": SYSLOG,
+ "cap_wake_alarm": WAKE_ALARM,
+ "cap_block_suspend": BLOCK_SUSPEND,
+ "cap_audit_read": AUDIT_READ,
+ "cap_perfmon": PERFMON,
+ "cap_bpf": BPF,
+ "cap_checkpoint_restore": CHECKPOINT_RESTORE,
+}
diff --git a/cap/oslockluster.go b/cap/oslockluster.go
new file mode 100644
index 0000000..0b2cf2e
--- /dev/null
+++ b/cap/oslockluster.go
@@ -0,0 +1,33 @@
+// +build !go1.10
+
+package cap
+
+import "syscall"
+
+// LaunchSupported indicates that it is safe to return from a locked OS
+// Thread and have that OS Thread be terminated by the runtime. The
+// Launch functionality really needs to rely on the fact that an
+// excess of runtime.LockOSThread() vs. runtime.UnlockOSThread() calls
+// in a returning go routine will cause the underlying locked OSThread
+// to terminate. That feature was added to the Go runtime in version
+// 1.10.
+//
+// See these bugs for the discussion and feature assumed by the code
+// in this Launch() functionality:
+//
+// https://github.com/golang/go/issues/20395
+// https://github.com/golang/go/issues/20458
+//
+// A value of false for this constant causes the Launch functionality
+// to fail with an error: cap.ErrNoLaunch. If this value is false you
+// have two choices with respect to the Launch functionality:
+//
+// 1) don't use cap.(*Launcher).Launch()
+// 2) upgrade your Go toolchain to 1.10+ (ie., do this one).
+const LaunchSupported = false
+
+// validatePA confirms that the pa.Sys entry is not incompatible with
+// Launch. This variant is built for Go toolchains older than 1.10,
+// where launching is not supported, so it ignores its arguments and
+// always returns ErrNoLaunch.
+func validatePA(pa *syscall.ProcAttr, chroot string) (bool, error) {
+ return false, ErrNoLaunch
+}
diff --git a/cap/oslocks.go b/cap/oslocks.go
new file mode 100644
index 0000000..9754020
--- /dev/null
+++ b/cap/oslocks.go
@@ -0,0 +1,51 @@
+// +build go1.10
+
+package cap
+
+import "syscall"
+
+// LaunchSupported indicates that it is safe to return from a locked
+// OS Thread and have that OS Thread be terminated by the runtime. The
+// Launch functionality really needs to rely on the fact that an
+// excess of runtime.LockOSThread() vs. runtime.UnlockOSThread() calls
+// in a returning go routine will cause the underlying locked OSThread
+// to terminate. That feature was added to the Go runtime in version
+// 1.10.
+//
+// See these bugs for the discussion and feature assumed by the code
+// in this Launch() functionality:
+//
+// https://github.com/golang/go/issues/20395
+// https://github.com/golang/go/issues/20458
+//
+// A value of false for this constant causes the Launch functionality
+// to fail with an error: cap.ErrNoLaunch. If this value is false you
+// have two choices with respect to the Launch functionality:
+//
+// 1) don't use cap.(*Launcher).Launch()
+// 2) upgrade your Go toolchain to 1.10+ (ie., do this one).
+const LaunchSupported = true
+
+// validatePA confirms that the pa.Sys entry is not incompatible with
+// Launch and loads up the chroot value. The returned bool is true
+// when the launch involves a chroot.
+//
+// A pa.Sys supplied by the callback must not set Chroot
+// (ErrAmbiguousChroot), Credential (ErrAmbiguousIDs) or AmbientCaps
+// (ErrAmbiguousAmbient). When chroot is non-empty it is installed in
+// pa.Sys. If the callback already supplied a pa.Sys, that value is
+// copied first so the callback's own struct is never modified and can
+// safely be reused for later launches.
+func validatePA(pa *syscall.ProcAttr, chroot string) (bool, error) {
+	s := pa.Sys
+	if s == nil {
+		if chroot == "" {
+			return false, nil
+		}
+		pa.Sys = &syscall.SysProcAttr{
+			Chroot: chroot,
+		}
+		return true, nil
+	}
+	if s.Chroot != "" {
+		return false, ErrAmbiguousChroot
+	}
+	if s.Credential != nil {
+		return false, ErrAmbiguousIDs
+	}
+	if len(s.AmbientCaps) != 0 {
+		return false, ErrAmbiguousAmbient
+	}
+	if chroot == "" {
+		return false, nil
+	}
+	// The callback provided other pa.Sys settings: keep them, but
+	// do not silently drop the Launcher's chroot request.
+	withRoot := *s
+	withRoot.Chroot = chroot
+	pa.Sys = &withRoot
+	return true, nil
+}
diff --git a/cap/syscalls.go b/cap/syscalls.go
new file mode 100644
index 0000000..ab4bcef
--- /dev/null
+++ b/cap/syscalls.go
@@ -0,0 +1,27 @@
+package cap
+
+import (
+ "syscall"
+
+ "kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+// multisc provides syscalls overridable for testing purposes that
+// support a single kernel security state for all OS threads.
+// We use this version when we are cgo compiling because
+// we need to manage the native C pthreads too.
+var multisc = &syscaller{
+ w3: psx.Syscall3,
+ w6: psx.Syscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
+
+// singlesc provides a single threaded implementation. Users should
+// take care to ensure the thread is locked and marked nogc.
+var singlesc = &syscaller{
+ w3: syscall.RawSyscall,
+ w6: syscall.RawSyscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
diff --git a/cap/text.go b/cap/text.go
new file mode 100644
index 0000000..cf11a2d
--- /dev/null
+++ b/cap/text.go
@@ -0,0 +1,325 @@
+package cap
+
+import (
+ "bufio"
+ "errors"
+ "strconv"
+ "strings"
+)
+
+// String converts a capability Value into its canonical text
+// representation.
+func (v Value) String() string {
+ name, ok := names[v]
+ if ok {
+ return name
+ }
+ // Un-named capabilities are referred to numerically (in decimal).
+ return strconv.Itoa(int(v))
+}
+
+// FromName converts a named capability Value to its binary
+// representation.
+func FromName(name string) (Value, error) {
+ startUp.Do(multisc.cInit)
+ v, ok := bits[name]
+ if ok {
+ if v >= Value(words*32) {
+ return 0, ErrBadValue
+ }
+ return v, nil
+ }
+ i, err := strconv.Atoi(name)
+ if err != nil {
+ return 0, err
+ }
+ if i >= 0 && i < int(words*32) {
+ return Value(i), nil
+ }
+ return 0, ErrBadValue
+}
+
+const (
+ eBin uint = (1 << Effective)
+ pBin = (1 << Permitted)
+ iBin = (1 << Inheritable)
+)
+
+var combos = []string{"", "e", "p", "ep", "i", "ei", "ip", "eip"}
+
+// histo generates a histogram of flag state combinations.
+func (c *Set) histo(bins []int, patterns []uint, from, limit Value) uint {
+ for v := from; v < limit; v++ {
+ b := uint(v & 31)
+ u, bit, err := bitOf(0, v)
+ if err != nil {
+ break
+ }
+ x := uint((c.flat[u][Effective]&bit)>>b) * eBin
+ x |= uint((c.flat[u][Permitted]&bit)>>b) * pBin
+ x |= uint((c.flat[u][Inheritable]&bit)>>b) * iBin
+ bins[x]++
+ patterns[uint(v)] = x
+ }
+ // Note, in the loop, we use >= to pick the smallest value for
+ // m with the highest bin value. That is ties break towards
+ // m=0.
+ m := uint(7)
+ for t := m; t > 0; {
+ t--
+ if bins[t] >= bins[m] {
+ m = t
+ }
+ }
+ return m
+}
+
+// String converts a full capability Set into a single short readable
+// string representation (which may contain spaces). See the
+// cap.FromText() function for an explanation of its return values.
+//
+// Note (*cap.Set).String() may evolve to generate more compact
+// strings representing a given Set over time, but it should
+// maintain compatibility with the libcap:cap_to_text() function for
+// any given release. Further, it will always be an inverse of
+// cap.FromText().
+func (c *Set) String() string {
+ if c == nil || len(c.flat) == 0 {
+ return "<invalid>"
+ }
+ bins := make([]int, 8)
+ patterns := make([]uint, maxValues)
+
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+
+ // Note, in order to have a *Set pointer, startUp.Do(cInit)
+ // must have been called which sets maxValues.
+ m := c.histo(bins, patterns, 0, Value(maxValues))
+
+ // Background state is the most popular of the named bits.
+ vs := []string{"=" + combos[m]}
+ for i := uint(8); i > 0; {
+ i--
+ if i == m || bins[i] == 0 {
+ continue
+ }
+ var list []string
+ for j, p := range patterns {
+ if p != i {
+ continue
+ }
+ list = append(list, Value(j).String())
+ }
+ x := strings.Join(list, ",")
+ var y, z string
+ if cf := i & ^m; cf != 0 {
+ op := "+"
+ if len(vs) == 1 && vs[0] == "=" {
+ // Special case "= foo+..." == "foo=...".
+ // Prefer the latter because it is more compact.
+ vs = nil
+ op = "="
+ }
+ y = op + combos[cf]
+ }
+ if cf := m & ^i; cf != 0 {
+ z = "-" + combos[cf]
+ }
+ vs = append(vs, x+y+z)
+ }
+
+ // The unnamed bits can only add to the above named ones since
+ // unnamed ones are always defaulted to lowered.
+ uBins := make([]int, 8)
+ uPatterns := make([]uint, 32*words)
+ c.histo(uBins, uPatterns, Value(maxValues), 32*Value(words))
+ for i := uint(7); i > 0; i-- {
+ if uBins[i] == 0 {
+ continue
+ }
+ var list []string
+ for j, p := range uPatterns {
+ if p != i {
+ continue
+ }
+ list = append(list, Value(j).String())
+ }
+ vs = append(vs, strings.Join(list, ",")+"+"+combos[i])
+ }
+
+ return strings.Join(vs, " ")
+}
+
+// ErrBadText is returned if the text for a capability set cannot be parsed.
+var ErrBadText = errors.New("bad text")
+
+// FromText converts the canonical text representation for a Set into
+// a freshly allocated Set.
+//
+// The format follows the following pattern: a set of space separated
+// sequences. Each sequence applies over the previous sequence to
+// build up a Set. The format of a sequence is:
+//
+// [comma list of cap_values][[ops][flags]]*
+//
+// Examples:
+//
+// "all=ep"
+// "cap_chown,cap_setuid=ip cap_setuid+e"
+// "=p cap_setpcap-p+i"
+//
+// Here "all" refers to all named capabilities known to the hosting
+// kernel, and "all" is assumed if no capabilities are listed before
+// an "=".
+//
+// The ops values, "=", "+" and "-" imply "reset and raise", "raise"
+// and "lower" respectively. The "e", "i" and "p" characters
+// correspond to the capabilities of the corresponding Flag: "e"
+// (Effective); "i" (Inheritable); "p" (Permitted).
+//
+// This syntax is overspecified and there are many ways of building
+// the same final Set state. Any sequence that includes a '=' resets
+// the accumulated state of all Flags ignoring earlier sequences. On
+// each of the following lines we give three or more examples of ways
+// to specify a common Set. The last entry on each line is the one
+// generated by (*cap.Set).String() from that Set.
+//
+// "=p all+ei" "all=pie" "=pi all+e" "=eip"
+//
+// "cap_setuid=p cap_chown=i" "cap_chown=ip-p" "cap_chown=i"
+//
+// "cap_chown=-p" "all=" "cap_setuid=pie-pie" "="
+//
+// Note: FromText() is tested at release time to completely match the
+// import ability of the libcap:cap_from_text() function.
+func FromText(text string) (*Set, error) {
+ c := NewSet()
+ scanner := bufio.NewScanner(strings.NewReader(text))
+ scanner.Split(bufio.ScanWords)
+ chunks := 0
+ for scanner.Scan() {
+ chunks++
+
+ // Parsing for xxx([-+=][eip]+)+
+ t := scanner.Text()
+ i := strings.IndexAny(t, "=+-")
+ if i < 0 {
+ return nil, ErrBadText
+ }
+ var vs []Value
+ sep := t[i]
+ if vals := t[:i]; vals == "all" {
+ for v := Value(0); v < Value(maxValues); v++ {
+ vs = append(vs, v)
+ }
+ } else if vals != "" {
+ for _, name := range strings.Split(vals, ",") {
+ v, err := FromName(name)
+ if err != nil {
+ return nil, ErrBadText
+ }
+ vs = append(vs, v)
+ }
+ } else if sep != '=' {
+ if vals == "" {
+ // Only "=" supports ""=="all".
+ return nil, ErrBadText
+ }
+ } else if j := i + 1; j+1 < len(t) {
+ switch t[j] {
+ case '+':
+ sep = 'P'
+ i++
+ case '-':
+ sep = 'M'
+ i++
+ }
+ }
+ i++
+
+ // There are 5 ways to set: =, =+, =-, +, -. We call
+ // the 2nd and 3rd of these 'P' and 'M'.
+
+ for {
+ // read [eip]+ setting flags.
+ var fE, fP, fI bool
+ for ok := true; ok && i < len(t); i++ {
+ switch t[i] {
+ case 'e':
+ fE = true
+ case 'i':
+ fI = true
+ case 'p':
+ fP = true
+ default:
+ ok = false
+ }
+ if !ok {
+ break
+ }
+ }
+
+ if !(fE || fI || fP) {
+ if sep != '=' {
+ return nil, ErrBadText
+ }
+ }
+
+ switch sep {
+ case '=', 'P', 'M', '+':
+ if sep != '+' {
+ c.Clear()
+ if sep == 'M' {
+ break
+ }
+ }
+ if keep := len(vs) == 0; keep {
+ if sep != '=' {
+ return nil, ErrBadText
+ }
+ c.forceFlag(Effective, fE)
+ c.forceFlag(Permitted, fP)
+ c.forceFlag(Inheritable, fI)
+ break
+ }
+ // =, + and P for specific values are left.
+ if fE {
+ c.SetFlag(Effective, true, vs...)
+ }
+ if fP {
+ c.SetFlag(Permitted, true, vs...)
+ }
+ if fI {
+ c.SetFlag(Inheritable, true, vs...)
+ }
+ case '-':
+ if fE {
+ c.SetFlag(Effective, false, vs...)
+ }
+ if fP {
+ c.SetFlag(Permitted, false, vs...)
+ }
+ if fI {
+ c.SetFlag(Inheritable, false, vs...)
+ }
+ }
+
+ if i == len(t) {
+ break
+ }
+
+ switch t[i] {
+ case '+', '-':
+ sep = t[i]
+ i++
+ default:
+ return nil, ErrBadText
+ }
+ }
+ }
+ if chunks == 0 {
+ return nil, ErrBadText
+ }
+ return c, nil
+}
diff --git a/contrib/seccomp/explore.go b/contrib/seccomp/explore.go
new file mode 100644
index 0000000..37fe97b
--- /dev/null
+++ b/contrib/seccomp/explore.go
@@ -0,0 +1,277 @@
+// Program explore is evolved from the code discussed in more depth
+// here:
+//
+// https://github.com/golang/go/issues/3405
+//
+// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only
+// applies to the calling thread, since
+// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03
+// the seccomp filter application forces the setting to be mirrored on
+// all the threads of a process.
+//
+// Based on the command line options, we can manipulate the program to
+// behave in various ways. Example command lines:
+//
+// sudo ./explore
+// sudo ./explore --kill=false
+// sudo ./explore --kill=false --errno=0
+//
+// Supported Go toolchains are after go1.10. Those prior to go1.15
+// require this environment variable to be set to build successfully:
+//
+// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
+//
+// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too,
+// demonstrating native nocgo support for seccomp features.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "runtime"
+ "syscall"
+ "time"
+ "unsafe"
+
+ "kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+var (
+ withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall")
+ delays = flag.Bool("delays", false, "use this to pause the program at various places")
+ kill = flag.Bool("kill", true, "kill the process if setuid attempted")
+ errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno")
+)
+
+const (
+ prSetNoNewPrivs = 38
+
+ sysSeccomp = 317 // x86_64 syscall number
+ seccompSetModeFilter = 1 // uses user-supplied filter.
+ seccompFilterFlagTsync = (1 << 0) // mirror filtering on all threads.
+ seccompRetErrno = 0x00050000 // returns an errno
+ seccompRetData = 0x0000ffff // mask for RET data payload (ex. errno)
+ seccompRetKillProcess = 0x80000000 // kill the whole process immediately
+ seccompRetTrap = 0x00030000 // disallow and force a SIGSYS
+ seccompRetAllow = 0x7fff0000
+
+ bpfLd = 0x00
+ bpfJmp = 0x05
+ bpfRet = 0x06
+
+ bpfW = 0x00
+
+ bpfAbs = 0x20
+ bpfJeq = 0x10
+
+ bpfK = 0x00
+
+ auditArchX86_64 = 3221225534 // 0xC000003E: AUDIT_ARCH_X86_64 from <linux/audit.h> (EM_X86_64 | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE)
+ archNr = auditArchX86_64
+
+ syscallNr = 0
+)
+
+// SockFilter is a single filter block.
+type SockFilter struct {
+ // Code is the filter code instruction.
+ Code uint16
+ // Jt is the target for a true result from the code execution.
+ Jt uint8
+ // Jf is the target for a false result from the code execution.
+ Jf uint8
+ // K is a generic multiuse field
+ K uint32
+}
+
+// SockFProg is a filter program: a counted sequence of SockFilter blocks.
+type SockFProg struct {
+ // Len is the number of contiguous SockFilter blocks that can
+ // be found at *Filter.
+ Len uint16
+ // Filter is the address of the first SockFilter block of a
+ // program sequence.
+ Filter *SockFilter
+}
+
+// SockFilterSlice is a subprogram filter.
+type SockFilterSlice []SockFilter
+
+func bpfStmt(code uint16, k uint32) SockFilter {
+ return SockFilter{code, 0, 0, k}
+}
+
+func bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter {
+ return SockFilter{code, jt, jf, k}
+}
+
+func validateArchitecture() []SockFilter {
+ return []SockFilter{
+ bpfStmt(bpfLd+bpfW+bpfAbs, 4), // 4 is the byte offset of the arch field in struct seccomp_data (nr is at offset 0).
+ bpfJump(bpfJmp+bpfJeq+bpfK, archNr, 1, 0),
+ bpfStmt(bpfRet+bpfK, seccompRetKillProcess),
+ }
+}
+
+func ExamineSyscall() []SockFilter {
+ return []SockFilter{
+ bpfStmt(bpfLd+bpfW+bpfAbs, syscallNr),
+ }
+}
+
+func AllowSyscall(syscallNum uint32) []SockFilter {
+ return []SockFilter{
+ bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
+ bpfStmt(bpfRet+bpfK, seccompRetAllow),
+ }
+}
+
+func DisallowSyscall(syscallNum, errno uint32) []SockFilter {
+ return []SockFilter{
+ bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
+ bpfStmt(bpfRet+bpfK, seccompRetErrno|(errno&seccompRetData)),
+ }
+}
+
+func KillProcess() []SockFilter {
+ return []SockFilter{
+ bpfStmt(bpfRet+bpfK, seccompRetKillProcess),
+ }
+}
+
+func NotifyProcessAndDie() []SockFilter {
+ return []SockFilter{
+ bpfStmt(bpfRet+bpfK, seccompRetTrap),
+ }
+}
+
+func TrapOnSyscall(syscallNum uint32) []SockFilter {
+ return []SockFilter{
+ bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
+ bpfStmt(bpfRet+bpfK, seccompRetTrap),
+ }
+}
+
+func AllGood() []SockFilter {
+ return []SockFilter{
+ bpfStmt(bpfRet+bpfK, seccompRetAllow),
+ }
+}
+
+// prctl executes the prctl - unless the --psx commandline argument is
+// used, this is on a single thread.
+//go:uintptrescapes
+func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error {
+ var e syscall.Errno
+ if *withPSX {
+ _, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
+ } else {
+ _, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
+ }
+ if e != 0 {
+ return e
+ }
+ if *delays {
+ fmt.Println("prctl'd - check now")
+ time.Sleep(1 * time.Minute)
+ }
+ return nil
+}
+
+// SeccompSetModeFilter is our wrapper for performing our seccomp system call.
+//go:uintptrescapes
+func SeccompSetModeFilter(prog *SockFProg) error {
+ if _, _, e := syscall.RawSyscall(sysSeccomp, seccompSetModeFilter, seccompFilterFlagTsync, uintptr(unsafe.Pointer(prog))); e != 0 {
+ return e
+ }
+ return nil
+}
+
+var empty func()
+
+func lockProcessThread(pick bool) {
+ // Make sure we are locked to an OS thread for which (TID == PID) equals pick.
+ pid := uintptr(syscall.Getpid())
+ runtime.LockOSThread()
+ for {
+ tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0)
+ if (tid == pid) == pick {
+ fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick)
+ break
+ }
+ runtime.UnlockOSThread()
+ go func() {
+ time.Sleep(1 * time.Microsecond)
+ }()
+ runtime.Gosched()
+ runtime.LockOSThread()
+ }
+}
+
+// applyPolicy uploads the program sequence.
+func applyPolicy(prog *SockFProg) {
+ // Without PSX we can't guarantee the thread we execute the
+ // seccomp call on will be the same one that we disabled new
+ // privs on. With PSX, the disabling of new privs is mirrored
+ // on all threads.
+ if !*withPSX {
+ lockProcessThread(false)
+ defer runtime.UnlockOSThread()
+ }
+
+ // This is required to load a filter without privilege.
+ if err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0); err != nil {
+ log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err)
+ }
+
+ fmt.Println("Applying syscall policy...")
+ if err := SeccompSetModeFilter(prog); err != nil {
+ log.Fatalf("seccomp_set_mode_filter: %v", err)
+ }
+ fmt.Println("...Policy applied")
+}
+
+func main() {
+ flag.Parse()
+
+ if *delays {
+ fmt.Println("check first", syscall.Getpid())
+ time.Sleep(60 * time.Second)
+ }
+
+ var filter []SockFilter
+ filter = append(filter, validateArchitecture()...)
+
+ // Grab the system call number.
+ filter = append(filter, ExamineSyscall()...)
+
+ // List disallowed syscalls.
+ for _, x := range []uint32{
+ syscall.SYS_SETUID,
+ } {
+ if *kill {
+ filter = append(filter, TrapOnSyscall(x)...)
+ } else {
+ filter = append(filter, DisallowSyscall(x, uint32(*errno))...)
+ }
+ }
+
+ filter = append(filter, AllGood()...)
+
+ prog := &SockFProg{
+ Len: uint16(len(filter)),
+ Filter: &filter[0],
+ }
+
+ applyPolicy(prog)
+
+ // Ensure we are running on the TID=PID.
+ lockProcessThread(true)
+
+ log.Print("Now it is time to try to run something privileged...")
+ if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 {
+ log.Fatalf("setuid failed with an error: %v", e)
+ }
+ log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1")
+}
diff --git a/contrib/seccomp/go.mod b/contrib/seccomp/go.mod
new file mode 100644
index 0000000..86e40c6
--- /dev/null
+++ b/contrib/seccomp/go.mod
@@ -0,0 +1,5 @@
+module explore
+
+go 1.14
+
+require kernel.org/pub/linux/libs/security/libcap/psx v0.2.48
diff --git a/distcheck.sh b/distcheck.sh
new file mode 100755
index 0000000..3360e31
--- /dev/null
+++ b/distcheck.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+actual=$(wget -o/dev/null -O/dev/stdout https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/linux/capability.h | grep "#define.CAP_LAST_CAP"|awk '{print $3}')
+working=$(grep "#define.CAP_LAST_CAP" libcap/include/uapi/linux/capability.h|awk '{print $3}')
+
+if [[ ${actual} = ${working} ]]; then
+ echo "up to date with officially named caps"
+ exit 0
+fi
+
+echo "want: ${actual}"
+echo "have: ${working}"
+exit 1
diff --git a/doc/Makefile b/doc/Makefile
index 8e87b17..e60f72d 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -13,8 +13,12 @@ MAN3S = cap_init.3 cap_free.3 cap_dup.3 \
cap_copy_ext.3 cap_size.3 cap_copy_int.3 \
cap_from_text.3 cap_to_text.3 cap_from_name.3 cap_to_name.3 \
capsetp.3 capgetp.3 libcap.3 \
- cap_get_bound.3 cap_drop_bound.3
-MAN8S = getcap.8 setcap.8
+ cap_get_bound.3 cap_drop_bound.3 \
+ cap_get_mode.3 cap_set_mode.3 cap_mode_name.3 \
+ cap_get_secbits.3 cap_set_secbits.3 \
+ cap_setuid.3 cap_setgroups.3 \
+ psx_syscall.3 psx_syscall3.3 psx_syscall6.3 libpsx.3
+MAN8S = getcap.8 setcap.8 getpcaps.8
MANS = $(MAN1S) $(MAN3S) $(MAN8S)
@@ -46,5 +50,3 @@ install:
clean:
$(LOCALCLEAN)
rm -rf html
-
-
diff --git a/doc/cap_clear.3 b/doc/cap_clear.3
index 48f5cc0..73aac61 100644
--- a/doc/cap_clear.3
+++ b/doc/cap_clear.3
@@ -18,7 +18,7 @@ cap_clear, cap_clear_flag, cap_get_flag, cap_set_flag, cap_compare \- capability
.sp
.BI "int cap_compare(cap_t " cap_a ", cap_t " cap_b ");"
.sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
.fi
.SH DESCRIPTION
These functions work on a capability state held in working storage.
diff --git a/doc/cap_copy_ext.3 b/doc/cap_copy_ext.3
index 61d9381..acbb487 100644
--- a/doc/cap_copy_ext.3
+++ b/doc/cap_copy_ext.3
@@ -11,7 +11,7 @@ external representation translation
.sp
.BI "cap_t cap_copy_int(const void *" ext_p );
.sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
.SH DESCRIPTION
These functions translate between internal and external
representations of a capability state. The external representation is
@@ -34,7 +34,7 @@ function in order to hold the capability data record created from
.BR cap_copy_ext ()
copies a capability state in working storage, identified by
.IR cap_p ,
-from system managed space to user-managed space (pointed to by
+from system-managed space to user-managed space (pointed to by
.IR ext_p )
and returns the length of the resulting data record. The size parameter
represents the maximum size, in bytes, of the resulting data record. The
@@ -74,12 +74,12 @@ as an argument.
.SH "RETURN VALUE"
.BR cap_size ()
returns the length required to hold a capability data record on success,
-and -1 on failure.
+and \-1 on failure.
.PP
.BR cap_copy_ext ()
returns the number of bytes placed in the user managed space pointed to by
.I ext_p
-on success, and -1 on failure.
+on success, and \-1 on failure.
.PP
.BR cap_copy_int ()
returns a pointer to the newly created capability state in working storage
diff --git a/doc/cap_from_text.3 b/doc/cap_from_text.3
index ccf7d95..59724c7 100644
--- a/doc/cap_from_text.3
+++ b/doc/cap_from_text.3
@@ -6,17 +6,15 @@
cap_from_text, cap_to_text, cap_to_name, cap_from_name \- capability
state textual representation translation
.SH SYNOPSIS
-.B #include <sys/capability.h>
-.sp
-.BI "cap_t cap_from_text(const char *" buf_p );
-.sp
-.BI "char *cap_to_text(cap_t " caps ", ssize_t *" length_p );
-.sp
-.BI "int cap_from_name(const char *" name ", cap_value_t *" cap_p );
-.sp
-.BI "char *cap_to_name(cap_value_t " cap );
-.sp
-Link with \fI-lcap\fP.
+.nf
+#include <sys/capability.h>
+
+cap_t cap_from_text(const char *buf_p);
+char *cap_to_text(cap_t caps, ssize_t *length_p);
+int cap_from_name(const char *name, cap_value_t *cap_p);
+char *cap_to_name(cap_value_t cap);
+.fi
+Link with \fI\-lcap\fP.
.SH DESCRIPTION
These functions translate a capability state between
an internal representation and a textual one.
@@ -100,7 +98,7 @@ followed by an
An action-list consists of a sequence of
.I operator flag
pairs. Legal operators are:
-.RB ` = "', '" + "', and `" - "'."
+.RB ` = "', '" + "', and `" \- "'."
Legal flags are:
.RB ` e "', `" i "', and `" p "'."
These flags are case-sensitive and specify the Effective, Inheritable
@@ -136,15 +134,22 @@ refer to `all' capabilities. For example, the following three
clauses are equivalent to each other (and indicate a completely empty
capability set): "all="; "="; "cap_chown,<every-other-capability>=".
.PP
-The operators, `+' and `-' both require an explicit preceding
+The operators, `+' and `\-' both require an explicit preceding
capability list and one or more explicit trailing flags. The `+'
operator will raise all of the listed capabilities in the flagged
-capability sets. The `-' operator will lower all of the listed
+capability sets. The `\-' operator will lower all of the listed
capabilities in the flagged capability sets. For example:
-"all+p" will raise all of the Permitted capabilities; "cap_fowner+p-i"
-will raise the override-file-ownership capability in the Permitted
-capability set and lower this Inheritable capability;
-"cap_fowner+pe-i" and "cap_fowner=+pe" are equivalent.
+"all+p" will raise all of the Permitted capabilities and
+"cap_fowner\-i" will lower the override-file-ownership in the Inheritable set.
+.PP
+The action list can consist of multiple
+.I operator flag
+pairs; the actions are performed in left-to-right order.
+Thus, for example,
+"cap_fowner+p\-i"
+is equivalent to "cap_fowner+p cap_fowner\-i".
+As another example,
+"cap_fowner+pe\-i" and "cap_fowner=+pe" are equivalent.
.SH "RETURN VALUE"
.BR cap_from_text (),
.BR cap_to_text ()
@@ -152,7 +157,7 @@ and
.BR cap_to_name ()
return a non-NULL value on success, and NULL on failure.
.BR cap_from_name ()
-returns 0 for success, and -1 on failure (unknown capability).
+returns 0 for success, and \-1 on failure (unknown capability).
.PP
On failure,
.I errno
@@ -174,17 +179,15 @@ The example program below demonstrates the use of
.BR cap_from_text ()
and
.BR cap_to_text ().
-The following shell session shows a some example runs:
-.in +4n
+The following shell session shows some example runs:
.nf
$ ./a.out "cap_chown=p cap_chown+e"
-caps_to_text() returned "= cap_chown+ep"
-$ ./a.out "all=pe cap_chown-e cap_kill-pe"
-caps_to_text() returned "=ep cap_chown-e cap_kill-ep"
+caps_to_text() returned "cap_chown=ep"
+$ ./a.out "all=pe cap_chown\-e cap_kill\-pe"
+caps_to_text() returned "=ep cap_chown\-e cap_kill\-ep"
.fi
-.in
The source code of the program is as follows:
.nf
@@ -225,7 +228,6 @@ main(int argc, char *argv[])
.SH "SEE ALSO"
.BR libcap (3),
.BR cap_clear (3),
-.BR cap_compare (3),
.BR cap_copy_ext (3),
.BR cap_get_file (3),
.BR cap_get_proc (3),
diff --git a/doc/cap_get_ambient.3 b/doc/cap_get_ambient.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_get_ambient.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_get_file.3 b/doc/cap_get_file.3
index 107b6d1..3f73734 100644
--- a/doc/cap_get_file.3
+++ b/doc/cap_get_file.3
@@ -6,8 +6,6 @@
cap_get_file, cap_set_file, cap_get_fd, cap_set_fd \- capability
manipulation on files
.SH SYNOPSIS
-.B
-.sp
.B #include <sys/capability.h>
.sp
.BI "cap_t cap_get_file(const char *" path_p );
@@ -18,7 +16,11 @@ manipulation on files
.sp
.BI "int cap_set_fd(int " fd ", cap_t " caps );
.sp
-Link with \fI-lcap\fP.
+.BI "uid_t cap_get_nsowner(cap_t " caps );
+.sp
+.BI "int cap_set_nsowner(cap_t " caps ", uid_t " rootuid );
+.sp
+Link with \fI\-lcap\fP.
.SH DESCRIPTION
.BR cap_get_file ()
and
@@ -55,14 +57,25 @@ A NULL value for
.IR cap_p
is used to indicate that capabilities for the file should be deleted.
For these functions to succeed, the calling process must have the
-effective capability,
-.BR CAP_SETFCAP ,
-enabled and either the effective user ID of the process must match the
+.BR CAP_SETFCAP
+capability in its effective set
+and either the effective user ID of the process must match the
file owner or the calling process must have the
.B CAP_FOWNER
-flag in its effective capability set. The effects of writing the
+capability in its effective capability set. The effects of writing the
capability state to any file type other than a regular file are
undefined.
+.PP
+A capability set held in memory can be associated with the root user ID in
+use in a specific user namespace. It is possible to get and set this value
+(in the memory copy) with
+.BR cap_get_nsowner ()
+and
+.BR cap_set_nsowner ()
+respectively. The root user ID is ignored by the libcap library in all cases
+other than when the capability is written to a file. Only if the value
+is non-zero will the library attempt to include it in the written file
+capability set.
.SH "RETURN VALUE"
.BR cap_get_file ()
and
@@ -121,4 +134,5 @@ Permitted or Inheritable flag enabled.
.BR cap_from_text (3),
.BR cap_get_proc (3),
.BR cap_init (3),
-.BR capabilities (7)
+.BR capabilities (7),
+.BR user_namespaces (7)
diff --git a/doc/cap_get_mode.3 b/doc/cap_get_mode.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_get_mode.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_get_proc.3 b/doc/cap_get_proc.3
index be28362..74e5e8c 100644
--- a/doc/cap_get_proc.3
+++ b/doc/cap_get_proc.3
@@ -1,10 +1,10 @@
-.\"
-.\" $Id: cap_get_proc.3,v 1.1.1.1 1999/04/17 22:16:31 morgan Exp $
-.\"
-.TH CAP_GET_PROC 3 "2008-05-11" "" "Linux Programmer's Manual"
+.TH CAP_GET_PROC 3 "2019-12-21" "" "Linux Programmer's Manual"
.SH NAME
-cap_get_proc, cap_set_proc, capgetp, cap_get_bound, cap_drop_bound \-
-capability manipulation on processes
+cap_get_proc, cap_set_proc, capgetp, cap_get_bound, cap_drop_bound, \
+cap_get_ambient, cap_set_ambient, cap_reset_ambient, \
+cap_get_secbits, cap_set_secbits, cap_get_mode, cap_set_mode, \
+cap_mode_name, cap_get_pid, cap_setuid, cap_setgroups \
+\- capability manipulation on processes
.SH SYNOPSIS
.B #include <sys/capability.h>
.sp
@@ -18,11 +18,34 @@ capability manipulation on processes
.sp
.BI "int cap_drop_bound(cap_value_t " cap );
.sp
+.BI "int cap_get_ambient(cap_value_t " cap );
+.sp
+.BI "int cap_set_ambient(cap_value_t " cap ", cap_flag_value_t " value );
+.sp
+.B int cap_reset_ambient(void);
+.sp
+.BI CAP_AMBIENT_SUPPORTED();
+.sp
+.B "unsigned cap_get_secbits(void);"
+.sp
+.BI "int cap_set_secbits(unsigned " bits );
+.sp
+.B "cap_mode_t cap_get_mode(void);"
+.sp
+.BI "const char *cap_mode_name(cap_mode_t " mode );
+.sp
+.BI "int cap_set_mode(cap_mode_t " mode );
+.sp
.B #include <sys/types.h>
.sp
.BI "cap_t cap_get_pid(pid_t " pid );
.sp
-Link with \fI-lcap\fP.
+.BI "int cap_setuid(uid_t " uid );
+.sp
+.BI "int cap_setgroups(gid_t " gid ", size_t " ngroups ", const gid_t " \
+groups );
+.sp
+Link with \fI\-lcap\fP.
.SH DESCRIPTION
.BR cap_get_proc ()
allocates a capability state in working storage, sets its state to
@@ -55,6 +78,9 @@ see
.BR cap_init (3),
with the process capabilities of the process indicated by
.IR pid .
+(If
+.I pid
+is 0, then the calling process's capabilities are returned.)
This information can also be obtained from the
.I /proc/<pid>/status
file.
@@ -63,9 +89,9 @@ file.
with a
.I cap
as an argument returns the current value of this bounding set
-capability flag in effect for the current process. This operation is
-unpriveged. Note, a macro function
-.BI "CAP_IS_SUPPORTED(cap_value_t " cap )
+capability flag in effect for the calling process. This operation is
+unprivileged. Note, a macro function
+.BR "CAP_IS_SUPPORTED(cap_value_t " cap )
is provided that evaluates to true (1) if the system supports the
specified capability,
.IR cap .
@@ -75,11 +101,101 @@ If the system does not support the capability, this function returns
.PP
.BR cap_drop_bound ()
can be used to lower the specified bounding set capability,
-.BR cap ,
+.BR cap .
To complete successfully, the prevailing
.I effective
capability set must have a raised
.BR CAP_SETPCAP .
+.PP
+.BR cap_get_ambient ()
+returns the prevailing value of the specified ambient capability, or
+\-1 if the capability is not supported by the running kernel. A macro
+.BR CAP_AMBIENT_SUPPORTED ()
+uses this function to determine if ambient capabilities are supported
+by the kernel.
+.PP
+.BR cap_set_ambient ()
+sets the specified ambient capability to a specific value. To complete
+successfully, the prevailing
+.I effective
+capability set must have a raised
+.BR CAP_SETPCAP .
+Further, to raise a specific ambient capability the
+.IR inheritable " and " permitted
+sets of the calling process must contain the specified capability, and
+raised ambient bits will only be retained as long as this remains true.
+.PP
+.BR cap_reset_ambient ()
+resets all of the ambient capabilities for the calling process to
+their lowered value. To complete successfully, the prevailing
+.I effective
+capability set must have a raised
+.BR CAP_SETPCAP .
+Note, the ambient set is intended to operate in a legacy environment
+where the application has limited awareness of capabilities in
+general. Executing a file with associated filesystem capabilities, the
+kernel will implicitly reset the ambient set of the process. Also,
+changes to the inheritable set by the program code without explicitly
+fixing up the ambient set can also drop ambient bits.
+.PP
+.BR cap_get_secbits ()
+returns the securebits of the calling process. These bits affect the
+way in which the calling process implements things like setuid-root
+fixup and ambient capabilities.
+.PP
+.BR cap_set_secbits ()
+attempts to modify the securebits of the calling process. Note
+.B CAP_SETPCAP
+must be in the effective capability set for this to be effective. Some
+settings lock the sub-states of the securebits, so attempts to set values
+may be denied by the kernel even when the
+.B CAP_SETPCAP
+capability is raised.
+.PP
+To help manage the complexity of the securebits, libcap provides a
+combined securebit and capability set concept called a libcap mode.
+.BR cap_get_mode ()
+attempts to summarize the prevailing security environment in the form
+of a numerical
+.B cap_mode_t
+value. A text representation of the mode can be obtained via the
+.BR cap_mode_name ()
+function. The vast majority of combinations of these values are not well
+defined in terms of a libcap mode, and for these states
+.BR cap_get_mode ()
+returns
+.RB ( cap_mode_t )0
+which
+.BR cap_mode_name ()
+identifies as
+.RI `` UNCERTAIN ''.
+Supported modes are:
+.BR CAP_MODE_NOPRIV ", " CAP_MODE_PURE1E_INIT " and " CAP_MODE_PURE1E .
+.PP
+.BR cap_set_mode ()
+can be used to set the desired mode. The permitted capability
+.B CAP_SETPCAP
+is required for this function to succeed.
+.PP
+.BR cap_setuid ()
+is a convenience function for the
+.BR setuid (2)
+system call.
+.BR cap_setuid ()
+arranges for the right effective capability to be raised in order to
+perform the system call, and also arranges to preserve the
+availability of permitted capabilities after the uid has
+changed. Following this call all effective capabilities are lowered.
+.PP
+.BR cap_setgroups ()
+is a convenience function for performing both
+.BR setgid (2)
+and
+.BR setgroups (2)
+calls in one call. The
+.BR cap_setgroups ()
+call raises the right effective capability for the duration of the
+call, and empties the effective capability set before returning.
.SH "RETURN VALUE"
The functions
.BR cap_get_proc ()
@@ -89,11 +205,11 @@ return a non-NULL value on success, and NULL on failure.
.PP
The function
.BR cap_get_bound ()
-returns -1 if the requested capability is unknown, otherwise the
+returns \-1 if the requested capability is unknown, otherwise the
return value reflects the current state of that capability in the
prevailing bounding set. Note, a macro function,
.PP
-The functions
+All of the setting functions such as
.BR cap_set_proc ()
and
.BR cap_drop_bound ()
@@ -103,7 +219,7 @@ On failure,
.I errno
is set to
.BR EINVAL ,
-.BR EPERM,
+.BR EPERM ,
or
.BR ENOMEM .
.SH "CONFORMING TO"
@@ -114,6 +230,28 @@ are specified in the withdrawn POSIX.1e draft specification.
.BR cap_get_pid ()
is a Linux extension.
.SH "NOTES"
+Neither glibc, nor the Linux kernel honors POSIX semantics for setting
+capabilities and securebits in the presence of pthreads. That is,
+changing capability sets, by default, only affects the running
+thread. To be meaningfully secure, however, the capability sets should
+be mirrored by all threads within a common program because threads are
+not memory isolated. As a workaround for this,
+.B libcap
+is packaged with a separate POSIX semantics system call library:
+.BR libpsx .
+If your program uses POSIX threads, to achieve meaningful POSIX
+semantics capability manipulation, you should link your program with:
+.sp
+.B ld ... \-lcap \-lpsx \-lpthread \-\-wrap=pthread_create
+.sp
+or,
+.sp
+.B gcc ... \-lcap \-lpsx \-lpthread \-Wl,\-wrap,pthread_create
+.sp
+When linked this way, due to linker magic, libcap uses
+.BR psx_syscall "(3) and " psx_syscall6 (3)
+to perform state setting system calls.
+.SS capgetp() and capsetp()
The library also supports the deprecated functions:
.PP
.BI "int capgetp(pid_t " pid ", cap_t " cap_d );
@@ -123,47 +261,62 @@ The library also supports the deprecated functions:
.BR capgetp ()
attempts to obtain the capabilities of some other process; storing the
capabilities in a pre-allocated
-.IR cap_d . See
+.IR cap_d .
+See
.BR cap_init ()
-for information on allocating an empty capability set. This function,
-.BR capgetp (),
-is deprecated, you should use
+for information on allocating an empty capability set. This function
+is deprecated; you should use
.BR cap_get_pid ().
.PP
.BR capsetp ()
-attempts to set the capabilities of some other process(es),
-.IR pid .
+attempts to set the capabilities of the calling process or of
+some other process(es),
+.IR pid .
+Note that setting capabilities of another process is only possible on older
+kernels that do not provide VFS support for setting file capabilities.
+See
+.BR capset (2)
+for information on which kernels provide such support.
+.PP
If
.I pid
is positive it refers to a specific process; if it is zero, it refers
-to the current process; -1 refers to all processes other than the
-current process and process '1' (typically
+to the calling process; \-1 refers to all processes other than the
+calling process and process '1' (typically
.BR init (8));
other negative values refer to the
-.I -pid
-process group. In order to use this function, the kernel must support
-it and the current process must have
+.I \-pid
+process group.
+.PP
+In order to use this function, the kernel must support
+it and the calling process must have
.B CAP_SETPCAP
raised in its Effective capability set. The capabilities set in the
target process(es) are those contained in
.IR cap_d .
+.PP
Kernels that support filesystem capabilities redefine the semantics of
.B CAP_SETPCAP
-and on such systems this function will always fail for any target not
-equal to the current process.
+and on such systems,
+.BR capsetp ()
+will always fail for any target not
+equal to the calling process.
.BR capsetp ()
returns zero for success, and \-1 on failure.
-
-Where supported by the kernel, the function
+.PP
+On kernels where it is (was) supported,
.BR capsetp ()
should be used with care. It existed, primarily, to overcome an early
lack of support for capabilities in the filesystems supported by
-Linux. Note that, by default, the only processes that have
+Linux. Note that on older kernels where
+.BR capsetp ()
+could be used to set the capabilities of another process,
+the only processes that had
.B CAP_SETPCAP
-available to them are processes started as a kernel thread.
+available to them by default were processes started as kernel threads.
(Typically this includes
.BR init (8),
-kflushd and kswapd). You will need to recompile the kernel to modify
+kflushd and kswapd.) A kernel recompilation was needed to modify
this default.
.SH EXAMPLE
The code segment below raises the
@@ -173,8 +326,9 @@ and
effective capabilities for the caller:
.nf
+ ...
cap_t caps;
- cap_value_t cap_list[2];
+ const cap_value_t cap_list[2] = {CAP_FOWNER, CAP_SETFCAP};
if (!CAP_IS_SUPPORTED(CAP_SETFCAP))
/* handle error */
@@ -183,22 +337,58 @@ effective capabilities for the caller:
if (caps == NULL)
/* handle error */;
- cap_list[0] = CAP_FOWNER;
- cap_list[1] = CAP_SETFCAP;
- if (cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_list, CAP_SET) == -1)
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_list, CAP_SET) == \-1)
+ /* handle error */;
+
+ if (cap_set_proc(caps) == \-1)
/* handle error */;
- if (cap_set_proc(caps) == -1)
+ if (cap_free(caps) == \-1)
/* handle error */;
+ ...
+
+.fi
+Alternatively, to completely drop privilege in a program launched
+setuid-root but wanting to run as a specific user ID etc. in such a
+way that neither it, nor any of its children can acquire privilege
+again:
+.nf
+
+ ...
+ uid_t nobody = 65534;
+ const gid_t groups[] = {65534};
- if (cap_free(caps) == -1)
+ if (cap_setgroups(groups[0], 1, groups) != 0)
/* handle error */;
+ if (cap_setuid(nobody) != 0)
+ /* handle error */;
+
+ /*
+ * privilege is still available here
+ */
+
+ if (cap_set_mode(CAP_MODE_NOPRIV) != 0)
+ /* handle error */
+ ...
+
.fi
+Note, the above sequence can be performed by the
+.B capsh
+tool as follows:
+.sp
+.B sudo /sbin/capsh \-\-user=nobody \-\-mode=NOPRIV \-\-print
+.sp
+where
+.B \-\-print
+displays the resulting privilege state.
.SH "SEE ALSO"
.BR libcap (3),
+.BR libpsx (3),
+.BR capsh (1),
.BR cap_clear (3),
.BR cap_copy_ext (3),
.BR cap_from_text (3),
.BR cap_get_file (3),
.BR cap_init (3),
-.BR capabilities (7)
+.BR psx_syscall (3),
+.BR capabilities (7).
diff --git a/doc/cap_get_secbits.3 b/doc/cap_get_secbits.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_get_secbits.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_init.3 b/doc/cap_init.3
index f198f63..362db66 100644
--- a/doc/cap_init.3
+++ b/doc/cap_init.3
@@ -13,7 +13,7 @@ cap_init, cap_free, cap_dup \- capability data object storage management
.sp
.BI "cap_t cap_dup(cap_t " cap_p );
.sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
.SH DESCRIPTION
The capabilities associated with a file or process are never edited
directly. Instead, working storage is allocated to contain a
@@ -41,7 +41,7 @@ The
argument may identify either a
.I cap_t
entity, or a
-.I char *
+.I "char\ *"
entity allocated by the
.BR cap_to_text ()
function.
diff --git a/doc/cap_mode_name.3 b/doc/cap_mode_name.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_mode_name.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_reset_ambient.3 b/doc/cap_reset_ambient.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_reset_ambient.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_ambient.3 b/doc/cap_set_ambient.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_set_ambient.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_mode.3 b/doc/cap_set_mode.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_set_mode.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_secbits.3 b/doc/cap_set_secbits.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_set_secbits.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_setgroups.3 b/doc/cap_setgroups.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_setgroups.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_setuid.3 b/doc/cap_setuid.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_setuid.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/capsh.1 b/doc/capsh.1
index e68df2c..ab20c44 100644
--- a/doc/capsh.1
+++ b/doc/capsh.1
@@ -1,7 +1,4 @@
-.\"
-.\" capsh.1 Man page added 2009-12-23 Andrew G. Morgan <morgan@kernel.org>
-.\"
-.TH CAPSH 1 "2011-04-24" "libcap 2" "User Commands"
+.TH CAPSH 1 "2020-10-27" "libcap 2" "User Commands"
.SH NAME
capsh \- capability shell wrapper
.SH SYNOPSIS
@@ -13,27 +10,40 @@ this tool. This tool provides a handy wrapper for certain types
of capability testing and environment creation. It also provides some
debugging features useful for summarizing capability state.
.SH OPTIONS
-The tool takes a number of optional arguments, acting on them in the
+.B capsh
+takes a number of optional arguments, acting on them in the
order they are provided. They are as follows:
-.TP 22
-.B --print
+.TP
+.B \-\-help
+Display the list of commands supported by
+.BR capsh .
+.TP
+.B \-\-print
Display prevailing capability and related state.
.TP
-.BI -- " [args]"
+.BI \-\- " [args]"
Execute
.B /bin/bash
with trailing arguments. Note, you can use
-.B -c 'command to execute'
+.B \-c 'command to execute'
for specific commands.
.TP
.B ==
Execute
.B capsh
-again with remaining arguments. Useful for testing
+again with the remaining arguments. Useful for testing
.BR exec ()
-behavior.
+behavior. Note, PATH is searched when the running
+.B capsh
+was found via the shell's PATH searching. If the
+.B exec
+occurs after a
+.BI \-\-chroot= /some/path
+argument the PATH located binary may not resolve to the same binary
+as that running initially. This behavior is an intended feature as it
+can complete the chroot transition.
.TP
-.BI --caps= cap-set
+.BI \-\-caps= cap-set
Set the prevailing process capabilities to those specified by
.IR cap-set .
Where
@@ -41,70 +51,115 @@ Where
is a text-representation of capability state as per
.BR cap_from_text (3).
.TP
-.BI --drop= cap-list
+.BI \-\-drop= cap-list
Remove the listed capabilities from the prevailing bounding set. The
-capabilites are a comma separated list of capabilities as recognized
+capabilities are a comma-separated list of capabilities as recognized
by the
.BR cap_from_name (3)
-function. Use of this feature requires that the capsh program is
-operating with
+function. Use of this feature requires that
+.B capsh
+is operating with
.B CAP_SETPCAP
in its effective set.
.TP
-.BI --inh= cap-list
+.BI \-\-inh= cap-list
Set the inheritable set of capabilities for the current process to
equal those provided in the comma separated list. For this action to
succeed, the prevailing process should already have each of these
capabilities in the union of the current inheritable and permitted
-capability sets, or the capsh program is operating with
+capability sets, or
+.B capsh
+should be operating with
.B CAP_SETPCAP
in its effective set.
.TP
-.BI --user= username
+.BI \-\-user= username
Assume the identity of the named user. That is, look up the user's
-.IR uid " and " gid
+UID and GID
with
.BR getpwuid (3)
and their group memberships with
.BR getgrouplist (3)
-and set them all.
+and set them all using
+.BR cap_setuid (3)
+and
+.BR cap_setgroups (3).
+Following this command, the effective capabilities will be cleared,
+but the permitted set will not be, so the running program is still
+privileged.
.TP
-.BI --uid= id
+.B \-\-modes
+Lists all of the libcap modes supported by
+.BR \-\-mode .
+.TP
+.BR \-\-mode= <mode>
+Force the program into a
+.BR cap_set_mode (3)
+security mode. This is a set of securebits and prevailing capability
+arrangement recommended for its pre-determined security stance.
+.TP
+.BR \-\-inmode= <mode>
+Confirm that the prevailing mode is that specified in
+.IR <mode> ,
+or exit with a status 1.
+.TP
+.BI \-\-uid= id
Force all
-.B uid
+UID
values to equal
.I id
using the
.BR setuid (2)
-system call.
+system call. This argument may require explicit preparation of the
+effective set.
.TP
-.BI --gid= <id>
+.BR \-\-cap\-uid= <uid>
+Use the
+.BR cap_setuid (3)
+function to set the UID of the current process. This performs all
+preparations for setting the UID without dropping capabilities in the
+process. Following this command the prevailing effective capabilities
+will be lowered.
+.TP
+.BI \-\-is\-uid= <id>
+Exit with status 1 unless the current
+UID equals
+.IR <id> .
+.TP
+.BI \-\-gid= <id>
Force all
-.B gid
+GID
values to equal
.I id
using the
.BR setgid (2)
system call.
.TP
-.BI --groups= <id-list>
+.BI \-\-is\-gid= <id>
+Exit with status 1 unless the current
+GID equals
+.IR <id> .
+.TP
+.BI \-\-groups= <gid-list>
Set the supplementary groups to the numerical list provided. The
groups are set with the
.BR setgroups (2)
-system call.
+system call. See
+.B \-\-user
+for a more convenient way of doing this.
.TP
-.BI --keep= <0|1>
+.BI \-\-keep= <0|1>
In a non-pure capability mode, the kernel provides liberal privilege
to the super-user. However, it is normally the case that when the
super-user changes
-.I uid
+UID
to some lesser user, then capabilities are dropped. For these
situations, the kernel can permit the process to retain its
capabilities after a
.BR setuid (2)
system call. This feature is known as
.I keep-caps
-support. The way to activate it using this script is with this
+support. The way to activate it using this program is with this
argument. Setting the value to 1 will cause
.I keep-caps
to be active. Setting it to 0 will cause keep-caps to deactivate for
@@ -113,13 +168,26 @@ the current process. In all cases,
is deactivated when an
.BR exec ()
is performed. See
-.B --secbits
+.B \-\-secbits
for ways to disable this feature.
.TP
-.BI --secbits= N
-XXX - need to document this feature.
+.BI \-\-secbits= N
+Set the security-bits for the program.
+This is done using the
+.BR prctl (2)
+.B PR_SET_SECUREBITS
+operation.
+The list of supported bits and their meaning can be found in
+the
+.B <sys/secbits.h>
+header file. The program will list these bits via the
+.B \-\-print
+command.
+The argument is expressed as a numeric bitmask,
+in any of the formats permitted by
+.BR strtoul (3).
.TP
-.BI --chroot= path
+.BI \-\-chroot= /some/path
Execute the
.BR chroot (2)
system call with the new root-directory (/) equal to
@@ -128,46 +196,100 @@ This operation requires
.B CAP_SYS_CHROOT
to be in effect.
.TP
-.BI --forkfor= sec
+.BI \-\-forkfor= sec
+This command causes the program to fork a child process for so many
+seconds. The child will sleep that long and then exit with status
+0. The purpose of this command is to support exploring the way
+processes are killable in the face of capability changes. See the
+.B \-\-killit
+command. Only one fork can be active at a time.
.TP
-.BI --killit= sig
+.BI \-\-killit= sig
+This command causes a
+.B \-\-forkfor
+child to be
+.BR kill (2)d
+with the specified signal. The command then waits for the child to exit.
+If the exit status does not match the signal being used to kill it, the
+.B capsh
+program exits with status 1.
.TP
-.BI --decode= N
+.BI \-\-decode= N
This is a convenience feature. If you look at
.B /proc/1/status
there are some capability related fields of the following form:
+.nf
- CapInh: 0000000000000000
- CapPrm: ffffffffffffffff
- CapEff: fffffffffffffeff
- CapBnd: ffffffffffffffff
+CapInh: 0000000000000000
+CapPrm: 0000003fffffffff
+CapEff: 0000003fffffffff
+CapBnd: 0000003fffffffff
+CapAmb: 0000000000000000
+.fi
This option provides a quick way to decode a capability vector
-represented in this form. For example, the missing capability from
-this effective set is 0x0100. By running:
-
- capsh --decode=0x0100
-
-we observe that the missing capability is:
-.BR cap_setpcap .
+represented in this hexadecimal form.
+Here's an example that decodes the two lowest capability bits:
+.IP
+.nf
+$ \fBcapsh \-\-decode=3\fP
+0x0000000000000003=cap_chown,cap_dac_override
+.fi
.TP
-.BI --supports= xxx
+.BI \-\-supports= xxx
As the kernel evolves, more capabilities are added. This option can be used
to verify the existence of a capability on the system. For example,
-.BI --supports= cap_syslog
-will cause capsh to promptly exit with a status of 1 when run on
+.BI \-\-supports= cap_syslog
+will cause
+.B capsh
+to promptly exit with a status of 1 when run on
kernel 2.6.27. However, when run on kernel 2.6.38 it will silently
succeed.
.TP
+.BI \-\-has\-p= xxx
+Exit with status 1 unless the
+.I permitted
+vector has capability
+.B xxx
+raised.
+.TP
+.B \-\-has\-ambient
+Performs a check to see if the running kernel supports ambient
+capabilities. If not,
+.B capsh
+exits with status 1.
+.TP
+.BI \-\-has\-a= xxx
+Exit with status 1 unless the
+.I ambient
+vector has capability
+.B xxx
+raised.
+.TP
+.BI \-\-addamb= xxx
+Adds the specified ambient capability to the running process.
+.TP
+.BI \-\-delamb= xxx
+Removes the specified ambient capability from the running process.
+.TP
+.B \-\-noamb
+Drops all ambient capabilities from the running process.
.SH "EXIT STATUS"
-Following successful execution the tool exits with status 0. Following
-an error, the tool immediately exits with status 1.
+Following successful execution,
+.B capsh
+exits with status 0. Following
+an error,
+.B capsh
+immediately exits with status 1.
.SH AUTHOR
Written by Andrew G. Morgan <morgan@kernel.org>.
.SH "REPORTING BUGS"
-Please report bugs to the author.
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1047723&product=Tools&resolution=---
.SH "SEE ALSO"
.BR libcap (3),
-.BR getcap (8), setcap (8)
+.BR getcap (8),
+.BR setcap (8)
and
.BR capabilities (7).
diff --git a/doc/getcap.8 b/doc/getcap.8
index 1d5ac0f..04b601c 100644
--- a/doc/getcap.8
+++ b/doc/getcap.8
@@ -1,29 +1,37 @@
-.\"
-.\" $Id: getcap.8,v 1.1.1.1 1999/04/17 22:16:31 morgan Exp $
.\" written by Andrew Main <zefram@dcs.warwick.ac.uk>
-.\"
-.TH GETCAP 8 "12 Nov 2007"
+.TH GETCAP 8 "2020-01-07"
.SH NAME
getcap \- examine file capabilities
.SH SYNOPSIS
-\fBgetcap\fP [-v] [-r] [-h] \fIfilename\fP [ ... ]
+\fBgetcap\fP [\-v] [\-n] [\-r] [\-h] \fIfilename\fP [ ... ]
.SH DESCRIPTION
.B getcap
-displays the name and capabilities of each specified
+displays the name and capabilities of each specified file.
.SH OPTIONS
.TP 4
-.B -r
-enables recursive search.
+.B \-h
+prints quick usage.
.TP 4
-.B -v
-enables to display all searched entries, even if it has no file-capabilities.
+.B \-n
+prints any non-zero user namespace root user ID value
+found to be associated with
+a file's capabilities.
.TP 4
-.B -h
-prints quick usage.
+.B \-r
+enables recursive search.
+.TP 4
+.B \-v
+display all searched entries, even if they have no file-capabilities.
.TP 4
.IR filename
One file per line.
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1047723&product=Tools&resolution=---
.SH "SEE ALSO"
.BR cap_get_file (3),
.BR cap_to_text (3),
+.BR capabilities (7),
+.BR user_namespaces (7),
.BR setcap (8)
diff --git a/doc/getpcaps.8 b/doc/getpcaps.8
new file mode 100644
index 0000000..d519357
--- /dev/null
+++ b/doc/getpcaps.8
@@ -0,0 +1,45 @@
+.\" Hey, EMACS: -*- nroff -*-
+.TH GETPCAPS 8 "2020-01-04"
+.\" Please adjust this date whenever revising the manpage.
+.SH NAME
+getpcaps \- display process capabilities
+.SH SYNOPSIS
+.BR getpcaps " [optional args]"
+.IR pid ...
+.SH DESCRIPTION
+.B getpcaps
+displays the capabilities on the processes indicated by the
+.I pid
+value(s) given on the command line.
+A
+.I pid
+of 0 displays the capabilities of the process that is running
+.B getpcaps
+itself.
+.PP
+The capabilities are displayed in
+the
+.BR cap_from_text (3)
+format.
+.PP
+Optional arguments:
+.TP
+.BR \-\-help " or " \-\-usage
+Displays usage information and exits.
+.TP
+.BR \-\-ugly " or " \-\-legacy
+Displays output in a somewhat ugly legacy format.
+.TP
+.B \-\-verbose
+Displays output in a legacy-like format but not quite so ugly in modern
+default terminal fonts.
+.SH SEE ALSO
+.BR capsh (1),
+.BR capabilities (7),
+.BR getcap (8),
+.BR setcap (8)
+.SH AUTHOR
+This manual page was originally written by Robert Bihlmeyer
+<robbe@debian.org>, for the Debian GNU/Linux system (but may be used
+by others).
+
diff --git a/doc/libcap.3 b/doc/libcap.3
index 0e76b4e..730e275 100644
--- a/doc/libcap.3
+++ b/doc/libcap.3
@@ -1,4 +1,4 @@
-.TH LIBCAP 3 "2008-07-29" "" "Linux Programmer's Manual"
+.TH LIBCAP 3 "2020-01-07" "" "Linux Programmer's Manual"
.SH NAME
cap_clear, cap_clear_flag, cap_compare, cap_copy_ext, cap_copy_int, \
cap_free, cap_from_name, cap_from_text, cap_get_fd, cap_get_file, \
@@ -56,7 +56,7 @@ cap_get_pid, cap_dup \- capability data object manipulation
.sp
.BI "cap_t cap_dup(cap_t " cap_p );
.sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
.fi
.SH DESCRIPTION
These functions work on a capability state held in working storage.
@@ -97,11 +97,14 @@ is set appropriately.
These functions are as per the withdrawn POSIX.1e draft specification.
The following functions are Linux extensions:
.BR cap_clear_flag (),
-.BR cap_compare (),
.BR cap_from_name (),
.BR cap_to_name (),
and
.BR cap_compare ().
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1047723&product=Tools&resolution=---
.SH "SEE ALSO"
.BR cap_clear (3),
.BR cap_copy_ext (3),
diff --git a/doc/libpsx.3 b/doc/libpsx.3
new file mode 100644
index 0000000..61baa88
--- /dev/null
+++ b/doc/libpsx.3
@@ -0,0 +1,89 @@
+.TH LIBPSX 3 "2021-01-31" "" "Linux Programmer's Manual"
+.SH NAME
+psx_syscall3, psx_syscall6 \- POSIX semantics for system calls
+.SH SYNOPSIS
+.nf
+.B #include <sys/psx_syscall.h>
+.sp
+.BI "long int psx_syscall3(long int" " syscall_nr, " "long int" " arg1, " "long int" " arg2, " "long int" " arg3);"
+.sp
+.BI "long int psx_syscall6(long int" " syscall_nr, " "long int" " arg1, " "long int" " arg2, " "long int" " arg3, " "long int" " arg4, " "long int" " arg5, " "long int" " arg6);"
+.sp
+Link with one of these:
+.sp
+.I ld ... \-lpsx \-lpthread \-\-wrap=pthread_create
+.sp
+.I gcc ... \-lpsx \-lpthread \-Wl,\-wrap,pthread_create
+.SH DESCRIPTION
+The
+.B libpsx
+library attempts to fill a gap left by the
+.BR pthreads (7)
+implementation on Linux. To be compliant with POSIX threads, via the
+.BR nptl "(7) " setxid
+mechanism glibc maintains consistent UID and GID credentials amongst
+all of the threads associated with the current process. However, other
+credential state is not supported by this abstraction. To support
+these extended kernel managed security attributes,
+.B libpsx
+provides a more generic pair of wrapping system call functions:
+.BR psx_syscall3 "() and " psx_syscall6 ().
+Like the
+.B setxid
+mechanism, the coordination of thread state is mediated by a realtime
+signal. Whereas the
+.B nptl:setxid
+mechanism uses signo=33 (which is hidden by glibc below a redefined
+SIGRTMIN),
+.B libpsx
+inserts itself in the SIGSYS handler stack. It goes to great lengths to
+be the first such handler but acts as a pass-through for other SIGSYS
+uses.
+.PP
+A linker trick of
+.I wrapping
+the
+.BR pthread_create ()
+call with a psx thread registration function is used to ensure
+.B libpsx
+can keep track of all pthreads.
+.PP
+An inefficient macrology trick supports the
+.BR psx_syscall ()
+pseudo function which takes 1 to 7 arguments, depending on the needs
+of the caller. The macrology pads out the call to actually use
+.BR psx_syscall3 ()
+or
+.BR psx_syscall6 ()
+with zeros filling the missing arguments. While using this in source
+code will make it appear clean, the actual code footprint is
+larger. You are encouraged to use the more explicit
+.BR psx_syscall3 ()
+and
+.BR psx_syscall6 ()
+functions as needed.
+.SH RETURN VALUE
+The return value for system call functions is generally the value
+returned by the kernel, or \-1 in the case of an error. In such cases
+.BR errno (3)
+is set to the detailed error value. The
+.BR psx_syscall3 "() and " psx_syscall6 ()
+functions attempt a single threaded system call and return immediately
+in the case of an error. Should this call succeed, then the same
+system calls are executed from a signal handler on each of the other
+threads of the process.
+.SH CONFORMING TO
+The needs of
+.BR libcap (3)
+for POSIX semantics of capability manipulation. You can read more
+about why this is needed here:
+.TP
+https://sites.google.com/site/fullycapable/who-ordered-libpsx
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1047723&product=Tools&resolution=---
+.SH SEE ALSO
+.BR libcap (3),
+.BR pthreads "(7) and"
+.BR nptl (7).
diff --git a/doc/md2html.lua b/doc/md2html.lua
new file mode 100644
index 0000000..c2677d8
--- /dev/null
+++ b/doc/md2html.lua
@@ -0,0 +1,6 @@
+-- This is the links-to-html.lua example from stackoverflow:
+-- https://stackoverflow.com/questions/40993488/convert-markdown-links-to-html-with-pandoc
+function Link(el)
+ el.target = string.gsub(el.target, "%.md", ".html")
+ return el
+end
diff --git a/doc/mkmd.sh b/doc/mkmd.sh
new file mode 100755
index 0000000..af835d5
--- /dev/null
+++ b/doc/mkmd.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+#
+# Handy script to rebuild the markdown version of the man pages.
+# This uses pandoc if it is installed.
+#
+# For rendering the md, we can use a different command:
+#
+# cd md; for x in *.md ; do pandoc -s $x --metadata pagetitle="${x%.md}" -o ${x%.md}.html --lua-filter=../md2html.lua ; done
+
+if [[ -z "$(which pandoc)" ]]; then
+ echo "pandoc not found - skipping conversion"
+ exit 0
+fi
+
+outdir="$1"
+if [[ -z "${outdir}" ]]; then
+ echo "usage $0 <outdir>"
+ exit 1
+fi
+
+mkdir -p "${outdir}"
+if [[ $? -ne 0 ]]; then
+ echo "failed to make output directory: ${outdir}"
+ exit 1
+fi
+
+index="${outdir}/index.md"
+
+function do_page () {
+ m="$1"
+ base="${m%.*}"
+ sect="${m#*.}"
+ output="${base}-${sect}.md"
+
+ redir="$(grep '^.so man' "${m}")"
+ if [[ $? -eq 0 ]]; then
+ r="${redir#*/}"
+ rbase="${r%.*}"
+ rsect="${r#*.}"
+ echo "* [${base}(${sect})](${rbase}-${rsect}.md)" >> "${index}"
+ return
+ fi
+
+ pandoc -f man -t markdown < "${m}" | sed 's/\*\*\([^*]\+\)\*\*(\([138]\+\))/[\1(\2)](\1-\2.md)/g' > "${outdir}/${base}-${sect}.md"
+ echo "* [${base}(${sect})](${base}-${sect}.md)" >> "${index}"
+}
+
+cat > "${index}" <<EOF
+# Manpages for libcap and libpsx
+
+## Individual reference pages
+EOF
+
+# Assumes the m's are listed alphabetically.
+for n in 1 3 8 ; do
+ cat >> "${index}" <<EOF
+
+### Section ${n}
+
+EOF
+ for m in *.${n}; do
+ do_page "${m}"
+ done
+done
+
+cat >> "${index}" <<EOF
+
+## More information
+
+For further information, see the
+[FullyCapable](https://sites.google.com/site/fullycapable/) homepage
+for libcap.
+
+## MD page generation
+
+These official man pages for libcap and libpsx were converted to
+markdown using [pandoc](https://pandoc.org).
+
+EOF
diff --git a/doc/psx_syscall.3 b/doc/psx_syscall.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_syscall.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/psx_syscall3.3 b/doc/psx_syscall3.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_syscall3.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/psx_syscall6.3 b/doc/psx_syscall6.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_syscall6.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/setcap.8 b/doc/setcap.8
index 9344ba5..463752d 100644
--- a/doc/setcap.8
+++ b/doc/setcap.8
@@ -1,49 +1,63 @@
-.\"
-.\" $Id: setcap.8,v 1.1.1.1 1999/04/17 22:16:31 morgan Exp $
-.\"
-.TH SETCAP 8 "24th October 2008"
+.TH SETCAP 8 "2020-01-07"
.SH NAME
setcap \- set file capabilities
.SH SYNOPSIS
-\fBsetcap\fP [-q] [-v] (\fIcapabilities|-|-r) filename\fP [ ... \fIcapabilitiesN\fP \fIfileN\fP ]
+\fBsetcap\fP [\-q] [\-n <rootuid>] [\-v] {\fIcapabilities|\-|\-r} filename\fP [ ... \fIcapabilitiesN\fP \fIfileN\fP ]
.SH DESCRIPTION
In the absence of the
-.B -v
+.B \-v
(verify) option
.B setcap
sets the capabilities of each specified
.I filename
to the
.I capabilities
-specified. The
-.B -v
+specified. The optional
+.B \-n <rootuid>
+argument can be used to set the file capability for use only in a
+user namespace with this root user ID owner. The
+.B \-v
option is used to verify that the specified capabilities are currently
-associated with the file.
+associated with the file. If \-v and \-n are supplied, the
+.B \-n <rootuid>
+argument is also verified.
.PP
The
.I capabilities
are specified in the form described in
-.IR cap_from_text (3).
+.BR cap_from_text (3).
.PP
The special capability string,
-.BR '-' ,
+.BR '\-' ,
can be used to indicate that capabilities are read from the standard
input. In such cases, the capability set is terminated with a blank
line.
.PP
The special capability string,
-.BR '-r' ,
-is used to remove a capability set from a file.
+.BR '\-r' ,
+is used to remove a capability set from a file. Note, setting an empty
+capability set is
+.B not the same
+as removing it. An empty set can be used to guarantee a file is not
+executed with privilege in spite of the fact that the prevailing
+ambient+inheritable sets would otherwise bestow capabilities on
+executed binaries.
.PP
The
-.B -q
+.B \-q
flag is used to make the program less verbose in its output.
.SH "EXIT CODE"
The
.B setcap
program will exit with a 0 exit code if successful. On failure, the
exit code is 1.
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1047723&product=Tools&resolution=---
.SH "SEE ALSO"
.BR cap_from_text (3),
-.BR cap_set_file (3),
+.BR cap_get_file (3),
+.BR capabilities (7),
+.BR user_namespaces (7),
.BR getcap (8)
diff --git a/doc/values/0.txt b/doc/values/0.txt
new file mode 100644
index 0000000..dd2f360
--- /dev/null
+++ b/doc/values/0.txt
@@ -0,0 +1,2 @@
+Allows a process to arbitrarily change the user and
+group ownership of a file.
diff --git a/doc/values/1.txt b/doc/values/1.txt
new file mode 100644
index 0000000..a0e7f72
--- /dev/null
+++ b/doc/values/1.txt
@@ -0,0 +1,5 @@
+Allows a process to override all Discretionary
+Access Control (DAC) access, including ACL execute
+access. That is read, write or execute files that the
+process would otherwise not have access to. This
+excludes DAC access covered by CAP_LINUX_IMMUTABLE.
diff --git a/doc/values/10.txt b/doc/values/10.txt
new file mode 100644
index 0000000..8335a6b
--- /dev/null
+++ b/doc/values/10.txt
@@ -0,0 +1,3 @@
+Allows a process to bind to privileged ports:
+ - TCP/UDP sockets below 1024
+ - ATM VCIs below 32
diff --git a/doc/values/11.txt b/doc/values/11.txt
new file mode 100644
index 0000000..6f63994
--- /dev/null
+++ b/doc/values/11.txt
@@ -0,0 +1,2 @@
+Allows a process to broadcast to the network and to
+listen to multicast.
diff --git a/doc/values/12.txt b/doc/values/12.txt
new file mode 100644
index 0000000..f4dc172
--- /dev/null
+++ b/doc/values/12.txt
@@ -0,0 +1,17 @@
+Allows a process to perform network configuration
+operations:
+ - interface configuration
+ - administration of IP firewall, masquerading and
+ accounting
+ - setting debug options on sockets
+ - modification of routing tables
+ - setting arbitrary process, and process group
+ ownership on sockets
+ - binding to any address for transparent proxying
+ (this is also allowed via CAP_NET_RAW)
+ - setting TOS (Type of service)
+ - setting promiscuous mode
+ - clearing driver statistics
+ - multicasting
+ - read/write of device-specific registers
+ - activation of ATM control sockets
diff --git a/doc/values/13.txt b/doc/values/13.txt
new file mode 100644
index 0000000..7a1faf7
--- /dev/null
+++ b/doc/values/13.txt
@@ -0,0 +1,5 @@
+Allows a process to use raw networking:
+ - RAW sockets
+ - PACKET sockets
+ - binding to any address for transparent proxying
+ (also permitted via CAP_NET_ADMIN)
diff --git a/doc/values/14.txt b/doc/values/14.txt
new file mode 100644
index 0000000..1f248d6
--- /dev/null
+++ b/doc/values/14.txt
@@ -0,0 +1,3 @@
+Allows a process to lock shared memory segments for IPC
+purposes. Also enables mlock and mlockall system
+calls.
diff --git a/doc/values/15.txt b/doc/values/15.txt
new file mode 100644
index 0000000..0f5e13c
--- /dev/null
+++ b/doc/values/15.txt
@@ -0,0 +1 @@
+Allows a process to override IPC ownership checks.
diff --git a/doc/values/16.txt b/doc/values/16.txt
new file mode 100644
index 0000000..03373b0
--- /dev/null
+++ b/doc/values/16.txt
@@ -0,0 +1,3 @@
+Allows a process to initiate the loading and unloading
+of kernel modules. This capability can effectively
+modify kernel without limit.
diff --git a/doc/values/17.txt b/doc/values/17.txt
new file mode 100644
index 0000000..79474af
--- /dev/null
+++ b/doc/values/17.txt
@@ -0,0 +1,4 @@
+Allows a process to perform raw IO:
+ - permit ioperm/iopl access
+ - permit sending USB messages to any device via
+ /dev/bus/usb
diff --git a/doc/values/18.txt b/doc/values/18.txt
new file mode 100644
index 0000000..2ee0e2a
--- /dev/null
+++ b/doc/values/18.txt
@@ -0,0 +1,3 @@
+Allows a process to perform a chroot syscall to change
+the effective root of the process' file system:
+redirect the directory "/" to some other location.
diff --git a/doc/values/19.txt b/doc/values/19.txt
new file mode 100644
index 0000000..2861571
--- /dev/null
+++ b/doc/values/19.txt
@@ -0,0 +1,2 @@
+Allows a process to perform a ptrace() of any other
+process.
diff --git a/doc/values/2.txt b/doc/values/2.txt
new file mode 100644
index 0000000..99f0031
--- /dev/null
+++ b/doc/values/2.txt
@@ -0,0 +1,4 @@
+Allows a process to override all DAC restrictions
+limiting the read and search of files and
+directories. This excludes DAC access covered by
+CAP_LINUX_IMMUTABLE.
diff --git a/doc/values/20.txt b/doc/values/20.txt
new file mode 100644
index 0000000..3f5796f
--- /dev/null
+++ b/doc/values/20.txt
@@ -0,0 +1 @@
+Allows a process to configure process accounting.
diff --git a/doc/values/21.txt b/doc/values/21.txt
new file mode 100644
index 0000000..4cff57d
--- /dev/null
+++ b/doc/values/21.txt
@@ -0,0 +1,43 @@
+Allows a process to perform a somewhat arbitrary
+grab-bag of privileged operations. Over time, this
+capability should weaken as specific capabilities are
+created for subsets of CAP_SYS_ADMIN's functionality:
+ - configuration of the secure attention key
+ - administration of the random device
+ - examination and configuration of disk quotas
+ - setting the domainname
+ - setting the hostname
+ - calling bdflush()
+ - mount() and umount(), setting up new SMB connection
+ - some autofs root ioctls
+ - nfsservctl
+ - VM86_REQUEST_IRQ
+ - to read/write pci config on alpha
+ - irix_prctl on mips (setstacksize)
+ - flushing all cache on m68k (sys_cacheflush)
+ - removing semaphores
+ - Used instead of CAP_CHOWN to "chown" IPC message
+ queues, semaphores and shared memory
+ - locking/unlocking of shared memory segment
+ - turning swap on/off
+ - forged pids on socket credentials passing
+ - setting readahead and flushing buffers on block
+ devices
+ - setting geometry in floppy driver
+ - turning DMA on/off in xd driver
+ - administration of md devices (mostly the above, but
+ some extra ioctls)
+ - tuning the ide driver
+ - access to the nvram device
+ - administration of apm_bios, serial and bttv (TV)
+ device
+ - manufacturer commands in isdn CAPI support driver
+ - reading non-standardized portions of PCI
+ configuration space
+ - DDI debug ioctl on sbpcd driver
+ - setting up serial ports
+ - sending raw qic-117 commands
+ - enabling/disabling tagged queuing on SCSI
+ controllers and sending arbitrary SCSI commands
+ - setting encryption key on loopback filesystem
+ - setting zone reclaim policy
diff --git a/doc/values/22.txt b/doc/values/22.txt
new file mode 100644
index 0000000..9380ceb
--- /dev/null
+++ b/doc/values/22.txt
@@ -0,0 +1 @@
+Allows a process to initiate a reboot of the system.
diff --git a/doc/values/23.txt b/doc/values/23.txt
new file mode 100644
index 0000000..c5a0360
--- /dev/null
+++ b/doc/values/23.txt
@@ -0,0 +1,6 @@
+Allows a process to manipulate the execution priorities
+of arbitrary processes:
+ - those involving different UIDs
+ - setting their CPU affinity
+ - alter the FIFO vs. round-robin (realtime)
+ scheduling for itself and other processes.
diff --git a/doc/values/24.txt b/doc/values/24.txt
new file mode 100644
index 0000000..bb3bac7
--- /dev/null
+++ b/doc/values/24.txt
@@ -0,0 +1,16 @@
+Allows a process to adjust resource related parameters
+of processes and the system:
+ - set and override resource limits
+ - override quota limits
+ - override the reserved space on ext2 filesystem
+ (this can also be achieved via CAP_FSETID)
+ - modify the data journaling mode on ext3 filesystem,
+ which uses journaling resources
+ - override size restrictions on IPC message queues
+ - configure more than 64Hz interrupts from the
+ real-time clock
+ - override the maximum number of consoles for console
+ allocation
+ - override the maximum number of keymaps
+
+
diff --git a/doc/values/25.txt b/doc/values/25.txt
new file mode 100644
index 0000000..95fd513
--- /dev/null
+++ b/doc/values/25.txt
@@ -0,0 +1,4 @@
+Allows a process to perform time manipulation of clocks:
+ - alter the system clock
+ - enable irix_stime on MIPS
+ - set the real-time clock
diff --git a/doc/values/26.txt b/doc/values/26.txt
new file mode 100644
index 0000000..ee446ba
--- /dev/null
+++ b/doc/values/26.txt
@@ -0,0 +1,3 @@
+Allows a process to manipulate tty devices:
+ - configure tty devices
+ - perform vhangup() of a tty
diff --git a/doc/values/27.txt b/doc/values/27.txt
new file mode 100644
index 0000000..0894164
--- /dev/null
+++ b/doc/values/27.txt
@@ -0,0 +1,2 @@
+Allows a process to perform privileged operations with
+the mknod() system call.
diff --git a/doc/values/28.txt b/doc/values/28.txt
new file mode 100644
index 0000000..fd0b6b9
--- /dev/null
+++ b/doc/values/28.txt
@@ -0,0 +1 @@
+Allows a process to take leases on files.
diff --git a/doc/values/29.txt b/doc/values/29.txt
new file mode 100644
index 0000000..ca1fdb8
--- /dev/null
+++ b/doc/values/29.txt
@@ -0,0 +1,2 @@
+Allows a process to write to the audit log via a
+unicast netlink socket.
diff --git a/doc/values/3.txt b/doc/values/3.txt
new file mode 100644
index 0000000..8a605c2
--- /dev/null
+++ b/doc/values/3.txt
@@ -0,0 +1,4 @@
+Allows a process to perform operations on files, even
+where file owner ID should otherwise need be equal to
+the UID, except where CAP_FSETID is applicable. It
+doesn't override MAC and DAC restrictions.
diff --git a/doc/values/30.txt b/doc/values/30.txt
new file mode 100644
index 0000000..d1ef942
--- /dev/null
+++ b/doc/values/30.txt
@@ -0,0 +1,2 @@
+Allows a process to configure audit logging via a
+unicast netlink socket.
diff --git a/doc/values/31.txt b/doc/values/31.txt
new file mode 100644
index 0000000..163b048
--- /dev/null
+++ b/doc/values/31.txt
@@ -0,0 +1 @@
+Allows a process to set capabilities on files.
diff --git a/doc/values/32.txt b/doc/values/32.txt
new file mode 100644
index 0000000..9c261d8
--- /dev/null
+++ b/doc/values/32.txt
@@ -0,0 +1,4 @@
+Allows a process to override Mandatory Access Control
+(MAC) access. Not all kernels are configured with a MAC
+mechanism, but this is the capability reserved for
+overriding them.
diff --git a/doc/values/33.txt b/doc/values/33.txt
new file mode 100644
index 0000000..a4e441e
--- /dev/null
+++ b/doc/values/33.txt
@@ -0,0 +1,4 @@
+Allows a process to configure the Mandatory Access
+Control (MAC) policy. Not all kernels are configured
+with a MAC enabled, but if they are this capability is
+reserved for code to perform administration tasks.
diff --git a/doc/values/34.txt b/doc/values/34.txt
new file mode 100644
index 0000000..9728790
--- /dev/null
+++ b/doc/values/34.txt
@@ -0,0 +1,2 @@
+Allows a process to configure the kernel's syslog
+(printk) behavior.
diff --git a/doc/values/35.txt b/doc/values/35.txt
new file mode 100644
index 0000000..8ce5a17
--- /dev/null
+++ b/doc/values/35.txt
@@ -0,0 +1,2 @@
+Allows a process to trigger something that can wake the
+system up.
diff --git a/doc/values/36.txt b/doc/values/36.txt
new file mode 100644
index 0000000..7088ba6
--- /dev/null
+++ b/doc/values/36.txt
@@ -0,0 +1,2 @@
+Allows a process to block system suspends - prevent the
+system from entering a lower power state.
diff --git a/doc/values/37.txt b/doc/values/37.txt
new file mode 100644
index 0000000..fff9f60
--- /dev/null
+++ b/doc/values/37.txt
@@ -0,0 +1,2 @@
+Allows a process to read the audit log via a multicast
+netlink socket.
diff --git a/doc/values/38.txt b/doc/values/38.txt
new file mode 100644
index 0000000..f75db74
--- /dev/null
+++ b/doc/values/38.txt
@@ -0,0 +1,4 @@
+Allows a process to enable observability of privileged
+operations related to performance. The mechanisms
+include perf_events, i915_perf and other kernel
+subsystems.
diff --git a/doc/values/39.txt b/doc/values/39.txt
new file mode 100644
index 0000000..d05a5c6
--- /dev/null
+++ b/doc/values/39.txt
@@ -0,0 +1,33 @@
+Allows a process to manipulate aspects of the kernel
+enhanced Berkeley Packet Filter (BPF) system. This is
+an execution subsystem of the kernel, that manages BPF
+programs. CAP_BPF permits a process to:
+ - create all types of BPF maps
+ - advanced verifier features:
+ - indirect variable access
+ - bounded loops
+ - BPF to BPF function calls
+ - scalar precision tracking
+ - larger complexity limits
+ - dead code elimination
+ - potentially other features
+
+Other capabilities can be used together with CAP_BPF to
+further manipulate the BPF system:
+ - CAP_PERFMON relaxes the verifier checks as follows:
+ - BPF programs can use pointer-to-integer
+ conversions
+ - speculation attack hardening measures can be
+ bypassed
+ - bpf_probe_read to read arbitrary kernel memory is
+ permitted
+ - bpf_trace_printk to print the content of kernel
+ memory
+ - CAP_SYS_ADMIN permits the following:
+ - use of bpf_probe_write_user
+ - iteration over the system-wide loaded programs,
+ maps, links and BTFs, and conversion of their IDs
+ to file descriptors.
+ - CAP_PERFMON is required to load tracing programs.
+ - CAP_NET_ADMIN is required to load networking
+ programs.
diff --git a/doc/values/4.txt b/doc/values/4.txt
new file mode 100644
index 0000000..5797cf8
--- /dev/null
+++ b/doc/values/4.txt
@@ -0,0 +1,4 @@
+Allows a process to set the S_ISUID and S_ISGID bits of
+the file permissions, even when the process' effective
+UID or GID/supplementary GIDs do not match that of the
+file.
diff --git a/doc/values/40.txt b/doc/values/40.txt
new file mode 100644
index 0000000..c5993cf
--- /dev/null
+++ b/doc/values/40.txt
@@ -0,0 +1,4 @@
+Allows a process to perform checkpoint
+and restore operations. Also permits
+explicit PID control via clone3() and
+also writing to ns_last_pid.
diff --git a/doc/values/5.txt b/doc/values/5.txt
new file mode 100644
index 0000000..1097fe0
--- /dev/null
+++ b/doc/values/5.txt
@@ -0,0 +1,3 @@
+Allows a process to send a kill(2) signal to any other
+process - overriding the limitation that there be a
+[E]UID match between source and target process.
diff --git a/doc/values/6.txt b/doc/values/6.txt
new file mode 100644
index 0000000..4ccc78b
--- /dev/null
+++ b/doc/values/6.txt
@@ -0,0 +1,5 @@
+Allows a process to freely manipulate its own GIDs:
+ - arbitrarily set the GID, EGID, REGID, RESGID values
+ - arbitrarily set the supplementary GIDs
+ - allows the forging of GID credentials passed over a
+ socket
diff --git a/doc/values/7.txt b/doc/values/7.txt
new file mode 100644
index 0000000..432a97e
--- /dev/null
+++ b/doc/values/7.txt
@@ -0,0 +1,5 @@
+Allows a process to freely manipulate its own UIDs:
+ - arbitrarily set the UID, EUID, REUID and RESUID
+ values
+ - allows the forging of UID credentials passed over a
+ socket
diff --git a/doc/values/8.txt b/doc/values/8.txt
new file mode 100644
index 0000000..d6d7c1f
--- /dev/null
+++ b/doc/values/8.txt
@@ -0,0 +1,19 @@
+Allows a process to freely manipulate its inheritable
+capabilities. Linux supports the POSIX.1e Inheritable
+set, as well as Bounding and Ambient Linux extension
+vectors. This capability permits dropping bits from the
+Bounding vector. It also permits the process to raise
+Ambient vector bits that are both raised in the
+Permitted and Inheritable sets of the process. This
+capability cannot be used to raise Permitted bits, or
+Effective bits beyond those already present in the
+process' permitted set.
+
+[Historical note: prior to the advent of file
+capabilities (2008), this capability was suppressed by
+default, as its unsuppressed behavior was not
+auditable: it could asynchronously grant its own
+Permitted capabilities to and remove capabilities from
+other processes arbitrarily. The former leads to
+undefined behavior, and the latter is better served by
+the kill system call.]
diff --git a/doc/values/9.txt b/doc/values/9.txt
new file mode 100644
index 0000000..651e1a0
--- /dev/null
+++ b/doc/values/9.txt
@@ -0,0 +1,2 @@
+Allows a process to modify the S_IMMUTABLE and
+S_APPEND file attributes.
diff --git a/generate_cap_names_list.awk b/generate_cap_names_list.awk
new file mode 100644
index 0000000..b92fd60
--- /dev/null
+++ b/generate_cap_names_list.awk
@@ -0,0 +1 @@
+# For every line ending in "#define CAP_<NAME> <number>", print an entry of
+# the form {"cap_<name>", <number>}, with the name lower-cased. The name and
+# number are taken from whitespace-split fields $2 and $3, so this assumes
+# "#define" is the first field on the line.
+/#define[ \t](CAP[_A-Z]+)[ \t]+([0-9]+)[ \t]*$/ { printf "{\"%s\", %s},\n", tolower($2), $3; }
diff --git a/go/.gitignore b/go/.gitignore
new file mode 100644
index 0000000..c0a9737
--- /dev/null
+++ b/go/.gitignore
@@ -0,0 +1,14 @@
+good-names.go
+compare-cap
+try-launching
+try-launching-cgo
+psx-signals
+psx-signals-cgo
+b210613
+mknames
+web
+setid
+gowns
+ok
+pkg
+src
diff --git a/go/Makefile b/go/Makefile
new file mode 100644
index 0000000..757844a
--- /dev/null
+++ b/go/Makefile
@@ -0,0 +1,128 @@
+# Building the libcap/{cap,psx} Go packages, and examples.
+#
+# Note, we use symlinks to construct a GOPATH friendly src tree. The
+# packages themselves are intended to be (ultimately) found via proxy
+# as "kernel.org/pub/linux/libs/security/libcap/cap" and
+# "kernel.org/pub/linux/libs/security/libcap/psx". However, to
+# validate their use on these paths, we fake such a structure in the
+# build tree with symlinks.
+#
+# Recursive builds use $(MAKE) rather than a literal "make" so that
+# command line flags (-j, -n, ...) and the chosen make binary are
+# propagated to the sub-makes.
+
+topdir=$(realpath ..)
+include $(topdir)/Make.Rules
+
+GOPATH=$(realpath .)
+IMPORTDIR=kernel.org/pub/linux/libs/security/libcap
+PKGDIR=pkg/$(GOOSARCH)/$(IMPORTDIR)
+PSXGOPACKAGE=$(PKGDIR)/psx.a
+CAPGOPACKAGE=$(PKGDIR)/cap.a
+
+DEPS=../libcap/libcap.a ../libcap/libpsx.a
+
+# These targets do not name files, so never treat a stray file of the
+# same name as "up to date".
+.PHONY: all test sudotest install clean
+
+all: $(PSXGOPACKAGE) $(CAPGOPACKAGE) web setid gowns compare-cap try-launching psx-signals
+
+$(DEPS):
+	$(MAKE) -C ../libcap all
+
+../progs/tcapsh-static:
+	$(MAKE) -C ../progs tcapsh-static
+
+src/$(IMPORTDIR)/psx:
+	mkdir -p "src/$(IMPORTDIR)"
+	ln -s $(topdir)/psx $@
+
+src/$(IMPORTDIR)/cap:
+	mkdir -p "src/$(IMPORTDIR)"
+	ln -s $(topdir)/cap $@
+
+$(topdir)/libcap/cap_names.h: $(DEPS)
+	$(MAKE) -C $(topdir)/libcap all
+
+# Regenerate names.go from the C header and the doc/values text, then
+# require that it matches the checked-in ../cap/names.go.
+good-names.go: $(topdir)/libcap/cap_names.h src/$(IMPORTDIR)/cap mknames.go
+	$(GO) run mknames.go --header=$< --textdir=$(topdir)/doc/values | gofmt > $@ || rm -f $@
+	diff -u ../cap/names.go $@
+
+$(PSXGOPACKAGE): src/$(IMPORTDIR)/psx ../psx/*.go $(DEPS)
+	mkdir -p pkg
+	GO111MODULE=off CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) install $(IMPORTDIR)/psx
+
+$(CAPGOPACKAGE): src/$(IMPORTDIR)/cap ../cap/*.go good-names.go $(PSXGOPACKAGE)
+	GO111MODULE=off CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) install $(IMPORTDIR)/cap
+
+# Compiles something with this package to compare it to libcap. This
+# tests more when run under sudotest (see ../progs/quicktest.sh for that).
+compare-cap: compare-cap.go $(CAPGOPACKAGE)
+	GO111MODULE=off CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build $<
+
+web: ../goapps/web/web.go $(CAPGOPACKAGE)
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $<
+ifeq ($(RAISE_GO_FILECAP),yes)
+	$(MAKE) -C ../progs setcap
+	sudo ../progs/setcap cap_setpcap,cap_net_bind_service=p web
+	@echo "NOTE: RAISED cap_setpcap,cap_net_bind_service ON web binary"
+endif
+
+setid: ../goapps/setid/setid.go $(CAPGOPACKAGE) $(PSXGOPACKAGE)
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $<
+
+gowns: ../goapps/gowns/gowns.go $(CAPGOPACKAGE)
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@ $<
+
+ok: ok.go
+	GO111MODULE=off CGO_ENABLED=0 GOPATH=$(GOPATH) $(GO) build $<
+
+# When the Go runtime does not require cgo (CGO_REQUIRED is 0), a
+# second "-cgo" binary is also built with cgo forced on.
+try-launching: try-launching.go $(CAPGOPACKAGE) ok
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build $<
+ifeq ($(CGO_REQUIRED),0)
+	GO111MODULE=off CGO_ENABLED="1" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH=$(GOPATH) $(GO) build -o $@-cgo $<
+endif
+
+psx-signals: psx-signals.go $(PSXGOPACKAGE)
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build $<
+ifeq ($(CGO_REQUIRED),0)
+	GO111MODULE=off CGO_ENABLED="1" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build -o $@-cgo $<
+endif
+
+b210613: b210613.go $(CAPGOPACKAGE)
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) $(GO) build $<
+
+test: all
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) test $(IMPORTDIR)/psx
+	GO111MODULE=off CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" GOPATH="$(GOPATH)" $(GO) test $(IMPORTDIR)/cap
+	LD_LIBRARY_PATH=../libcap ./compare-cap
+	./psx-signals
+ifeq ($(CGO_REQUIRED),0)
+	./psx-signals-cgo
+endif
+	./setid --caps=false
+	./gowns -- -c "echo gowns runs"
+
+# Note, the user namespace doesn't require sudo, but I wanted to avoid
+# requiring that the hosting kernel supports user namespaces for the
+# regular test case.
+sudotest: test ../progs/tcapsh-static b210613
+	./gowns --ns -- -c "echo gowns runs with user namespace"
+	./try-launching
+ifeq ($(CGO_REQUIRED),0)
+	./try-launching-cgo
+endif
+	sudo ./try-launching
+ifeq ($(CGO_REQUIRED),0)
+	sudo ./try-launching-cgo
+endif
+	sudo ../progs/tcapsh-static --cap-uid=$$(id -u) --caps="cap_setpcap=ep" --iab="^cap_setpcap" -- -c ./b210613
+
+install: all
+	rm -rf $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx
+	mkdir -p $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx
+	install -m 0644 src/$(IMPORTDIR)/psx/* $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/psx
+	mkdir -p $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/cap
+	rm -rf $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/cap/*
+	install -m 0644 src/$(IMPORTDIR)/cap/* $(FAKEROOT)$(GOPKGDIR)/$(IMPORTDIR)/cap
+
+clean:
+	rm -f *.o *.so *~ mknames ok good-names.go
+	rm -f web setid gowns
+	rm -f compare-cap try-launching try-launching-cgo
+	rm -f $(topdir)/cap/*~ $(topdir)/psx/*~
+	rm -f b210613 psx-signals psx-signals-cgo
+	rm -fr pkg src
diff --git a/go/b210613.go b/go/b210613.go
new file mode 100644
index 0000000..2bced06
--- /dev/null
+++ b/go/b210613.go
@@ -0,0 +1,21 @@
+// Program b210613 reproduces the code reported in:
+//
+// https://bugzilla.kernel.org/show_bug.cgi?id=210613
+//
+// This file is evolved directly from the reproducer attached to that
+// bug report originally authored by Lorenz Bauer.
+package main
+
+import (
+ "fmt"
+ "log"
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// main switches the process into cap.ModeNoPriv and prints a PASSED
+// line; a failure to set the mode is fatal.
+func main() {
+	err := cap.ModeNoPriv.Set()
+	if err != nil {
+		log.Fatalf("error dropping privilege: %v", err)
+	}
+	fmt.Println("b210613: PASSED")
+}
diff --git a/go/cgo-required.sh b/go/cgo-required.sh
new file mode 100755
index 0000000..f9afa52
--- /dev/null
+++ b/go/cgo-required.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Runtime check for whether or not syscall.AllThreadsSyscall is
+# available to the working go runtime or not. If it isn't we always
+# have to use libcap/psx to get POSIX semantics for syscalls that
+# change security state.
+if [ -n "$1" ]; then
+ export GO="${1}"
+else
+ export GO=go
+fi
+
+if [ -z "$(${GO} doc syscall 2>/dev/null|grep AllThreadsSyscall)" ]; then
+ echo "1"
+else
+ echo "0"
+fi
diff --git a/go/compare-cap.go b/go/compare-cap.go
new file mode 100644
index 0000000..4424ebe
--- /dev/null
+++ b/go/compare-cap.go
@@ -0,0 +1,384 @@
+// Program compare-cap is a sanity check that Go's cap package is
+// inter-operable with the C libcap.
+package main
+
+import (
+ "log"
+ "os"
+ "syscall"
+ "unsafe"
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// #include <stdlib.h>
+// #include <sys/capability.h>
+// #cgo CFLAGS: -I../libcap/include
+// #cgo LDFLAGS: -L../libcap -lcap
+import "C"
+
+// tryFileCaps attempts to use the cap package to manipulate file
+// capabilities on the running binary (os.Args[0]): first by path and
+// then through an open file descriptor. No reference to libcap in
+// this function.
+//
+// The tests are skipped when the process does not hold cap.SETFCAP in
+// its Permitted flag. Any unexpected result terminates the program
+// via log.Fatalf.
+func tryFileCaps() {
+	saved := cap.GetProc()
+
+	// Capabilities we will place on a file.
+	want := cap.NewSet()
+	if err := want.SetFlag(cap.Permitted, true, cap.SETFCAP, cap.DAC_OVERRIDE); err != nil {
+		log.Fatalf("failed to explore desired file capability: %v", err)
+	}
+	if err := want.SetFlag(cap.Effective, true, cap.SETFCAP, cap.DAC_OVERRIDE); err != nil {
+		log.Fatalf("failed to raise the effective bits: %v", err)
+	}
+
+	if perm, err := saved.GetFlag(cap.Permitted, cap.SETFCAP); err != nil {
+		log.Fatalf("failed to read capability: %v", err)
+	} else if !perm {
+		log.Printf("skipping file cap tests - insufficient privilege")
+		return
+	}
+
+	// From here on "saved" is the unprivileged baseline: same
+	// capabilities as at entry but with no Effective bits raised.
+	if err := saved.ClearFlag(cap.Effective); err != nil {
+		log.Fatalf("failed to drop effective: %v", err)
+	}
+	if err := saved.SetProc(); err != nil {
+		log.Fatalf("failed to limit capabilities: %v", err)
+	}
+
+	// Failing attempt to remove capabilities.
+	var empty *cap.Set
+	if err := empty.SetFile(os.Args[0]); err != syscall.EPERM {
+		log.Fatalf("failed to be blocked from removing filecaps: %v", err)
+	}
+
+	// The privilege we want (in the case we are root, we need the
+	// DAC_OVERRIDE too).
+	working, err := saved.Dup()
+	if err != nil {
+		log.Fatalf("failed to duplicate (%v): %v", saved, err)
+	}
+	if err := working.SetFlag(cap.Effective, true, cap.DAC_OVERRIDE, cap.SETFCAP); err != nil {
+		log.Fatalf("failed to raise effective: %v", err)
+	}
+
+	// Critical (privilege using) section:
+	if err := working.SetProc(); err != nil {
+		log.Fatalf("failed to enable first effective privilege: %v", err)
+	}
+	// Delete capability (ENODATA is tolerated here).
+	if err := empty.SetFile(os.Args[0]); err != nil && err != syscall.ENODATA {
+		log.Fatalf("blocked from removing filecaps: %v", err)
+	}
+	if got, err := cap.GetFile(os.Args[0]); err == nil {
+		log.Fatalf("read deleted file caps: %v", got)
+	}
+	// Create file caps (this use employs the effective bit).
+	if err := want.SetFile(os.Args[0]); err != nil {
+		log.Fatalf("failed to set file capability: %v", err)
+	}
+	if err := saved.SetProc(); err != nil {
+		log.Fatalf("failed to lower effective capability: %v", err)
+	}
+	// End of critical section.
+
+	if got, err := cap.GetFile(os.Args[0]); err != nil {
+		log.Fatalf("failed to read caps: %v", err)
+	} else if is, was := got.String(), want.String(); is != was {
+		log.Fatalf("read file caps do not match desired: got=%q want=%q", is, was)
+	}
+
+	// Now, do it all again but this time on an open file.
+	f, err := os.Open(os.Args[0])
+	if err != nil {
+		log.Fatalf("failed to open %q: %v", os.Args[0], err)
+	}
+	defer f.Close()
+
+	// Failing attempt to remove capabilities.
+	if err := empty.SetFd(f); err != syscall.EPERM {
+		log.Fatalf("failed to be blocked from fremoving filecaps: %v", err)
+	}
+
+	// For the next section, we won't set the effective bit on the
+	// file. A failure here would invalidate the comparison below,
+	// so it is checked like every other call.
+	if err := want.ClearFlag(cap.Effective); err != nil {
+		log.Fatalf("failed to clear effective bits of desired file capability: %v", err)
+	}
+
+	// Critical (privilege using) section:
+	if err := working.SetProc(); err != nil {
+		log.Fatalf("failed to enable effective privilege: %v", err)
+	}
+	if err := empty.SetFd(f); err != nil && err != syscall.ENODATA {
+		log.Fatalf("blocked from fremoving filecaps: %v", err)
+	}
+	if got, err := cap.GetFd(f); err == nil {
+		log.Fatalf("read fdeleted file caps: %v", got)
+	}
+	// This one does not set the effective bit.
+	if err := want.SetFd(f); err != nil {
+		log.Fatalf("failed to fset file capability: %v", err)
+	}
+	if err := saved.SetProc(); err != nil {
+		log.Fatalf("failed to lower effective capability: %v", err)
+	}
+	// End of critical section.
+
+	if got, err := cap.GetFd(f); err != nil {
+		log.Fatalf("failed to fread caps: %v", err)
+	} else if is, was := got.String(), want.String(); is != was {
+		log.Fatalf("fread file caps do not match desired: got=%q want=%q", is, was)
+	}
+}
+
+// tryProcCaps performs a set of convenience functions and compares
+// the results with those seen by libcap. At the end of this function,
+// the running process has no privileges at all. So exiting the
+// program is the only option.
+//
+// The tests are skipped when the process does not hold cap.SETPCAP in
+// its Permitted flag. Any unexpected result terminates the program.
+func tryProcCaps() {
+	c := cap.GetProc()
+	if v, err := c.GetFlag(cap.Permitted, cap.SETPCAP); err != nil {
+		log.Fatalf("failed to read permitted setpcap: %v", err)
+	} else if !v {
+		log.Printf("skipping proc cap tests - insufficient privilege")
+		return
+	}
+	if err := cap.SetUID(99); err != nil {
+		log.Fatalf("failed to set uid=99: %v", err)
+	}
+	if u := syscall.Getuid(); u != 99 {
+		// Fatalf (not Fatal) so that the %d verb is expanded.
+		log.Fatalf("uid=99 did not take: got=%d", u)
+	}
+	if err := cap.SetGroups(98, 100, 101); err != nil {
+		log.Fatalf("failed to set groups=98 [100, 101]: %v", err)
+	}
+	if g := syscall.Getgid(); g != 98 {
+		log.Fatalf("gid=98 did not take: got=%d", g)
+	}
+	if gs, err := syscall.Getgroups(); err != nil {
+		log.Fatalf("error getting groups: %v", err)
+	} else if len(gs) != 2 || gs[0] != 100 || gs[1] != 101 {
+		log.Fatalf("wrong set of groups: got=%v want=[100 101]", gs)
+	}
+
+	if mode := cap.GetMode(); mode != cap.ModeUncertain {
+		log.Fatalf("initial mode should be 0 (UNCERTAIN), got: %d (%v)", mode, mode)
+	}
+
+	// To distinguish PURE1E and PURE1E_INIT we need an inheritable capability set.
+	working := cap.GetProc()
+	if err := working.SetFlag(cap.Inheritable, true, cap.SETPCAP); err != nil {
+		log.Fatalf("unable to raise inheritable bit: %v", err)
+	}
+	if err := working.SetProc(); err != nil {
+		log.Fatalf("failed to add inheritable bit: %v", err)
+	}
+
+	// Step through the modes in order, checking after each step
+	// that both the Go package and the C library report it.
+	for i, mode := range []cap.Mode{cap.ModePure1E, cap.ModePure1EInit, cap.ModeNoPriv} {
+		if err := mode.Set(); err != nil {
+			log.Fatalf("[%d] in mode=%v and failed to set mode to %d (%v): %v", i, cap.GetMode(), mode, mode, err)
+		}
+		if got := cap.GetMode(); got != mode {
+			log.Fatalf("[%d] unable to recognise mode %d (%v), got: %d (%v)", i, mode, mode, got, got)
+		}
+		cM := C.cap_get_mode()
+		if mode != cap.Mode(cM) {
+			log.Fatalf("[%d] C and Go disagree on mode: %d vs %d", i, cM, mode)
+		}
+	}
+
+	// The current process is now without any access to privilege.
+}
+
+// main cross-checks the Go cap package against the C libcap:
+//
+//   - text and binary representations of init's (PID 1) capabilities
+//     are passed C -> Go -> C and compared at each step;
+//   - "all=ep" is confirmed to stringify as "=ep" in Go;
+//   - a series of sample sets are round-tripped through text and
+//     binary forms and compared with libcap's text output;
+//   - an IAB tuple built from text in Go is mirrored bit by bit into a
+//     C IAB tuple and their text forms compared;
+//   - finally the optional file and process capability tests run.
+//
+// Any discrepancy terminates the program via the log package.
+func main() {
+	// Use the C libcap to obtain a non-trivial capability in text form (from init).
+	cC := C.cap_get_pid(1)
+	if cC == nil {
+		log.Fatal("basic c caps from init function failure")
+	}
+	defer C.cap_free(unsafe.Pointer(cC))
+	var tCLen C.ssize_t
+	tC := C.cap_to_text(cC, &tCLen)
+	if tC == nil {
+		log.Fatal("basic c init caps -> text failure")
+	}
+	defer C.cap_free(unsafe.Pointer(tC))
+
+	importT := C.GoString(tC)
+	if got, want := len(importT), int(tCLen); got != want {
+		log.Fatalf("C string import failed: got=%d [%q] want=%d", got, importT, want)
+	}
+
+	// Validate that it can be decoded in Go.
+	cGo, err := cap.FromText(importT)
+	if err != nil {
+		log.Fatalf("go parsing of c text import failed: %v", err)
+	}
+
+	// Validate that it matches the one directly loaded in Go.
+	c, err := cap.GetPID(1)
+	if err != nil {
+		log.Fatalf("...failed to read init's capabilities: %v", err)
+	}
+	tGo := c.String()
+	if got, want := tGo, cGo.String(); got != want {
+		log.Fatalf("go text rep does not match c: got=%q, want=%q", got, want)
+	}
+
+	// Export it in text form again from Go.
+	tForC := C.CString(tGo)
+	defer C.free(unsafe.Pointer(tForC))
+
+	// Validate it can be encoded in C.
+	cC2 := C.cap_from_text(tForC)
+	if cC2 == nil {
+		log.Fatal("go text rep not parsable by c")
+	}
+	defer C.cap_free(unsafe.Pointer(cC2))
+
+	// Validate that it can be exported in binary form in C
+	const enoughForAnyone = 1000
+	eC := make([]byte, enoughForAnyone)
+	eCLen := C.cap_copy_ext(unsafe.Pointer(&eC[0]), cC2, C.ssize_t(len(eC)))
+	if eCLen < 5 {
+		log.Fatalf("c export yielded bad length: %d", eCLen)
+	}
+
+	// Validate that it can be imported from binary in Go
+	iGo, err := cap.Import(eC[:eCLen])
+	if err != nil {
+		log.Fatalf("go import of c binary failed: %v", err)
+	}
+	if got, want := iGo.String(), importT; got != want {
+		log.Fatalf("go import of c binary miscompare: got=%q want=%q", got, want)
+	}
+
+	// Validate that it can be exported in binary in Go
+	iE, err := iGo.Export()
+	if err != nil {
+		log.Fatalf("go failed to export binary: %v", err)
+	}
+
+	// Validate that it can be imported in binary in C
+	iC := C.cap_copy_int(unsafe.Pointer(&iE[0]))
+	if iC == nil {
+		log.Fatal("c failed to import go binary")
+	}
+	defer C.cap_free(unsafe.Pointer(iC))
+	// Stringify the freshly imported set (iC), not the original cC,
+	// so that this comparison really exercises the Go -> C import.
+	fC := C.cap_to_text(iC, &tCLen)
+	if fC == nil {
+		log.Fatal("basic c init caps -> text failure")
+	}
+	defer C.cap_free(unsafe.Pointer(fC))
+	if got, want := C.GoString(fC), importT; got != want {
+		log.Fatalf("c import from go yielded bad caps: got=%q want=%q", got, want)
+	}
+
+	// Validate that everyone agrees what all is:
+	want := "=ep"
+	all, err := cap.FromText("all=ep")
+	if err != nil {
+		log.Fatalf("unable to parse all=ep: %v", err)
+	}
+	if got := all.String(); got != want {
+		log.Fatalf("all decode failed in Go: got=%q, want=%q", got, want)
+	}
+
+	// Validate some random values stringify consistently between
+	// libcap.cap_to_text() and (*cap.Set).String().
+	mb := cap.MaxBits()
+	sample := cap.NewSet()
+	for c := cap.Value(0); c < 7*mb; c += 3 {
+		n := int(c)
+		// Bits accumulate in sample from one iteration to the
+		// next: the value is c modulo the number of bits and the
+		// flag cycles through the three flags as c grows.
+		raise, f := c%mb, cap.Flag(c/mb)%3
+		if err := sample.SetFlag(f, true, raise); err != nil {
+			log.Fatalf("[%d] failed to raise %v in flag %v: %v", n, raise, f, err)
+		}
+		if v, err := cap.FromText(sample.String()); err != nil {
+			log.Fatalf("[%d] cap to text for %q not reversible: %v", n, sample, err)
+		} else if cf, err := v.Compare(sample); err != nil {
+			log.Fatalf("[%d] FromText generated bad capability from %q: %v", n, sample, err)
+		} else if cf != 0 {
+			log.Fatalf("[%d] text import got=%q want=%q", n, v, sample)
+		}
+		e, err := sample.Export()
+		if err != nil {
+			log.Fatalf("[%d] failed to export %q: %v", n, sample, err)
+		}
+		i, err := cap.Import(e)
+		if err != nil {
+			log.Fatalf("[%d] failed to import %q: %v", n, sample, err)
+		}
+		if cf, err := i.Compare(sample); err != nil {
+			log.Fatalf("[%d] failed to compare %q vs original:%q: %v", n, i, sample, err)
+		} else if cf != 0 {
+			log.Fatalf("[%d] import got=%q want=%q", n, i, sample)
+		}
+		// Confirm that importing this portable binary
+		// representation in libcap and converting to text,
+		// generates the same text as Go generates. This was
+		// broken prior to v0.2.41.
+		cCap := C.cap_copy_int(unsafe.Pointer(&e[0]))
+		if cCap == nil {
+			log.Fatalf("[%d] C import failed for %q export", n, sample)
+		}
+		var tCLen C.ssize_t
+		tC := C.cap_to_text(cCap, &tCLen)
+		if tC == nil {
+			log.Fatalf("[%d] basic c init caps -> text failure", n)
+		}
+		// Freed eagerly (no defer) because this runs once per
+		// loop iteration.
+		C.cap_free(unsafe.Pointer(cCap))
+		importT := C.GoString(tC)
+		C.cap_free(unsafe.Pointer(tC))
+		if got, want := len(importT), int(tCLen); got != want {
+			log.Fatalf("[%d] C text generated wrong length: Go=%d, C=%d", n, got, want)
+		}
+		if got, want := importT, sample.String(); got != want {
+			log.Fatalf("[%d] C and Go text rep disparity: C=%q Go=%q", n, got, want)
+		}
+	}
+
+	iab, err := cap.IABFromText("cap_chown,!cap_setuid,^cap_setgid")
+	if err != nil {
+		log.Fatalf("failed to initialize iab from text: %v", err)
+	}
+	cIAB := C.cap_iab_init()
+	if cIAB == nil {
+		log.Fatal("failed to allocate a C IAB tuple")
+	}
+	defer C.cap_free(unsafe.Pointer(cIAB))
+	// Copy every raised bit of the Go IAB tuple into the C one,
+	// walking the capability values from highest to lowest.
+	for c := cap.MaxBits(); c > 0; {
+		c--
+		if en, err := iab.GetVector(cap.Inh, c); err != nil {
+			log.Fatalf("failed to read iab.i[%v]: %v", c, err)
+		} else if en {
+			if C.cap_iab_set_vector(cIAB, C.CAP_IAB_INH, C.cap_value_t(int(c)), C.CAP_SET) != 0 {
+				log.Fatalf("failed to set C's IAB.I %v", c)
+			}
+		}
+		if en, err := iab.GetVector(cap.Amb, c); err != nil {
+			log.Fatalf("failed to read iab.a[%v]: %v", c, err)
+		} else if en {
+			if C.cap_iab_set_vector(cIAB, C.CAP_IAB_AMB, C.cap_value_t(int(c)), C.CAP_SET) != 0 {
+				log.Fatalf("failed to set C's IAB.A %v", c)
+			}
+		}
+		if en, err := iab.GetVector(cap.Bound, c); err != nil {
+			log.Fatalf("failed to read iab.b[%v]: %v", c, err)
+		} else if en {
+			if C.cap_iab_set_vector(cIAB, C.CAP_IAB_BOUND, C.cap_value_t(int(c)), C.CAP_SET) != 0 {
+				log.Fatalf("failed to set C's IAB.B %v", c)
+			}
+		}
+	}
+	iabC := C.cap_iab_to_text(cIAB)
+	if iabC == nil {
+		log.Fatalf("failed to get text from C for %q", iab)
+	}
+	defer C.cap_free(unsafe.Pointer(iabC))
+	if got, want := C.GoString(iabC), iab.String(); got != want {
+		log.Fatalf("IAB for Go and C differ: got=%q, want=%q", got, want)
+	}
+
+	// Next, we attempt to manipulate some file capabilities on
+	// the running program. These are optional, based on whether
+	// the current program is capable enough and do not involve
+	// any cgo calls to libcap.
+	tryFileCaps()
+
+	// Nothing left to do but exit after this one.
+	tryProcCaps()
+	log.Printf("compare-cap success!")
+}
diff --git a/go/go-mod-index.html b/go/go-mod-index.html
new file mode 100644
index 0000000..9cfe13f
--- /dev/null
+++ b/go/go-mod-index.html
@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<meta name="go-import" content="kernel.org/pub/linux/libs/security/libcap git https://git.kernel.org/pub/scm/libs/libcap/libcap.git">
+<meta http-equiv="refresh" content="10; url=https://sites.google.com/site/fullycapable">
+</head>
+<body>
+ Redirecting in 10 seconds to
+ the <a href="https://sites.google.com/site/fullycapable">Fully
+ Capable</a> project page, the home of these Go packages:
+ <ul>
+ <li><tt>"kernel.org/pub/linux/libs/security/libcap/psx"</tt></li>
+ <li><tt>"kernel.org/pub/linux/libs/security/libcap/cap"</tt></li>
+ </ul>
+</body>
+</html>
diff --git a/go/mknames.go b/go/mknames.go
new file mode 100644
index 0000000..ff07218
--- /dev/null
+++ b/go/mknames.go
@@ -0,0 +1,123 @@
+// Program mknames parses the cap_names.h file and creates an
+// equivalent names.go file including comments on each cap.Value from
+// the documentation directory.
+package main
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "strings"
+)
+
+var (
+ header = flag.String("header", "", "name of header file")
+ text = flag.String("textdir", "", "directory name for value txt files")
+)
+
+// main reads the header file named by --header, collects every
+// double-quoted name it finds (in file order) and prints the
+// generated names.go source on stdout. The comment block for the
+// i-th name is read from <textdir>/<i>.txt.
+func main() {
+ flag.Parse()
+
+ if *header == "" {
+ log.Fatal("usage: mknames --header=.../cap_names.h")
+ }
+ d, err := ioutil.ReadFile(*header)
+ if err != nil {
+ log.Fatal("reading:", err)
+ }
+
+ b := bytes.NewBuffer(d)
+
+ var list []string
+ for {
+ line, err := b.ReadString('\n')
+ // Keep the text between the first pair of double quotes. A
+ // line without a complete pair is skipped rather than sliced
+ // with a negative index (which would panic).
+ if i := strings.Index(line, `"`); i >= 0 {
+ line = line[i+1:]
+ if i = strings.Index(line, `"`); i >= 0 {
+ list = append(list, line[:i])
+ }
+ }
+ // ReadString hands back a final unterminated line together
+ // with io.EOF, so only stop once that line has been examined.
+ if err == io.EOF {
+ break
+ }
+ }
+
+ // generate package file names.go
+ fmt.Print(`package cap
+
+/* ** DO NOT EDIT THIS FILE. IT WAS AUTO-GENERATED BY LIBCAP'S GO BUILDER (mknames.go) ** */
+
+// NamedCount holds the number of capability values with official
+// names known at the time this libcap/cap version, was released. The
+// "../libcap/cap" package is fully able to manipulate higher numbered
+// capability values by numerical value. However, if you find
+// cap.NamedCount < cap.MaxBits(), it is probably time to upgrade this
+// package on your system.
+//
+// FWIW the userspace tool '/sbin/capsh' also contains a runtime check
+// for the condition that libcap is behind the running kernel in this
+// way.
+const NamedCount = `, len(list), `
+
+// CHOWN etc., are the named capability values of the Linux
+// kernel. The canonical source for each name is the
+// "uapi/linux/capabilities.h" file. Some values may not be available
+// (yet) where the kernel is older. The actual number of capabities
+// supported by the running kernel can be obtained using the
+// cap.MaxBits() function.
+const (
+`)
+ bits := make(map[string]string)
+ for i, name := range list {
+ doc := fmt.Sprintf("%s/%d.txt", *text, i)
+ content, err := ioutil.ReadFile(doc)
+ if err != nil {
+ log.Fatalf("failed to read %q: %v", doc, err)
+ }
+ detail := strings.Split(strings.Replace(string(content), "CAP_", "cap.", -1), "\n")
+ if i != 0 {
+ fmt.Println()
+ }
+ v := strings.ToUpper(strings.TrimPrefix(name, "cap_"))
+ for j, line := range detail {
+ preamble := ""
+ offset := 0
+ if j == 0 {
+ if !strings.HasPrefix(line, "Allows ") {
+ log.Fatalf("line should begin \"Allows \": got %s:%d:%q", doc, j, line)
+ }
+ // Rewrite "Allows ..." as "<NAME> allows ...": drop the
+ // capital 'A' and prepend the value name plus " a".
+ preamble = fmt.Sprint(v, " a")
+ offset = 1
+ }
+ // Emit every line except an empty trailing one.
+ if len(line) != 0 || j != len(detail)-1 {
+ fmt.Printf(" // %s%s\n", preamble, line[offset:])
+ }
+ }
+ bits[name] = v
+ if i == 0 {
+ fmt.Println(v, " Value = iota")
+ } else {
+ fmt.Println(v)
+ }
+ }
+ fmt.Print(`)
+
+var names = map[Value]string{
+`)
+ for _, name := range list {
+ fmt.Printf("%s: %q,\n", bits[name], name)
+ }
+ fmt.Print(`}
+
+var bits = map[string]Value {
+`)
+ for _, name := range list {
+ fmt.Printf("%q: %s,\n", name, bits[name])
+ }
+ fmt.Println(`}`)
+}
diff --git a/go/ok.go b/go/ok.go
new file mode 100644
index 0000000..509638e
--- /dev/null
+++ b/go/ok.go
@@ -0,0 +1,9 @@
+// Program ok exits with status zero. We use it as a chroot test.
+// To avoid any confusion, it needs to be linked statically.
+package main
+
+import "os"
+
+func main() {
+ os.Exit(0)
+}
diff --git a/go/psx-signals.go b/go/psx-signals.go
new file mode 100644
index 0000000..486f284
--- /dev/null
+++ b/go/psx-signals.go
@@ -0,0 +1,46 @@
+// Program psx-signals validates that the psx mechanism can coexist
+// with Go use of signals. This is an unprivileged program derived
+// from the sample code provided in this bug report:
+//
+// https://bugzilla.kernel.org/show_bug.cgi?id=210533
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+ "syscall"
+ "time"
+
+ "kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+const maxSig = 10
+const prSetKeepCaps = 8
+
+func main() {
+ sig := make(chan os.Signal, maxSig)
+ signal.Notify(sig, os.Interrupt)
+
+ fmt.Print("Toggling KEEP_CAPS ")
+ for i := 0; i < maxSig; i++ {
+ fmt.Print(".")
+ _, _, err := psx.Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, uintptr(i&1), 0)
+ if err != 0 {
+ log.Fatalf("[%d] attempt to set KEEPCAPS (to %d) failed: %v", i, i%2, err)
+ }
+ }
+
+ fmt.Println(" done")
+ fmt.Print("Wait 1 second to see if unwanted signals arrive...")
+ // Confirm no signals are delivered.
+ select {
+ case <-time.After(1 * time.Second):
+ break
+ case info := <-sig:
+ log.Fatalf("signal received: %v", info)
+ }
+ fmt.Println(" none arrived")
+ fmt.Println("PASSED")
+}
diff --git a/go/try-launching.go b/go/try-launching.go
new file mode 100644
index 0000000..9f20e6b
--- /dev/null
+++ b/go/try-launching.go
@@ -0,0 +1,114 @@
+// Program try-launching validates the cap.Launch feature.
+package main
+
+import (
+ "fmt"
+ "log"
+ "strings"
+ "syscall"
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// tryLaunching attempts to launch a bunch of programs in parallel. It
+// first tries some unprivileged launches, and then (if privileged)
+// tries some more ambitious ones.
+func tryLaunching() {
+ cwd, err := syscall.Getwd()
+ if err != nil {
+ log.Fatalf("no working directory: %v", err)
+ }
+ root := cwd[:strings.LastIndex(cwd, "/")]
+
+ vs := []struct {
+ args []string
+ fail bool
+ callbackFn func(*syscall.ProcAttr, interface{}) error
+ chroot string
+ iab string
+ uid int
+ gid int
+ mode cap.Mode
+ groups []int
+ }{
+ {args: []string{root + "/go/ok"}},
+ {
+ args: []string{root + "/progs/tcapsh-static", "--dropped=cap_chown", "--is-uid=123", "--is-gid=456", "--has-a=cap_setuid"},
+ iab: "!cap_chown,^cap_setuid,cap_sys_admin",
+ uid: 123,
+ gid: 456,
+ groups: []int{1, 2, 3},
+ fail: syscall.Getuid() != 0,
+ },
+ {
+ args: []string{"/ok"},
+ chroot: root + "/go",
+ fail: syscall.Getuid() != 0,
+ },
+ {
+ args: []string{root + "/progs/tcapsh-static", "--inmode=NOPRIV", "--has-no-new-privs"},
+ mode: cap.ModeNoPriv,
+ fail: syscall.Getuid() != 0,
+ },
+ }
+
+ ps := make([]int, len(vs))
+ ws := make([]syscall.WaitStatus, len(vs))
+
+ for i, v := range vs {
+ e := cap.NewLauncher(v.args[0], v.args, nil)
+ e.Callback(v.callbackFn)
+ if v.chroot != "" {
+ e.SetChroot(v.chroot)
+ }
+ if v.uid != 0 {
+ e.SetUID(v.uid)
+ }
+ if v.gid != 0 {
+ e.SetGroups(v.gid, v.groups)
+ }
+ if v.mode != 0 {
+ e.SetMode(v.mode)
+ }
+ if v.iab != "" {
+ if iab, err := cap.IABFromText(v.iab); err != nil {
+ log.Fatalf("failed to parse iab=%q: %v", v.iab, err)
+ } else {
+ e.SetIAB(iab)
+ }
+ }
+ log.Printf("[%d] trying: %q\n", i, v.args)
+ if ps[i], err = e.Launch(nil); err != nil {
+ if v.fail {
+ continue
+ }
+ log.Fatalf("[%d] launch %q failed: %v", i, v.args, err)
+ }
+ }
+
+ for i, p := range ps {
+ if p == -1 {
+ continue
+ }
+ if pr, err := syscall.Wait4(p, &ws[i], 0, nil); err != nil {
+ log.Fatalf("wait4 <%d> failed: %v", p, err)
+ } else if p != pr {
+ log.Fatalf("wait4 <%d> returned <%d> instead", p, pr)
+ } else if ws[i] != 0 {
+ if vs[i].fail {
+ continue
+ }
+ log.Fatalf("wait4 <%d> status was %d", p, ws[i])
+ }
+ }
+}
+
+func main() {
+ if cap.LaunchSupported {
+ // The Go runtime had some OS threading bugs that
+ // prevented Launch from working. Specifically, the
+ // launch OS thread would get reused.
+ tryLaunching()
+ }
+ fmt.Println("PASSED")
+}
diff --git a/goapps/gowns/go.mod b/goapps/gowns/go.mod
new file mode 100644
index 0000000..bc534af
--- /dev/null
+++ b/goapps/gowns/go.mod
@@ -0,0 +1,5 @@
+module gowns
+
+go 1.15
+
+require kernel.org/pub/linux/libs/security/libcap/cap v0.2.48
diff --git a/goapps/gowns/gowns.go b/goapps/gowns/gowns.go
new file mode 100644
index 0000000..b9a14cd
--- /dev/null
+++ b/goapps/gowns/gowns.go
@@ -0,0 +1,249 @@
+// Program gowns is a small program to explore and demonstrate using
+// Go to Wrap a child in a NameSpace under Linux.
+package main
+
+import (
+ "errors"
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ "strings"
+ "syscall"
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// nsDetail is how we summarize the type of namespace we want to
+// enter.
+type nsDetail struct {
+ // uid holds the uid for the base user in this namespace (defaults to getuid).
+ uid int
+
+ // uidMap holds the namespace mapping of uid values.
+ uidMap []syscall.SysProcIDMap
+
+ // gid holds the gid for the base user in this namespace (defaults to getgid).
+ gid int
+
+ // gidMap holds the namespace mapping of gid values.
+ gidMap []syscall.SysProcIDMap
+}
+
+var (
+ baseID = flag.Int("base", -1, "base id for uids and gids (-1 = invoker's uid)")
+ uid = flag.Int("uid", -1, "uid of the hosting user")
+ gid = flag.Int("gid", -1, "gid of the hosting user")
+ iab = flag.String("iab", "", "IAB string for inheritable capabilities")
+ mode = flag.String("mode", "", "force a libcap mode (capsh --modes for list)")
+
+ ns = flag.Bool("ns", false, "enable user namespace features")
+ uids = flag.String("uids", "", "comma separated UID ranges to map contiguously (req. CAP_SETUID)")
+ gids = flag.String("gids", "", "comma separated GID ranges to map contiguously (req. CAP_SETGID)")
+
+ shell = flag.String("shell", "/bin/bash", "shell to be launched")
+ debug = flag.Bool("verbose", false, "more verbose output")
+)
+
+// r holds a base and count for a contiguous range.
+type r struct {
+ base, count int
+}
+
+// ranges unpacks numerical ranges.
+func ranges(s string) []r {
+ if s == "" {
+ return nil
+ }
+ var rs []r
+ for _, n := range strings.Split(s, ",") {
+ var base, upper int
+ if _, err := fmt.Sscanf(n, "%d-%d", &base, &upper); err == nil {
+ if upper < base {
+ log.Fatalf("invalid range: [%d-%d]", base, upper)
+ }
+ rs = append(rs, r{
+ base: base,
+ count: 1 + upper - base,
+ })
+ } else if _, err := fmt.Sscanf(n, "%d", &base); err == nil {
+ rs = append(rs, r{
+ base: base,
+ count: 1,
+ })
+ } else {
+ log.Fatalf("unable to parse range [%s]", n)
+ }
+ }
+ return rs
+}
+
+// restart is meant to launch the program again with the remaining
+// arguments. As written it only reports os.Args[0] and
+// flag.Args()[1:] through log.Fatalf, which ends the program.
+// NOTE(review): flag.Args()[1:] panics when no positional arguments
+// were given - confirm before wiring this function up.
+func restart() {
+ log.Fatalf("failed to restart: flags: %q %q", os.Args[0], flag.Args()[1:])
+}
+
+// errUnableToSetup is how nsSetup fails.
+var errUnableToSetup = errors.New("data was not in supported format")
+
+// nsSetup is the callback used to enter the namespace for the user
+// via callback in the cap.Launcher mechanism.
+func nsSetup(pa *syscall.ProcAttr, data interface{}) error {
+ nsD, ok := data.(nsDetail)
+ if !ok {
+ return errUnableToSetup
+ }
+
+ if pa.Sys == nil {
+ pa.Sys = &syscall.SysProcAttr{}
+ }
+ pa.Sys.Cloneflags |= syscall.CLONE_NEWUSER
+ pa.Sys.UidMappings = nsD.uidMap
+ pa.Sys.GidMappings = nsD.gidMap
+ return nil
+}
+
+func parseRanges(detail *nsDetail, ids string, id int) []syscall.SysProcIDMap {
+ base := *baseID
+ if base < 0 {
+ base = detail.uid
+ }
+
+ list := []syscall.SysProcIDMap{
+ syscall.SysProcIDMap{
+ ContainerID: base,
+ HostID: id,
+ Size: 1,
+ },
+ }
+
+ base++
+ for _, next := range ranges(ids) {
+ fmt.Println("next:", next)
+ list = append(list,
+ syscall.SysProcIDMap{
+ ContainerID: base,
+ HostID: next.base,
+ Size: next.count,
+ })
+ base += next.count
+ }
+ return list
+}
+
+// main builds a cap.Launcher from the command line flags, raises any
+// permitted CAP_SETUID/CAP_SETGID into the effective set for the
+// launch, starts the child, drops all capabilities and then waits
+// for the child to exit.
+func main() {
+ flag.Parse()
+
+ detail := nsDetail{
+ gid: syscall.Getgid(),
+ }
+
+ thisUID := syscall.Getuid()
+ switch *uid {
+ case -1:
+ detail.uid = thisUID
+ default:
+ detail.uid = *uid
+ }
+ detail.uidMap = parseRanges(&detail, *uids, detail.uid)
+
+ thisGID := syscall.Getgid()
+ switch *gid {
+ case -1:
+ detail.gid = thisGID
+ default:
+ detail.gid = *gid
+ }
+ detail.gidMap = parseRanges(&detail, *gids, detail.gid)
+
+ unparsed := flag.Args()
+
+ arg0 := *shell
+ skip := 0
+ var w *cap.Launcher
+ if len(unparsed) > 0 {
+ switch unparsed[0] {
+ case "==":
+ // A leading "==" argument re-invokes this program (with the
+ // arguments that follow) instead of the shell.
+ arg0 = os.Args[0]
+ skip++
+ }
+ }
+
+ w = cap.NewLauncher(arg0, append([]string{arg0}, unparsed[skip:]...), nil)
+ if *ns {
+ // Include the namespace setup callback with the launcher.
+ w.Callback(nsSetup)
+ }
+
+ if thisUID != detail.uid {
+ w.SetUID(detail.uid)
+ }
+
+ if thisGID != detail.gid {
+ w.SetGroups(detail.gid, nil)
+ }
+
+ if *iab != "" {
+ ins, err := cap.IABFromText(*iab)
+ if err != nil {
+ // Report both the offending flag value and the parse error.
+ log.Fatalf("--iab=%q parsing issue: %v", *iab, err)
+ }
+ w.SetIAB(ins)
+ }
+
+ if *mode != "" {
+ // Walk the known modes until one matches by name; the first
+ // "UNKNOWN" name marks the end of the valid modes.
+ for m := cap.Mode(1); ; m++ {
+ if s := m.String(); s == "UNKNOWN" {
+ log.Fatalf("mode %q is unknown", *mode)
+ } else if s == *mode {
+ w.SetMode(m)
+ break
+ }
+ }
+ }
+
+ // The launcher can enable more functionality if invoked with
+ // effective capabilities.
+ have := cap.GetProc()
+ for _, c := range []cap.Value{cap.SETUID, cap.SETGID} {
+ if canDo, err := have.GetFlag(cap.Permitted, c); err != nil {
+ log.Fatalf("failed to explore process capabilities, %q for %q", have, c)
+ } else if canDo {
+ if err := have.SetFlag(cap.Effective, true, c); err != nil {
+ log.Fatalf("failed to raise effective capability: \"%v e+%v\"", have, c)
+ }
+ }
+ }
+ if err := have.SetProc(); err != nil {
+ log.Fatalf("privilege assertion %q failed: %v", have, err)
+ }
+
+ if *debug {
+ if *ns {
+ fmt.Println("launching namespace")
+ } else {
+ fmt.Println("launching without namespace")
+ }
+ }
+
+ pid, err := w.Launch(detail)
+ if err != nil {
+ log.Fatalf("launch failed: %v", err)
+ }
+ // The child is running; this process no longer needs privilege.
+ if err := cap.NewSet().SetProc(); err != nil {
+ log.Fatalf("gowns could not drop privilege: %v", err)
+ }
+
+ p, err := os.FindProcess(pid)
+ if err != nil {
+ log.Fatalf("cannot find process: %v", err)
+ }
+ state, err := p.Wait()
+ if err != nil {
+ log.Fatalf("waiting failed: %v", err)
+ }
+
+ if *debug {
+ fmt.Println("process exited:", state)
+ }
+}
diff --git a/goapps/setid/go.mod b/goapps/setid/go.mod
new file mode 100644
index 0000000..cd2282d
--- /dev/null
+++ b/goapps/setid/go.mod
@@ -0,0 +1,8 @@
+module setid
+
+go 1.11
+
+require (
+ kernel.org/pub/linux/libs/security/libcap/cap v0.2.48
+ kernel.org/pub/linux/libs/security/libcap/psx v0.2.48
+)
diff --git a/goapps/setid/setid.go b/goapps/setid/setid.go
new file mode 100644
index 0000000..2bbe5b0
--- /dev/null
+++ b/goapps/setid/setid.go
@@ -0,0 +1,151 @@
+// Program setid demonstrates how to use the cap and/or psx packages to
+// change the uid, gids of a program.
+//
+// A long writeup explaining how to use it in various different ways
+// is available:
+//
+// https://sites.google.com/site/fullycapable/Home/using-go-to-set-uid-and-gids
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "strconv"
+ "strings"
+ "syscall"
+ "unsafe"
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+ "kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+var (
+ uid = flag.Int("uid", -1, "specify a uid with a value other than (euid)")
+ gid = flag.Int("gid", -1, "specify a gid with a value other than (egid)")
+ drop = flag.Bool("drop", true, "drop privilege once IDs have been changed")
+ suppl = flag.String("suppl", "", "comma separated list of groups")
+ withCaps = flag.Bool("caps", true, "raise capabilities to setuid/setgid")
+)
+
+// setIDsWithCaps uses the cap.SetGroups and cap.SetUID functions, in
+// that order, and aborts the program if either of them fails.
+func setIDsWithCaps(setUID, setGID int, gids []int) {
+ if err := cap.SetGroups(setGID, gids...); err != nil {
+ log.Fatalf("group setting failed: %v", err)
+ }
+ if err := cap.SetUID(setUID); err != nil {
+ log.Fatalf("user setting failed: %v", err)
+ }
+}
+
+// main changes the gid, supplementary groups and uid of the process,
+// either through the cap package (--caps, the default) or through raw
+// psx system calls, optionally drops all capabilities (--drop) and
+// then checks the resulting IDs with showIDs.
+func main() {
+ flag.Parse()
+
+ showIDs("before", false, syscall.Getuid(), syscall.Getgid())
+
+ gids := splitToInts()
+ setGID := *gid
+ if *gid == -1 {
+ setGID = syscall.Getegid()
+ }
+ setUID := *uid
+ if *uid == -1 {
+ setUID = syscall.Getuid()
+ }
+
+ if *withCaps {
+ setIDsWithCaps(setUID, setGID, gids)
+ } else {
+ if _, _, err := psx.Syscall3(syscall.SYS_SETGID, uintptr(setGID), 0, 0); err != 0 {
+ log.Fatalf("failed to setgid(%d): %v", setGID, err)
+ }
+ if len(gids) != 0 {
+ gids32 := []int32{int32(setGID)}
+ for _, g := range gids {
+ gids32 = append(gids32, int32(g))
+ }
+ // setgroups(2) takes the number of entries first and the
+ // pointer to the list second.
+ if _, _, err := psx.Syscall3(syscall.SYS_SETGROUPS, uintptr(len(gids32)), uintptr(unsafe.Pointer(&gids32[0])), 0); err != 0 {
+ log.Fatalf("failed to setgroups(%d, %v): %v", setGID, gids32, err)
+ }
+ }
+ if _, _, err := psx.Syscall3(syscall.SYS_SETUID, uintptr(setUID), 0, 0); err != 0 {
+ log.Fatalf("failed to setuid(%d): %v", setUID, err)
+ }
+ }
+
+ if *drop {
+ if err := cap.NewSet().SetProc(); err != nil {
+ log.Fatalf("unable to drop privilege: %v", err)
+ }
+ }
+
+ showIDs("after", true, setUID, setGID)
+}
+
+// splitToInts parses a comma separated string to a slice of integers.
+func splitToInts() (ret []int) {
+ if *suppl == "" {
+ return
+ }
+ a := strings.Split(*suppl, ",")
+ for _, s := range a {
+ n, err := strconv.Atoi(s)
+ if err != nil {
+ log.Fatalf("bad supplementary group [%q]: %v", s, err)
+ }
+ ret = append(ret, n)
+ }
+ return
+}
+
+// dumpStatus explores the current process /proc/task/* status files
+// for matching values.
+func dumpStatus(testCase string, validate bool, filter, expect string) bool {
+ fmt.Printf("%s:\n", testCase)
+ var failed bool
+ pid := syscall.Getpid()
+ fs, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid))
+ if err != nil {
+ log.Fatal(err)
+ }
+ for _, f := range fs {
+ tf := fmt.Sprintf("/proc/%s/status", f.Name())
+ d, err := ioutil.ReadFile(tf)
+ if err != nil {
+ fmt.Println(tf, err)
+ failed = true
+ continue
+ }
+ lines := strings.Split(string(d), "\n")
+ for _, line := range lines {
+ if strings.HasPrefix(line, filter) {
+ fails := line != expect
+ failure := ""
+ if fails && validate {
+ failed = fails
+ failure = " (bad)"
+ }
+ fmt.Printf("%s %s%s\n", tf, line, failure)
+ break
+ }
+ }
+ }
+ return failed
+}
+
+// showIDs dumps the thread map out of the /proc/<proc>/tasks
+// filesystem to confirm that all of the threads associated with the
+// process have the same uid/gid values. Note, the code does not
+// attempt to validate the supplementary groups at present.
+func showIDs(test string, validate bool, wantUID, wantGID int) {
+ fmt.Printf("%s capability state: %q\n", test, cap.GetProc())
+
+ failed := dumpStatus(test+" gid", validate, "Gid:", fmt.Sprintf("Gid:\t%d\t%d\t%d\t%d", wantGID, wantGID, wantGID, wantGID))
+
+ failed = dumpStatus(test+" uid", validate, "Uid:", fmt.Sprintf("Uid:\t%d\t%d\t%d\t%d", wantUID, wantUID, wantUID, wantUID)) || failed
+
+ if validate && failed {
+ log.Fatal("did not observe desired *id state")
+ }
+}
diff --git a/goapps/web/README b/goapps/web/README
new file mode 100644
index 0000000..cc3c609
--- /dev/null
+++ b/goapps/web/README
@@ -0,0 +1,18 @@
+This sample program needs to be built as follows (when built with Go
+prior to 1.15):
+
+ CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" go build web.go
+
+go1.15+ does not require the CGO_LDFLAGS_ALLOW variable and can build
+this code with
+
+ go build web.go
+
+A more complete walk through of what this code does is provided here:
+
+ https://sites.google.com/site/fullycapable/building-go-programs-that-manipulate-capabilities
+
+Go compilers prior to go1.11.13 are not expected to work. Report more
+recent issues to:
+
+ https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1065141&product=Tools&resolution=---
diff --git a/goapps/web/go.mod b/goapps/web/go.mod
new file mode 100644
index 0000000..f7ae28b
--- /dev/null
+++ b/goapps/web/go.mod
@@ -0,0 +1,5 @@
+module web
+
+go 1.11
+
+require kernel.org/pub/linux/libs/security/libcap/cap v0.2.48
diff --git a/goapps/web/web.go b/goapps/web/web.go
new file mode 100644
index 0000000..d184e97
--- /dev/null
+++ b/goapps/web/web.go
@@ -0,0 +1,139 @@
+// Program web provides an example of a webserver using capabilities to
+// bind to a privileged port, and then drop all capabilities before
+// handling the first web request.
+//
+// This program cannot work reliably as a pure Go application without
+// the equivalent of the Go runtime patch that adds a POSIX semantics
+// wrapper around the system calls that change per-thread security
+// state. A patch for the pure Go compiler/runtime to add this support
+// is available here [2019-12-14]:
+//
+// https://go-review.googlesource.com/c/go/+/210639/
+//
+// Until that patch, or something like it, is absorbed into the Go
+// runtime the only way to get capabilities to work reliably on the Go
+// runtime is to use something like libpsx via CGo to do capability
+// setting syscalls in C with POSIX semantics. As of this build of the
+// Go "kernel.org/pub/linux/libs/security/libcap/cap" package,
+// courtesy of the "kernel.org/pub/linux/libs/security/libcap/psx"
+// package, this is how things work.
+//
+// To set this up, compile and empower this binary as follows (read
+// over the detail in the psx package description if this doesn't
+// 'just' work):
+//
+// go build web.go
+// sudo setcap cap_setpcap,cap_net_bind_service=p web
+// ./web --port=80
+//
+// Make requests using wget and observe the log of web:
+//
+// wget -o/dev/null -O/dev/stdout localhost:80
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "net"
+ "net/http"
+ "runtime"
+ "syscall"
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+var (
+ port = flag.Int("port", 0, "port to listen on")
+ skipPriv = flag.Bool("skip", false, "skip raising the effective capability - will fail for low ports")
+)
+
+// ensureNotEUID aborts the program if it is running setuid something,
+// or being invoked by root. That is, the preparer isn't setting up
+// the program correctly.
+func ensureNotEUID() {
+ euid := syscall.Geteuid()
+ uid := syscall.Getuid()
+ egid := syscall.Getegid()
+ gid := syscall.Getgid()
+ if uid != euid || gid != egid {
+ log.Fatalf("go runtime is setuid uids:(%d vs %d), gids(%d vs %d)", uid, euid, gid, egid)
+ }
+ if uid == 0 {
+ log.Fatalf("go runtime is running as root - cheating")
+ }
+}
+
+// listen creates a listener by raising effective privilege only to
+// bind to address and then lowering that effective privilege.
+func listen(network, address string) (net.Listener, error) {
+ if *skipPriv {
+ return net.Listen(network, address)
+ }
+
+ orig := cap.GetProc()
+ defer orig.SetProc() // restore original caps on exit.
+
+ c, err := orig.Dup()
+ if err != nil {
+ return nil, fmt.Errorf("failed to dup caps: %v", err)
+ }
+
+ if on, _ := c.GetFlag(cap.Permitted, cap.NET_BIND_SERVICE); !on {
+ return nil, fmt.Errorf("insufficient privilege to bind to low ports - want %q, have %q", cap.NET_BIND_SERVICE, c)
+ }
+
+ if err := c.SetFlag(cap.Effective, true, cap.NET_BIND_SERVICE); err != nil {
+ return nil, fmt.Errorf("unable to set capability: %v", err)
+ }
+
+ if err := c.SetProc(); err != nil {
+ return nil, fmt.Errorf("unable to raise capabilities %q: %v", c, err)
+ }
+ return net.Listen(network, address)
+}
+
+// Handler is used to abstract the ServeHTTP function.
+type Handler struct{}
+
+// ServeHTTP says hello from a single Go hardware thread and reveals
+// its capabilities.
+func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ runtime.LockOSThread()
+ // Get some numbers consistent to the current execution, so
+ // the returned web page demonstrates that the code execution
+ // is bouncing around on different kernel thread ids.
+ p := syscall.Getpid()
+ t := syscall.Gettid()
+ c := cap.GetProc()
+ runtime.UnlockOSThread()
+
+ log.Printf("Saying hello from proc: %d->%d, caps=%q", p, t, c)
+ fmt.Fprintf(w, "Hello from proc: %d->%d, caps=%q\n", p, t, c)
+}
+
+func main() {
+ flag.Parse()
+
+ if *port == 0 {
+ log.Fatal("please supply --port value")
+ }
+
+ ensureNotEUID()
+
+ ls, err := listen("tcp", fmt.Sprintf(":%d", *port))
+ if err != nil {
+ log.Fatalf("aborting: %v", err)
+ }
+ defer ls.Close()
+
+ if !*skipPriv {
+ if err := cap.ModeNoPriv.Set(); err != nil {
+ log.Fatalf("unable to drop all privilege: %v", err)
+ }
+ }
+
+ if err := http.Serve(ls, &Handler{}); err != nil {
+ log.Fatalf("server failed: %v", err)
+ }
+}
diff --git a/kdebug/test-kernel.sh b/kdebug/test-kernel.sh
index c8ce144..1326cd7 100755
--- a/kdebug/test-kernel.sh
+++ b/kdebug/test-kernel.sh
@@ -13,7 +13,8 @@ function die {
}
pushd ..
-make || die "failed to make libcap tree"
+make test || die "failed to make test of libcap tree"
+make -C progs tcapsh-static || die "failed to make progs/tcapsh-static"
popd
# Assumes desired make *config (eg. make defconfig) is already done.
@@ -45,9 +46,16 @@ file /root/setcap $HERE/../progs/setcap 0755 0 0
file /root/getcap $HERE/../progs/getcap 0755 0 0
file /root/capsh $HERE/../progs/capsh 0755 0 0
file /root/getpcaps $HERE/../progs/getpcaps 0755 0 0
+file /root/tcapsh-static $HERE/../progs/tcapsh-static 0755 0 0
EOF
-COMMANDS="ls ln cp id pwd mkdir rmdir cat rm sh mount umount chmod less"
+# convenience for some local experiments
+if [ -f "$HERE/extras.sh" ]; then
+ echo "local, uncommitted enhancements to kernel test"
+ . "$HERE/extras.sh"
+fi
+
+COMMANDS="awk cat chmod cp dmesg fgrep id less ln ls mkdir mount pwd rm rmdir sh sort umount uniq vi"
for f in $COMMANDS; do
echo slink /bin/$f /sbin/busybox 0755 0 0 >> fs.conf
done
@@ -59,9 +67,10 @@ done
$KBASE/usr/gen_init_cpio fs.conf | gzip -9 > initramfs.img
-KERNEL=$KBASE/arch/x86_64/boot/bzImage
+KERNEL=$KBASE/arch/$(uname -m)/boot/bzImage
qemu-system-$(uname -m) -m 1024 \
-kernel $KERNEL \
-initrd initramfs.img \
- -append "$APPEND"
+ -append "$APPEND" \
+ -smp sockets=2,dies=1,cores=4
diff --git a/kdebug/test-passwd b/kdebug/test-passwd
index 4fa92a4..0ff71df 100644
--- a/kdebug/test-passwd
+++ b/kdebug/test-passwd
@@ -1,2 +1,3 @@
root:x:0:0:root:/root:/bin/bash
+luser:x:1:1:Luser:/:/bin/bash
nobody:x:99:99:Nobody:/:/sbin/nologin
diff --git a/libcap/.gitignore b/libcap/.gitignore
index 34cc5d6..8f77a0e 100644
--- a/libcap/.gitignore
+++ b/libcap/.gitignore
@@ -3,4 +3,9 @@ cap_names.list.h
_caps_output.gperf
libcap.a
libcap.so*
+libpsx.a
+libpsx.so*
_makenames
+cap_test
+libcap.pc
+libpsx.pc
diff --git a/libcap/Makefile b/libcap/Makefile
index d189777..9563d88 100644
--- a/libcap/Makefile
+++ b/libcap/Makefile
@@ -6,29 +6,60 @@ include ../Make.Rules
#
# Library version
#
-LIBNAME=$(LIBTITLE).so
-STALIBNAME=$(LIBTITLE).a
+CAPLIBNAME=$(LIBTITLE).so
+STACAPLIBNAME=$(LIBTITLE).a
#
+PSXLIBNAME=libpsx.so
+STAPSXLIBNAME=libpsx.a
-FILES=cap_alloc cap_proc cap_extint cap_flag cap_text cap_file
+CAPFILES=cap_alloc cap_proc cap_extint cap_flag cap_text cap_file
+PSXFILES=../psx/psx
INCLS=libcap.h cap_names.h $(INCS)
-OBJS=$(addsuffix .o, $(FILES))
-MAJLIBNAME=$(LIBNAME).$(VERSION)
-MINLIBNAME=$(MAJLIBNAME).$(MINOR)
GPERF_OUTPUT = _caps_output.gperf
-all: $(MINLIBNAME) $(STALIBNAME) libcap.pc
+CAPOBJS=$(addsuffix .o, $(CAPFILES))
+MAJCAPLIBNAME=$(CAPLIBNAME).$(VERSION)
+MINCAPLIBNAME=$(MAJCAPLIBNAME).$(MINOR)
+
+PSXOBJS=$(addsuffix .o, $(PSXFILES))
+MAJPSXLIBNAME=$(PSXLIBNAME).$(VERSION)
+MINPSXLIBNAME=$(MAJPSXLIBNAME).$(MINOR)
+
+all: pcs $(STACAPLIBNAME)
+ifeq ($(SHARED),yes)
+ $(MAKE) $(CAPLIBNAME)
+endif
+ifeq ($(PTHREADS),yes)
+ $(MAKE) $(STAPSXLIBNAME)
+ifeq ($(SHARED),yes)
+ $(MAKE) $(PSXLIBNAME)
+endif
+endif
+
+pcs: libcap.pc
+ifeq ($(PTHREADS),yes)
+ $(MAKE) libpsx.pc
+endif
ifeq ($(BUILD_GPERF),yes)
USE_GPERF_OUTPUT = $(GPERF_OUTPUT)
-INCLUDE_GPERF_OUTPUT = -include $(GPERF_OUTPUT)
+INCLUDE_GPERF_OUTPUT = -DINCLUDE_GPERF_OUTPUT='"$(GPERF_OUTPUT)"'
endif
libcap.pc: libcap.pc.in
sed -e 's,@prefix@,$(prefix),' \
-e 's,@exec_prefix@,$(exec_prefix),' \
- -e 's,@libdir@,$(lib_prefix)/$(lib),' \
+ -e 's,@libdir@,$(LIBDIR),' \
+ -e 's,@includedir@,$(inc_prefix)/include,' \
+ -e 's,@VERSION@,$(VERSION).$(MINOR),' \
+ -e 's,@deps@,$(DEPS),' \
+ $< >$@
+
+libpsx.pc: libpsx.pc.in
+ sed -e 's,@prefix@,$(prefix),' \
+ -e 's,@exec_prefix@,$(exec_prefix),' \
+ -e 's,@libdir@,$(LIBDIR),' \
-e 's,@includedir@,$(inc_prefix)/include,' \
-e 's,@VERSION@,$(VERSION).$(MINOR),' \
-e 's,@deps@,$(DEPS),' \
@@ -41,20 +72,37 @@ cap_names.h: _makenames
./_makenames > cap_names.h
$(GPERF_OUTPUT): cap_names.list.h
- perl -e 'print "struct __cap_token_s { const char *name; int index; };\n%{\nconst struct __cap_token_s *__cap_lookup_name(const char *, unsigned int);\n%}\n%%\n"; while ($$l = <>) { $$l =~ s/[\{\"]//g; $$l =~ s/\}.*// ; print $$l; }' < $< | gperf --ignore-case --language=ANSI-C --readonly --null-strings --global-table --hash-function-name=__cap_hash_name --lookup-function-name="__cap_lookup_name" -c -t -m20 $(INDENT) > $@
+ perl -e 'print "struct __cap_token_s { const char *name; int index; };\n%{\nconst struct __cap_token_s *__cap_lookup_name(const char *, size_t);\n%}\n%%\n"; while ($$l = <>) { $$l =~ s/[\{\"]//g; $$l =~ s/\}.*// ; print $$l; }' < $< | gperf --ignore-case --language=ANSI-C --readonly --null-strings --global-table --hash-function-name=__cap_hash_name --lookup-function-name="__cap_lookup_name" -c -t -m20 $(INDENT) > $@
+ sed -e 's/unsigned int len/size_t len/' -i $@
-cap_names.list.h: Makefile $(KERNEL_HEADERS)/linux/capability.h
- @echo "=> making $@ from $(KERNEL_HEADERS)/linux/capability.h"
- perl -e 'while ($$l=<>) { if ($$l =~ /^\#define[ \t](CAP[_A-Z]+)[ \t]+([0-9]+)\s+$$/) { $$tok=$$1; $$val=$$2; $$tok =~ tr/A-Z/a-z/; print "{\"$$tok\",$$val},\n"; } }' $(KERNEL_HEADERS)/linux/capability.h | fgrep -v 0x > $@
+# Intention is that libcap keeps up with torvalds' tree, as reflected
+# by this maintained version of the kernel header. libcap dynamically
+# trims the meaning of "all" capabilities down to that of the running
+# kernel as of 2.30.
+UAPI_HEADER := $(topdir)/libcap/include/uapi/linux/capability.h
+cap_names.list.h: Makefile $(UAPI_HEADER)
+ @echo "=> making $@ from $(UAPI_HEADER)"
+ perl -e 'while ($$l=<>) { if ($$l =~ /^\#define[ \t](CAP[_A-Z]+)[ \t]+([0-9]+)\s+$$/) { $$tok=$$1; $$val=$$2; $$tok =~ tr/A-Z/a-z/; print "{\"$$tok\",$$val},\n"; } }' $(UAPI_HEADER) | fgrep -v 0x > $@
-$(STALIBNAME): $(OBJS)
+$(STACAPLIBNAME): $(CAPOBJS)
$(AR) rcs $@ $^
$(RANLIB) $@
-$(MINLIBNAME): $(OBJS)
- $(LD) $(CFLAGS) $(LDFLAGS) -Wl,-soname,$(MAJLIBNAME) -o $@ $^
- ln -sf $(MINLIBNAME) $(MAJLIBNAME)
- ln -sf $(MAJLIBNAME) $(LIBNAME)
+$(STAPSXLIBNAME): $(PSXOBJS) include/sys/psx_syscall.h
+ $(AR) rcs $@ $(PSXOBJS)
+ $(RANLIB) $@
+
+ifeq ($(SHARED),yes)
+$(CAPLIBNAME) $(MAJCAPLIBNAME) $(MINCAPLIBNAME): $(CAPOBJS)
+ $(LD) $(CFLAGS) $(LDFLAGS) -Wl,-soname,$(MAJCAPLIBNAME) -o $(MINCAPLIBNAME) $^
+ ln -sf $(MINCAPLIBNAME) $(MAJCAPLIBNAME)
+ ln -sf $(MAJCAPLIBNAME) $(CAPLIBNAME)
+
+$(PSXLIBNAME) $(MAJPSXLIBNAME) $(MINPSXLIBNAME): $(PSXOBJS) include/sys/psx_syscall.h
+ $(LD) $(CFLAGS) $(LDFLAGS) -Wl,-soname,$(MAJPSXLIBNAME) -o $(MINPSXLIBNAME) $(PSXOBJS) $(PSXLINKFLAGS)
+ ln -sf $(MINPSXLIBNAME) $(MAJPSXLIBNAME)
+ ln -sf $(MAJPSXLIBNAME) $(PSXLIBNAME)
+endif
%.o: %.c $(INCLS)
$(CC) $(CFLAGS) $(IPATH) -c $< -o $@
@@ -62,22 +110,80 @@ $(MINLIBNAME): $(OBJS)
cap_text.o: cap_text.c $(USE_GPERF_OUTPUT) $(INCLS)
$(CC) $(CFLAGS) $(IPATH) $(INCLUDE_GPERF_OUTPUT) -c $< -o $@
-install: all
- mkdir -p -m 0755 $(FAKEROOT)$(INCDIR)/sys
- install -m 0644 include/sys/capability.h $(FAKEROOT)$(INCDIR)/sys
- mkdir -p -m 0755 $(FAKEROOT)$(LIBDIR)
- install -m 0644 $(STALIBNAME) $(FAKEROOT)$(LIBDIR)/$(STALIBNAME)
- install -m 0644 $(MINLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MINLIBNAME)
- ln -sf $(MINLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MAJLIBNAME)
- ln -sf $(MAJLIBNAME) $(FAKEROOT)$(LIBDIR)/$(LIBNAME)
+cap_test: cap_test.c libcap.h
+ $(CC) $(CFLAGS) $(IPATH) $< -o $@
+
+test: cap_test
+ ./cap_test
+
+install: install-static
+ifeq ($(SHARED),yes)
+ $(MAKE) install-shared
+endif
+
+install-static: install-static-cap
+ifeq ($(PTHREADS),yes)
+ $(MAKE) install-static-psx
+endif
+
+install-shared: install-shared-cap
+ifeq ($(PTHREADS),yes)
+ $(MAKE) install-shared-psx
+endif
+
+install-cap: install-static-cap
+ifeq ($(SHARED),yes)
+ $(MAKE) install-shared-cap
+endif
+
+install-psx: install-static-psx
+ifeq ($(SHARED),yes)
+ $(MAKE) install-shared-psx
+endif
+
+install-static-cap: install-common-cap $(STACAPLIBNAME)
+ install -m 0644 $(STACAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(STACAPLIBNAME)
+
+install-shared-cap: install-common-cap $(MINCAPLIBNAME)
+ install -m 0644 $(MINCAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MINCAPLIBNAME)
+ ln -sf $(MINCAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MAJCAPLIBNAME)
+ ln -sf $(MAJCAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(CAPLIBNAME)
ifeq ($(FAKEROOT),)
-/sbin/ldconfig
endif
- mkdir -p -m 0755 $(FAKEROOT)$(PKGCONFIGDIR)
+
+install-static-psx: install-common-psx $(STAPSXLIBNAME)
+ install -m 0644 $(STAPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(STAPSXLIBNAME)
+
+install-shared-psx: install-common-psx $(MINPSXLIBNAME)
+ install -m 0644 $(MINPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MINPSXLIBNAME)
+ ln -sf $(MINPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MAJPSXLIBNAME)
+ ln -sf $(MAJPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(PSXLIBNAME)
+ifeq ($(FAKEROOT),)
+ -/sbin/ldconfig
+endif
+
+install-common-cap: install-common libcap.pc
+ install -m 0644 include/sys/capability.h $(FAKEROOT)$(INCDIR)/sys
install -m 0644 libcap.pc $(FAKEROOT)$(PKGCONFIGDIR)/libcap.pc
+include/sys/psx_syscall.h: ../psx/psx_syscall.h
+ rm -f $@
+ ln -s ../../../psx/psx_syscall.h $@
+
+install-common-psx: install-common libpsx.pc include/sys/psx_syscall.h
+ install -m 0644 include/sys/psx_syscall.h $(FAKEROOT)$(INCDIR)/sys
+ install -m 0644 libpsx.pc $(FAKEROOT)$(PKGCONFIGDIR)/libpsx.pc
+
+install-common:
+ mkdir -p -m 0755 $(FAKEROOT)$(INCDIR)/sys
+ mkdir -p -m 0755 $(FAKEROOT)$(PKGCONFIGDIR)
+ mkdir -p -m 0755 $(FAKEROOT)$(LIBDIR)
+
clean:
$(LOCALCLEAN)
- rm -f $(OBJS) $(LIBNAME)* $(STALIBNAME) libcap.pc
- rm -f cap_names.h cap_names.list.h _makenames $(GPERF_OUTPUT)
+ rm -f $(CAPOBJS) $(CAPLIBNAME)* $(STACAPLIBNAME) libcap.pc
+ rm -f $(PSXOBJS) $(PSXLIBNAME)* $(STAPSXLIBNAME) libpsx.pc
+ rm -f cap_names.h cap_names.list.h _makenames $(GPERF_OUTPUT) cap_test
+ rm -f include/sys/psx_syscall.h
cd include/sys && $(LOCALCLEAN)
diff --git a/libcap/_makenames.c b/libcap/_makenames.c
index 8cc819b..46ab0c9 100644
--- a/libcap/_makenames.c
+++ b/libcap/_makenames.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997-8 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2020 Andrew G. Morgan <morgan@kernel.org>
*
* This is a file to make the capability <-> string mappings for
* libcap.
@@ -7,7 +7,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include <sys/capability.h>
+#include <string.h>
/*
* #include 'sed' generated array
@@ -21,41 +21,66 @@ struct {
{NULL, -1}
};
-/* this should be more than big enough (factor of three at least) */
-const char *pointers[8*sizeof(struct __user_cap_data_struct)];
+/*
+ * recalloc grows the allocation p from was bytes to is bytes using
+ * realloc() and zero-fills the newly added tail. p may be NULL (with
+ * was == 0) to obtain a fresh, zeroed allocation. If realloc() fails,
+ * a message is written to stderr and the program exits with status 1,
+ * so this function never returns NULL.
+ */
+static void *recalloc(void *p, int was, int is) {
+    /* use char * here: arithmetic on void * is a GNU extension, not ISO C */
+    char *n = realloc(p, is);
+    if (!n) {
+        fputs("out of memory\n", stderr);
+        exit(1);
+    }
+    /* only clear when the block actually grew; is-was would otherwise
+     * be negative and convert to an enormous size_t */
+    if (is > was) {
+        memset(n + was, 0, is - was);
+    }
+    return n;
+}
int main(void)
{
- int i, maxcaps=0;
+ int i, maxcaps=0, maxlength=0;
+ const char **pointers = NULL;
+ int pointers_avail = 0;
for ( i=0; list[i].index >= 0 && list[i].name; ++i ) {
if (maxcaps <= list[i].index) {
maxcaps = list[i].index + 1;
}
+ if (list[i].index >= pointers_avail) {
+ int was = pointers_avail * sizeof(char *);
+ pointers_avail = 2 * list[i].index + 1;
+ pointers = recalloc(pointers, was, pointers_avail * sizeof(char *));
+ }
pointers[list[i].index] = list[i].name;
+ int n = strlen(list[i].name);
+ if (n > maxlength) {
+ maxlength = n;
+ }
}
printf("/*\n"
" * DO NOT EDIT: this file is generated automatically from\n"
" *\n"
- " * <linux/capability.h>\n"
- " */\n"
- "#define __CAP_BITS %d\n"
+ " * <uapi/linux/capability.h>\n"
+ " */\n\n"
+ "#define __CAP_BITS %d\n"
+ "#define __CAP_NAME_SIZE %d\n"
"\n"
"#ifdef LIBCAP_PLEASE_INCLUDE_ARRAY\n"
- " char const *_cap_names[__CAP_BITS] = {\n", maxcaps);
+ "#define LIBCAP_CAP_NAMES { \\\n", maxcaps, maxlength+1);
for (i=0; i<maxcaps; ++i) {
- if (pointers[i])
- printf(" /* %d */\t\"%s\",\n", i, pointers[i]);
- else
- printf(" /* %d */\tNULL,\t\t/* - presently unused */\n", i);
+ if (pointers[i]) {
+ printf(" /* %d */\t\"%s\", \\\n", i, pointers[i]);
+ } else {
+ printf(" /* %d */\tNULL,\t\t/* - presently unused */ \\\n", i);
+ }
}
- printf(" };\n"
+ printf(" }\n"
"#endif /* LIBCAP_PLEASE_INCLUDE_ARRAY */\n"
"\n"
"/* END OF FILE */\n");
+ free(pointers);
exit(0);
}
diff --git a/libcap/cap_alloc.c b/libcap/cap_alloc.c
index 525ea90..6dab4e6 100644
--- a/libcap/cap_alloc.c
+++ b/libcap/cap_alloc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997-8 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2019 Andrew G Morgan <morgan@kernel.org>
*
* This file deals with allocation and deallocation of internal
* capability sets as specified by POSIX.1e (formerly, POSIX 6).
@@ -8,6 +8,23 @@
#include "libcap.h"
/*
+ * This gets set via the pre-main() executed constructor function below it.
+ */
+static cap_value_t _cap_max_bits;
+
+/*
+ * Constructor (priority 300) run before main(). Unless _cap_max_bits
+ * already holds a non-zero value, it asks cap_set_syscall() to load
+ * the default syscall hooks and then hands _cap_max_bits to
+ * _binary_search() together with cap_get_bound() and the search
+ * limits.
+ */
+__attribute__((constructor (300))) static void _initialize_libcap(void)
+{
+    if (!_cap_max_bits) {
+        cap_set_syscall(NULL, NULL);
+        _binary_search(_cap_max_bits, cap_get_bound, 0, __CAP_MAXBITS, __CAP_BITS);
+    }
+}
+
+/* cap_max_bits returns the value currently stored in _cap_max_bits. */
+cap_value_t cap_max_bits(void)
+{
+    cap_value_t bits = _cap_max_bits;
+    return bits;
+}
+
+/*
* Obtain a blank set of capabilities
*/
@@ -16,8 +33,7 @@ cap_t cap_init(void)
__u32 *raw_data;
cap_t result;
- raw_data = malloc( sizeof(__u32) + sizeof(*result) );
-
+ raw_data = calloc(1, sizeof(__u32) + sizeof(*result));
if (raw_data == NULL) {
_cap_debug("out of memory");
errno = ENOMEM;
@@ -26,7 +42,6 @@ cap_t cap_init(void)
*raw_data = CAP_T_MAGIC;
result = (cap_t) (raw_data + 1);
- memset(result, 0, sizeof(*result));
result->head.version = _LIBCAP_CAPABILITY_VERSION;
capget(&result->head, NULL); /* load the kernel-capability version */
@@ -106,6 +121,30 @@ cap_t cap_dup(cap_t cap_d)
return result;
}
+/*
+ * cap_iab_init allocates a zero-filled IAB tuple. The allocation is
+ * prefixed by a __u32 holding CAP_IAB_MAGIC; the returned pointer
+ * addresses the memory immediately after that magic word. Returns
+ * NULL, with errno set to ENOMEM, if the allocation fails.
+ */
+cap_iab_t cap_iab_init(void) {
+    __u32 *base = calloc(1, sizeof(__u32) + sizeof(struct cap_iab_s));
+    if (base == NULL) {
+        _cap_debug("out of memory");
+        errno = ENOMEM;
+        return NULL;
+    }
+    *(base++) = CAP_IAB_MAGIC;
+    return (cap_iab_t) base;
+}
+
+/*
+ * cap_new_launcher allocates some memory for a launcher and
+ * initializes it. To actually launch a program with this launcher,
+ * use cap_launch(). By default, the launcher is a no-op from a
+ * security perspective and will act just as fork()/execve()
+ * would. Use cap_launcher_setuid() etc to override this.
+ *
+ * The arg0, argv and envp pointers are stored as given (not copied).
+ * Returns NULL, with errno set to ENOMEM, if the allocation fails.
+ */
+cap_launch_t cap_new_launcher(const char *arg0, const char * const *argv,
+                              const char * const *envp)
+{
+    __u32 *data = calloc(1, sizeof(__u32) + sizeof(struct cap_launch_s));
+    if (data == NULL) {
+        _cap_debug("out of memory");
+        errno = ENOMEM;
+        return NULL;
+    }
+    /* the magic word sits just in front of the structure handed back */
+    *(data++) = CAP_LAUNCH_MAGIC;
+    struct cap_launch_s *attr = (struct cap_launch_s *) data;
+    attr->arg0 = arg0;
+    attr->argv = argv;
+    attr->envp = envp;
+    return attr;
+}
/*
* Scrub and then liberate an internal capability set.
@@ -113,10 +152,10 @@ cap_t cap_dup(cap_t cap_d)
int cap_free(void *data_p)
{
- if ( !data_p )
+ if (!data_p)
return 0;
- if ( good_cap_t(data_p) ) {
+ if (good_cap_t(data_p)) {
data_p = -1 + (__u32 *) data_p;
memset(data_p, 0, sizeof(__u32) + sizeof(struct _cap_struct));
free(data_p);
@@ -124,7 +163,7 @@ int cap_free(void *data_p)
return 0;
}
- if ( good_cap_string(data_p) ) {
+ if (good_cap_string(data_p)) {
size_t length = strlen(data_p) + sizeof(__u32);
data_p = -1 + (__u32 *) data_p;
memset(data_p, 0, length);
@@ -133,6 +172,31 @@ int cap_free(void *data_p)
return 0;
}
+ if (good_cap_iab_t(data_p)) {
+ size_t length = sizeof(struct cap_iab_s) + sizeof(__u32);
+ data_p = -1 + (__u32 *) data_p;
+ memset(data_p, 0, length);
+ free(data_p);
+ data_p = NULL;
+ return 0;
+ }
+
+    if (good_cap_launch_t(data_p)) {
+        cap_launch_t launcher = data_p;
+        /* release the objects the launcher refers to before the launcher itself */
+        if (launcher->iab) {
+            cap_free(launcher->iab);
+        }
+        if (launcher->chroot) {
+            cap_free(launcher->chroot);
+        }
+        /*
+         * Scrub the whole allocation made by cap_new_launcher(): a
+         * __u32 magic word followed by a struct cap_launch_s (not a
+         * struct cap_iab_s).
+         */
+        size_t length = sizeof(struct cap_launch_s) + sizeof(__u32);
+        data_p = -1 + (__u32 *) data_p;
+        memset(data_p, 0, length);
+        free(data_p);
+        data_p = NULL;
+        return 0;
+    }
+
_cap_debug("don't recognize what we're supposed to liberate");
errno = EINVAL;
return -1;
diff --git a/libcap/cap_file.c b/libcap/cap_file.c
index 76aac8c..84ae3e1 100644
--- a/libcap/cap_file.c
+++ b/libcap/cap_file.c
@@ -4,6 +4,10 @@
* This file deals with setting capabilities on files.
*/
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
#include <sys/types.h>
#include <byteswap.h>
#include <sys/stat.h>
@@ -23,6 +27,22 @@ extern int fsetxattr(int, const char *, const void *, size_t, int);
extern int removexattr(const char *, const char *);
extern int fremovexattr(int, const char *);
+/*
+ * This public API was moved to include/uapi/linux/xattr.h . For just
+ * these definitions, it isn't really worth managing this in our build
+ * system with yet another copy of a header file. We just provide
+ * fallback definitions here.
+ */
+#ifndef XATTR_CAPS_SUFFIX
+#define XATTR_CAPS_SUFFIX "capability"
+#endif
+#ifndef XATTR_SECURITY_PREFIX
+#define XATTR_SECURITY_PREFIX "security."
+#endif
+#ifndef XATTR_NAME_CAPS
+#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+#endif
+
#include "libcap.h"
#ifdef VFS_CAP_U32
@@ -37,7 +57,7 @@ extern int fremovexattr(int, const char *);
#define FIXUP_32BITS(x) (x)
#endif
-static cap_t _fcaps_load(struct vfs_cap_data *rawvfscap, cap_t result,
+static cap_t _fcaps_load(struct vfs_ns_cap_data *rawvfscap, cap_t result,
int bytes)
{
__u32 magic_etc;
@@ -45,19 +65,21 @@ static cap_t _fcaps_load(struct vfs_cap_data *rawvfscap, cap_t result,
magic_etc = FIXUP_32BITS(rawvfscap->magic_etc);
switch (magic_etc & VFS_CAP_REVISION_MASK) {
-#ifdef VFS_CAP_REVISION_1
case VFS_CAP_REVISION_1:
tocopy = VFS_CAP_U32_1;
bytes -= XATTR_CAPS_SZ_1;
break;
-#endif
-#ifdef VFS_CAP_REVISION_2
case VFS_CAP_REVISION_2:
tocopy = VFS_CAP_U32_2;
bytes -= XATTR_CAPS_SZ_2;
break;
-#endif
+
+ case VFS_CAP_REVISION_3:
+ tocopy = VFS_CAP_U32_3;
+ bytes -= XATTR_CAPS_SZ_3;
+ result->rootid = FIXUP_32BITS(rawvfscap->rootid);
+ break;
default:
cap_free(result);
@@ -95,7 +117,7 @@ static cap_t _fcaps_load(struct vfs_cap_data *rawvfscap, cap_t result,
return result;
}
-static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
+static int _fcaps_save(struct vfs_ns_cap_data *rawvfscap, cap_t cap_d,
int *bytes_p)
{
__u32 eff_not_zero, magic;
@@ -107,35 +129,36 @@ static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
}
switch (cap_d->head.version) {
-#ifdef _LINUX_CAPABILITY_VERSION_1
case _LINUX_CAPABILITY_VERSION_1:
magic = VFS_CAP_REVISION_1;
tocopy = VFS_CAP_U32_1;
*bytes_p = XATTR_CAPS_SZ_1;
break;
-#endif
-#ifdef _LINUX_CAPABILITY_VERSION_2
case _LINUX_CAPABILITY_VERSION_2:
- magic = VFS_CAP_REVISION_2;
- tocopy = VFS_CAP_U32_2;
- *bytes_p = XATTR_CAPS_SZ_2;
- break;
-#endif
-
-#ifdef _LINUX_CAPABILITY_VERSION_3
case _LINUX_CAPABILITY_VERSION_3:
magic = VFS_CAP_REVISION_2;
tocopy = VFS_CAP_U32_2;
*bytes_p = XATTR_CAPS_SZ_2;
break;
-#endif
default:
errno = EINVAL;
return -1;
}
+ if (cap_d->rootid != 0) {
+ if (cap_d->head.version < _LINUX_CAPABILITY_VERSION_3) {
+ _cap_debug("namespaces with non-0 rootid unsupported by kernel");
+ errno = EINVAL;
+ return -1;
+ }
+ magic = VFS_CAP_REVISION_3;
+ tocopy = VFS_CAP_U32_3;
+ *bytes_p = XATTR_CAPS_SZ_3;
+ rawvfscap->rootid = FIXUP_32BITS(cap_d->rootid);
+ }
+
_cap_debug("setting named file capabilities");
for (eff_not_zero = 0, i = 0; i < tocopy; i++) {
@@ -190,7 +213,7 @@ cap_t cap_get_fd(int fildes)
/* allocate a new capability set */
result = cap_init();
if (result) {
- struct vfs_cap_data rawvfscap;
+ struct vfs_ns_cap_data rawvfscap;
int sizeofcaps;
_cap_debug("getting fildes capabilities");
@@ -220,7 +243,7 @@ cap_t cap_get_file(const char *filename)
/* allocate a new capability set */
result = cap_init();
if (result) {
- struct vfs_cap_data rawvfscap;
+ struct vfs_ns_cap_data rawvfscap;
int sizeofcaps;
_cap_debug("getting filename capabilities");
@@ -240,13 +263,23 @@ cap_t cap_get_file(const char *filename)
}
/*
+ * Get rootid as seen in the current user namespace for the file capability
+ * sets.
+ */
+
+uid_t cap_get_nsowner(cap_t cap_d)
+{
+    /* reject anything that is not a valid cap_t before dereferencing
+     * it; on failure errno is EINVAL and (uid_t) -1 is returned */
+    if (!good_cap_t(cap_d)) {
+        errno = EINVAL;
+        return (uid_t) -1;
+    }
+    return cap_d->rootid;
+}
+
+/*
* Set the capabilities of an open file, as specified by its file
* descriptor.
*/
int cap_set_fd(int fildes, cap_t cap_d)
{
- struct vfs_cap_data rawvfscap;
+ struct vfs_ns_cap_data rawvfscap;
int sizeofcaps;
struct stat buf;
@@ -278,7 +311,7 @@ int cap_set_fd(int fildes, cap_t cap_d)
int cap_set_file(const char *filename, cap_t cap_d)
{
- struct vfs_cap_data rawvfscap;
+ struct vfs_ns_cap_data rawvfscap;
int sizeofcaps;
struct stat buf;
@@ -303,6 +336,16 @@ int cap_set_file(const char *filename, cap_t cap_d)
return setxattr(filename, XATTR_NAME_CAPS, &rawvfscap, sizeofcaps, 0);
}
+/*
+ * Set rootid for the file capability sets. Returns 0 on success, or
+ * -1 with errno set to EINVAL if cap_d is not a valid cap_t.
+ */
+
+int cap_set_nsowner(cap_t cap_d, uid_t rootuid)
+{
+    if (!good_cap_t(cap_d)) {
+        errno = EINVAL;
+        return -1;
+    }
+    cap_d->rootid = rootuid;
+    return 0;
+}
+
#else /* ie. ndef VFS_CAP_U32 */
cap_t cap_get_fd(int fildes)
@@ -317,6 +360,12 @@ cap_t cap_get_file(const char *filename)
return NULL;
}
+uid_t cap_get_nsowner(cap_t cap_d)
+{
+    /* variant built when VFS_CAP_U32 is not defined: always fails */
+    const uid_t failure = -1;
+    errno = EINVAL;
+    return failure;
+}
+
int cap_set_fd(int fildes, cap_t cap_d)
{
errno = EINVAL;
@@ -329,4 +378,10 @@ int cap_set_file(const char *filename, cap_t cap_d)
return -1;
}
+int cap_set_nsowner(cap_t cap_d, uid_t rootuid)
+{
+    /* variant built when VFS_CAP_U32 is not defined: always fails */
+    const int failure = -1;
+    errno = EINVAL;
+    return failure;
+}
+
#endif /* def VFS_CAP_U32 */
diff --git a/libcap/cap_flag.c b/libcap/cap_flag.c
index 52ec3b3..c1ffa0d 100644
--- a/libcap/cap_flag.c
+++ b/libcap/cap_flag.c
@@ -1,8 +1,10 @@
/*
- * Copyright (c) 1997-8,2008 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2008,20 Andrew G. Morgan <morgan@kernel.org>
*
* This file deals with flipping of capabilities on internal
* capability sets as specified by POSIX.1e (formerly, POSIX 6).
+ *
+ * It also contains similar code for bit flipping cap_iab_t values.
*/
#include "libcap.h"
@@ -21,7 +23,7 @@ int cap_get_flag(cap_t cap_d, cap_value_t value, cap_flag_t set,
* Is it a known capability?
*/
- if (raised && good_cap_t(cap_d) && value >= 0 && value < __CAP_BITS
+ if (raised && good_cap_t(cap_d) && value >= 0 && value < __CAP_MAXBITS
&& set >= 0 && set < NUMBER_OF_CAP_SETS) {
*raised = isset_cap(cap_d,value,set) ? CAP_SET:CAP_CLEAR;
return 0;
@@ -45,12 +47,12 @@ int cap_set_flag(cap_t cap_d, cap_flag_t set,
* Is it a known capability?
*/
- if (good_cap_t(cap_d) && no_values > 0 && no_values <= __CAP_BITS
+ if (good_cap_t(cap_d) && no_values > 0 && no_values < __CAP_MAXBITS
&& (set >= 0) && (set < NUMBER_OF_CAP_SETS)
&& (raise == CAP_SET || raise == CAP_CLEAR) ) {
int i;
for (i=0; i<no_values; ++i) {
- if (array_values[i] < 0 || array_values[i] >= __CAP_BITS) {
+ if (array_values[i] < 0 || array_values[i] >= __CAP_MAXBITS) {
_cap_debug("weird capability (%d) - skipped", array_values[i]);
} else {
int value = array_values[i];
@@ -80,16 +82,12 @@ int cap_set_flag(cap_t cap_d, cap_flag_t set,
int cap_clear(cap_t cap_d)
{
if (good_cap_t(cap_d)) {
-
memset(&(cap_d->u), 0, sizeof(cap_d->u));
return 0;
-
} else {
-
_cap_debug("invalid pointer");
errno = EINVAL;
return -1;
-
}
}
@@ -125,7 +123,6 @@ int cap_clear_flag(cap_t cap_d, cap_flag_t flag)
/*
* Compare two capability sets
*/
-
int cap_compare(cap_t a, cap_t b)
{
unsigned i;
@@ -148,3 +145,116 @@ int cap_compare(cap_t a, cap_t b)
}
return result;
}
+
+/*
+ * cap_iab_get_vector reads the single bit value from an IAB vector set.
+ * It returns 0 when iab is not a valid cap_iab_t, when bit lies
+ * outside [0, cap_max_bits()), or when vec names no known vector;
+ * otherwise it returns 1 if the bit is raised in that vector and 0 if
+ * it is not.
+ */
+cap_flag_value_t cap_iab_get_vector(cap_iab_t iab, cap_iab_vector_t vec,
+                                    cap_value_t bit)
+{
+    /* a negative bit would otherwise produce an out-of-range word index */
+    if (!good_cap_iab_t(iab) || bit < 0 || bit >= cap_max_bits()) {
+        return 0;
+    }
+
+    unsigned o = (bit >> 5);          /* which 32-bit word */
+    __u32 mask = 1u << (bit & 31);    /* which bit inside that word */
+
+    switch (vec) {
+    case CAP_IAB_INH:
+        return !!(iab->i[o] & mask);
+    case CAP_IAB_AMB:
+        return !!(iab->a[o] & mask);
+    case CAP_IAB_BOUND:
+        return !!(iab->nb[o] & mask);
+    default:
+        return 0;
+    }
+}
+
+/*
+ * cap_iab_set_vector sets the bits in an IAB to the value
+ * raised. Note, setting A implies setting I too, lowering I implies
+ * lowering A too. The B bits are, however, independently settable.
+ *
+ * Returns 0 on success. Returns -1 with errno set to EINVAL when iab
+ * is not a valid cap_iab_t, raised is neither 0 nor 1, bit lies
+ * outside [0, cap_max_bits()), or vec names no known vector.
+ */
+int cap_iab_set_vector(cap_iab_t iab, cap_iab_vector_t vec, cap_value_t bit,
+                       cap_flag_value_t raised)
+{
+    /* a negative bit would otherwise produce an out-of-range word index */
+    if (!good_cap_iab_t(iab) || (raised >> 1) || bit < 0
+        || bit >= cap_max_bits()) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    unsigned o = (bit >> 5);          /* which 32-bit word */
+    __u32 on = 1u << (bit & 31);      /* which bit inside that word */
+    __u32 mask = ~on;
+
+    switch (vec) {
+    case CAP_IAB_INH:
+        iab->i[o] = (iab->i[o] & mask) | (raised ? on : 0);
+        /* A may never hold a bit that I lacks */
+        iab->a[o] &= iab->i[o];
+        break;
+    case CAP_IAB_AMB:
+        iab->a[o] = (iab->a[o] & mask) | (raised ? on : 0);
+        /* every bit present in A must also be present in I */
+        iab->i[o] |= iab->a[o];
+        break;
+    case CAP_IAB_BOUND:
+        iab->nb[o] = (iab->nb[o] & mask) | (raised ? on : 0);
+        break;
+    default:
+        errno = EINVAL;
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * cap_iab_fill overwrites one vector of iab with the flag column of
+ * cap_d, one 32-bit word at a time. For CAP_IAB_BOUND the words are
+ * stored inverted. Filling I masks A with the new I; filling A ORs
+ * the same bits into I. Returns 0 on success, or -1 with errno set
+ * to EINVAL for an invalid cap_d, iab, flag or vec.
+ */
+int cap_iab_fill(cap_iab_t iab, cap_iab_vector_t vec,
+                 cap_t cap_d, cap_flag_t flag)
+{
+    if (!(good_cap_t(cap_d) && good_cap_iab_t(iab))) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (flag != CAP_EFFECTIVE && flag != CAP_INHERITABLE
+        && flag != CAP_PERMITTED) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    int word;
+    for (word = 0; word < _LIBCAP_CAPABILITY_U32S; word++) {
+        switch (vec) {
+        case CAP_IAB_INH:
+            iab->i[word] = cap_d->u[word].flat[flag];
+            iab->a[word] &= iab->i[word];
+            break;
+        case CAP_IAB_AMB:
+            iab->a[word] = cap_d->u[word].flat[flag];
+            iab->i[word] |= cap_d->u[word].flat[flag];
+            break;
+        case CAP_IAB_BOUND:
+            iab->nb[word] = ~cap_d->u[word].flat[flag];
+            break;
+        default:
+            errno = EINVAL;
+            return -1;
+        }
+    }
+
+    return 0;
+}
diff --git a/libcap/cap_names.h b/libcap/cap_names.h
deleted file mode 100644
index 83186e5..0000000
--- a/libcap/cap_names.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * DO NOT EDIT: this file is generated automatically from
- *
- * <linux/capability.h>
- */
-#define __CAP_BITS 37
-
-#ifdef LIBCAP_PLEASE_INCLUDE_ARRAY
- char const *_cap_names[__CAP_BITS] = {
- /* 0 */ "cap_chown",
- /* 1 */ "cap_dac_override",
- /* 2 */ "cap_dac_read_search",
- /* 3 */ "cap_fowner",
- /* 4 */ "cap_fsetid",
- /* 5 */ "cap_kill",
- /* 6 */ "cap_setgid",
- /* 7 */ "cap_setuid",
- /* 8 */ "cap_setpcap",
- /* 9 */ "cap_linux_immutable",
- /* 10 */ "cap_net_bind_service",
- /* 11 */ "cap_net_broadcast",
- /* 12 */ "cap_net_admin",
- /* 13 */ "cap_net_raw",
- /* 14 */ "cap_ipc_lock",
- /* 15 */ "cap_ipc_owner",
- /* 16 */ "cap_sys_module",
- /* 17 */ "cap_sys_rawio",
- /* 18 */ "cap_sys_chroot",
- /* 19 */ "cap_sys_ptrace",
- /* 20 */ "cap_sys_pacct",
- /* 21 */ "cap_sys_admin",
- /* 22 */ "cap_sys_boot",
- /* 23 */ "cap_sys_nice",
- /* 24 */ "cap_sys_resource",
- /* 25 */ "cap_sys_time",
- /* 26 */ "cap_sys_tty_config",
- /* 27 */ "cap_mknod",
- /* 28 */ "cap_lease",
- /* 29 */ "cap_audit_write",
- /* 30 */ "cap_audit_control",
- /* 31 */ "cap_setfcap",
- /* 32 */ "cap_mac_override",
- /* 33 */ "cap_mac_admin",
- /* 34 */ "cap_syslog",
- /* 35 */ "cap_wake_alarm",
- /* 36 */ "cap_block_suspend",
- };
-#endif /* LIBCAP_PLEASE_INCLUDE_ARRAY */
-
-/* END OF FILE */
diff --git a/libcap/cap_proc.c b/libcap/cap_proc.c
index 8ecb57a..1329f94 100644
--- a/libcap/cap_proc.c
+++ b/libcap/cap_proc.c
@@ -1,13 +1,160 @@
/*
- * Copyright (c) 1997-8,2007,2011 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2007,11,19,20 Andrew G Morgan <morgan@kernel.org>
*
* This file deals with getting and setting capabilities on processes.
*/
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <fcntl.h> /* Obtain O_* constant definitions */
+#include <grp.h>
#include <sys/prctl.h>
+#include <sys/securebits.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <linux/limits.h>
#include "libcap.h"
+/*
+ * libcap uses this abstraction for all system calls that change
+ * kernel managed capability state. This permits the user to redirect
+ * it for testing and also to better implement posix semantics when
+ * using pthreads.
+ */
+
+/* default three-argument hook: forwards straight to syscall() */
+static long int _cap_syscall3(long int syscall_nr,
+                              long int arg1, long int arg2, long int arg3)
+{
+    long int rc = syscall(syscall_nr, arg1, arg2, arg3);
+    return rc;
+}
+
+/* default six-argument hook: forwards straight to syscall() */
+static long int _cap_syscall6(long int syscall_nr,
+                              long int arg1, long int arg2, long int arg3,
+                              long int arg4, long int arg5, long int arg6)
+{
+    long int rc = syscall(syscall_nr, arg1, arg2, arg3, arg4, arg5, arg6);
+    return rc;
+}
+
+/*
+ * to keep the structure of the code conceptually similar in C and Go
+ * implementations, we introduce this abstraction for invoking state
+ * writing system calls. In psx+pthreaded code, the fork
+ * implementation provided by nptl ensures that we can consistently
+ * use the multithreaded syscalls even in the child after a fork().
+ */
+struct syscaller_s {
+ long int (*three)(long int syscall_nr,
+ long int arg1, long int arg2, long int arg3);
+ long int (*six)(long int syscall_nr,
+ long int arg1, long int arg2, long int arg3,
+ long int arg4, long int arg5, long int arg6);
+};
+
+/* use this syscaller for multi-threaded code */
+static struct syscaller_s multithread = {
+ .three = _cap_syscall3,
+ .six = _cap_syscall6
+};
+
+/* use this syscaller for single-threaded code */
+static struct syscaller_s singlethread = {
+ .three = _cap_syscall3,
+ .six = _cap_syscall6
+};
+
+/*
+ * This gets reset to 0 if we are *not* linked with libpsx.
+ */
+static int _libcap_overrode_syscalls = 1;
+
+/*
+ * psx_load_syscalls() is weakly defined so we can have it overridden
+ * by libpsx if that library is linked. Specifically, when libcap
+ * calls psx_load_syscalls() it is prepared to override the default
+ * values for the syscalls that libcap uses to change security state.
+ * As can be seen here this present function is mostly a
+ * no-op. However, if libpsx is linked, the one present in that
+ * library (not being weak) will replace this one and the
+ * _libcap_overrode_syscalls value isn't forced to zero.
+ *
+ * Note: we hardcode the prototype for the psx_load_syscalls()
+ * function here so the compiler isn't worried. If we force the build
+ * to include the header, we are close to requiring the optional
+ * libpsx to be linked.
+ */
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+ long int, long int, long int),
+ long int (**syscall6_fn)(long int,
+ long int, long int, long int,
+ long int, long int, long int));
+
+__attribute__((weak))
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+ long int, long int, long int),
+ long int (**syscall6_fn)(long int,
+ long int, long int, long int,
+ long int, long int, long int))
+{
+ _libcap_overrode_syscalls = 0;
+}
+
+/*
+ * cap_set_syscall replaces the functions stored in the multithread
+ * syscaller. Passing NULL for new_syscall asks psx_load_syscalls() to
+ * fill both slots instead; otherwise both arguments are stored as
+ * given. Generally there is no need to call this by hand: the
+ * library's constructor already does so.
+ *
+ * NOTE(review): _libcap_overrode_syscalls is not changed here. If the
+ * weak psx_load_syscalls() has already cleared it, the _libcap_*
+ * wrappers call capset()/prctl() directly and functions installed
+ * through this call look like they go unused - confirm that is
+ * intended.
+ */
+void cap_set_syscall(long int (*new_syscall)(long int,
+                                             long int, long int, long int),
+                     long int (*new_syscall6)(long int, long int,
+                                              long int, long int,
+                                              long int, long int,
+                                              long int))
+{
+    if (new_syscall != NULL) {
+        multithread.three = new_syscall;
+        multithread.six = new_syscall6;
+        return;
+    }
+    psx_load_syscalls(&multithread.three, &multithread.six);
+}
+
+/*
+ * Issue capset: through sc->three when _libcap_overrode_syscalls is
+ * set, otherwise by calling capset() directly.
+ */
+static int _libcap_capset(struct syscaller_s *sc,
+                          cap_user_header_t header, const cap_user_data_t data)
+{
+    if (!_libcap_overrode_syscalls) {
+        return capset(header, data);
+    }
+    return sc->three(SYS_capset, (long int) header, (long int) data, 0);
+}
+
+/*
+ * Issue a prctl with two arguments after the command: through
+ * sc->three when _libcap_overrode_syscalls is set, otherwise by
+ * calling prctl() directly with the remaining arguments zero.
+ */
+static int _libcap_wprctl3(struct syscaller_s *sc,
+                           long int pr_cmd, long int arg1, long int arg2)
+{
+    if (!_libcap_overrode_syscalls) {
+        return prctl(pr_cmd, arg1, arg2, 0, 0, 0);
+    }
+    return sc->three(SYS_prctl, pr_cmd, arg1, arg2);
+}
+
+/*
+ * Issue a prctl with five arguments after the command: through
+ * sc->six when _libcap_overrode_syscalls is set, otherwise by calling
+ * prctl() directly.
+ */
+static int _libcap_wprctl6(struct syscaller_s *sc,
+                           long int pr_cmd, long int arg1, long int arg2,
+                           long int arg3, long int arg4, long int arg5)
+{
+    if (!_libcap_overrode_syscalls) {
+        return prctl(pr_cmd, arg1, arg2, arg3, arg4, arg5);
+    }
+    return sc->six(SYS_prctl, pr_cmd, arg1, arg2, arg3, arg4, arg5);
+}
+
+/*
+ * cap_get_proc obtains the capability set for the current process.
+ */
cap_t cap_get_proc(void)
{
cap_t result;
@@ -27,8 +174,7 @@ cap_t cap_get_proc(void)
return result;
}
-int cap_set_proc(cap_t cap_d)
-{
+static int _cap_set_proc(struct syscaller_s *sc, cap_t cap_d) {
int retval;
if (!good_cap_t(cap_d)) {
@@ -37,11 +183,16 @@ int cap_set_proc(cap_t cap_d)
}
_cap_debug("setting process capabilities");
- retval = capset(&cap_d->head, &cap_d->u[0].set);
+ retval = _libcap_capset(sc, &cap_d->head, &cap_d->u[0].set);
return retval;
}
+/* public entry point: applies cap_d using the multithread syscaller */
+int cap_set_proc(cap_t cap_d)
+{
+    struct syscaller_s *sc = &multithread;
+    return _cap_set_proc(sc, cap_d);
+}
+
/* the following two functions are not required by POSIX */
/* read the caps on a specific process */
@@ -85,7 +236,11 @@ cap_t cap_get_pid(pid_t pid)
return result;
}
-/* set the caps on a specific process/pg etc.. */
+/*
+ * set the caps on a specific process/pg etc.. The kernel has long
+ * since deprecated this asynchronous interface. DON'T EXPECT THIS TO
+ * EVER WORK AGAIN.
+ */
int capsetp(pid_t pid, cap_t cap_d)
{
@@ -105,22 +260,701 @@ int capsetp(pid_t pid, cap_t cap_d)
return error;
}
+/* the kernel api requires unsigned long arguments */
+#define pr_arg(x) ((unsigned long) x)
+
/* get a capability from the bounding set */
int cap_get_bound(cap_value_t cap)
{
int result;
- result = prctl(PR_CAPBSET_READ, cap);
+ result = prctl(PR_CAPBSET_READ, pr_arg(cap), pr_arg(0));
+ if (result < 0) {
+ errno = -result;
+ return -1;
+ }
+ return result;
+}
+
+static int _cap_drop_bound(struct syscaller_s *sc, cap_value_t cap)
+{
+ int result;
+
+ result = _libcap_wprctl3(sc, PR_CAPBSET_DROP, pr_arg(cap), pr_arg(0));
+ if (result < 0) {
+ errno = -result;
+ return -1;
+ }
return result;
}
/* drop a capability from the bounding set */
-int cap_drop_bound(cap_value_t cap)
+int cap_drop_bound(cap_value_t cap) {
+    /* public entry point: always uses the multithread syscaller */
+    struct syscaller_s *sc = &multithread;
+    return _cap_drop_bound(sc, cap);
+}
+
+/* get a capability from the ambient set */
+
+int cap_get_ambient(cap_value_t cap)
{
int result;
+ result = prctl(PR_CAP_AMBIENT, pr_arg(PR_CAP_AMBIENT_IS_SET),
+ pr_arg(cap), pr_arg(0), pr_arg(0));
+ if (result < 0) {
+ errno = -result;
+ return -1;
+ }
+ return result;
+}
+
+static int _cap_set_ambient(struct syscaller_s *sc,
+ cap_value_t cap, cap_flag_value_t set)
+{
+ int result, val;
+ switch (set) {
+ case CAP_SET:
+ val = PR_CAP_AMBIENT_RAISE;
+ break;
+ case CAP_CLEAR:
+ val = PR_CAP_AMBIENT_LOWER;
+ break;
+ default:
+ errno = EINVAL;
+ return -1;
+ }
+ result = _libcap_wprctl6(sc, PR_CAP_AMBIENT, pr_arg(val), pr_arg(cap),
+ pr_arg(0), pr_arg(0), pr_arg(0));
+ if (result < 0) {
+ errno = -result;
+ return -1;
+ }
+ return result;
+}
+
+/*
+ * cap_set_ambient modifies a single ambient capability value.
+ */
+int cap_set_ambient(cap_value_t cap, cap_flag_value_t set)
+{
+ return _cap_set_ambient(&multithread, cap, set);
+}
- result = prctl(PR_CAPBSET_DROP, cap);
+static int _cap_reset_ambient(struct syscaller_s *sc)
+{
+ int olderrno = errno;
+ cap_value_t c;
+ int result = 0;
+
+ for (c = 0; !result; c++) {
+ result = cap_get_ambient(c);
+ if (result == -1) {
+ errno = olderrno;
+ return 0;
+ }
+ }
+
+ result = _libcap_wprctl6(sc, PR_CAP_AMBIENT,
+ pr_arg(PR_CAP_AMBIENT_CLEAR_ALL),
+ pr_arg(0), pr_arg(0), pr_arg(0), pr_arg(0));
+ if (result < 0) {
+ errno = -result;
+ return -1;
+ }
return result;
}
+
+/*
+ * cap_reset_ambient erases all ambient capabilities - this reads the
+ * ambient caps before performing the erase to workaround the corner
+ * case where the set is empty already but the ambient cap API is
+ * locked. The result is that of _cap_reset_ambient() invoked with the
+ * multithread syscaller: 0 on success, -1 with errno set on failure.
+ *
+ * The parameter list is spelled (void) so the definition is a proper
+ * prototype matching the declaration in <sys/capability.h>.
+ */
+int cap_reset_ambient(void)
+{
+    return _cap_reset_ambient(&multithread);
+}
+
+/*
+ * Read the security mode of the current process.
+ */
+unsigned cap_get_secbits(void)
+{
+ return (unsigned) prctl(PR_GET_SECUREBITS, pr_arg(0), pr_arg(0));
+}
+
+static int _cap_set_secbits(struct syscaller_s *sc, unsigned bits)
+{
+ return _libcap_wprctl3(sc, PR_SET_SECUREBITS, bits, 0);
+}
+
+/*
+ * Set the secbits of the current process.
+ */
+int cap_set_secbits(unsigned bits)
+{
+ return _cap_set_secbits(&multithread, bits);
+}
+
+/*
+ * Attempt to raise the no new privs prctl value.
+ */
+static void _cap_set_no_new_privs(struct syscaller_s *sc)
+{
+ (void) _libcap_wprctl6(sc, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0);
+}
+
+/*
+ * Some predefined constants
+ */
+#define CAP_SECURED_BITS_BASIC \
+ (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED | \
+ SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED | \
+ SECBIT_KEEP_CAPS_LOCKED)
+
+#define CAP_SECURED_BITS_AMBIENT (CAP_SECURED_BITS_BASIC | \
+ SECBIT_NO_CAP_AMBIENT_RAISE | SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED)
+
+static cap_value_t raise_cap_setpcap[] = {CAP_SETPCAP};
+
+static int _cap_set_mode(struct syscaller_s *sc, cap_mode_t flavor)
+{
+ cap_t working = cap_get_proc();
+ unsigned secbits = CAP_SECURED_BITS_AMBIENT;
+
+ int ret = cap_set_flag(working, CAP_EFFECTIVE,
+ 1, raise_cap_setpcap, CAP_SET);
+ ret = ret | _cap_set_proc(sc, working);
+
+ if (ret == 0) {
+ cap_flag_t c;
+
+ switch (flavor) {
+ case CAP_MODE_NOPRIV:
+ /* fall through */
+ case CAP_MODE_PURE1E_INIT:
+ (void) cap_clear_flag(working, CAP_INHERITABLE);
+ /* fall through */
+ case CAP_MODE_PURE1E:
+ if (!CAP_AMBIENT_SUPPORTED()) {
+ secbits = CAP_SECURED_BITS_BASIC;
+ } else {
+ ret = _cap_reset_ambient(sc);
+ if (ret) {
+ break; /* ambient dropping failed */
+ }
+ }
+ ret = _cap_set_secbits(sc, secbits);
+ if (flavor != CAP_MODE_NOPRIV) {
+ break;
+ }
+
+ /* just for "case CAP_MODE_NOPRIV:" */
+
+ for (c = 0; cap_get_bound(c) >= 0; c++) {
+ (void) _cap_drop_bound(sc, c);
+ }
+ (void) cap_clear_flag(working, CAP_PERMITTED);
+
+ /* for good measure */
+ _cap_set_no_new_privs(sc);
+ break;
+
+ default:
+ errno = EINVAL;
+ ret = -1;
+ break;
+ }
+ }
+
+ (void) cap_clear_flag(working, CAP_EFFECTIVE);
+ ret = _cap_set_proc(sc, working) | ret;
+ (void) cap_free(working);
+ return ret;
+}
+
+/*
+ * cap_set_mode locks the overarching capability framework of the
+ * present process and thus its children to a predefined flavor. Once
+ * set, these modes cannot be undone by the affected process tree and
+ * can only be done by "cap_setpcap" permitted processes. Note, a side
+ * effect of this function, whether it succeeds or fails, is to clear
+ * at least the CAP_EFFECTIVE flags for the current process.
+ */
+int cap_set_mode(cap_mode_t flavor)
+{
+ return _cap_set_mode(&multithread, flavor);
+}
+
+/*
+ * cap_get_mode attempts to determine what the current capability mode
+ * is. If it can find no match in the libcap pre-defined modes, it
+ * returns CAP_MODE_UNCERTAIN.
+ */
+cap_mode_t cap_get_mode(void)
+{
+ unsigned secbits = cap_get_secbits();
+
+ if ((secbits & CAP_SECURED_BITS_BASIC) != CAP_SECURED_BITS_BASIC) {
+ return CAP_MODE_UNCERTAIN;
+ }
+
+ /* validate ambient is not set */
+ int olderrno = errno;
+ int ret = 0;
+ cap_value_t c;
+ for (c = 0; !ret; c++) {
+ ret = cap_get_ambient(c);
+ if (ret == -1) {
+ errno = olderrno;
+ if (c && secbits != CAP_SECURED_BITS_AMBIENT) {
+ return CAP_MODE_UNCERTAIN;
+ }
+ break;
+ }
+ if (ret) {
+ return CAP_MODE_UNCERTAIN;
+ }
+ }
+
+ cap_t working = cap_get_proc();
+ cap_t empty = cap_init();
+ int cf = cap_compare(empty, working);
+ cap_free(empty);
+ cap_free(working);
+
+ if (CAP_DIFFERS(cf, CAP_INHERITABLE)) {
+ return CAP_MODE_PURE1E;
+ }
+ if (CAP_DIFFERS(cf, CAP_PERMITTED) || CAP_DIFFERS(cf, CAP_EFFECTIVE)) {
+ return CAP_MODE_PURE1E_INIT;
+ }
+
+ for (c = 0; ; c++) {
+ int v = cap_get_bound(c);
+ if (v == -1) {
+ break;
+ }
+ if (v) {
+ return CAP_MODE_PURE1E_INIT;
+ }
+ }
+
+ return CAP_MODE_NOPRIV;
+}
+
+static int _cap_setuid(struct syscaller_s *sc, uid_t uid)
+{
+ const cap_value_t raise_cap_setuid[] = {CAP_SETUID};
+ cap_t working = cap_get_proc();
+ (void) cap_set_flag(working, CAP_EFFECTIVE,
+ 1, raise_cap_setuid, CAP_SET);
+ /*
+ * Note, we are cognizant of not using glibc's setuid in the case
+ * that we've modified the way libcap is doing setting
+ * syscalls. This is because prctl needs to be working in a POSIX
+ * compliant way for the code below to work, so we are either
+ * all-broken or not-broken and don't allow for "sort of working".
+ */
+ (void) _libcap_wprctl3(sc, PR_SET_KEEPCAPS, 1, 0);
+ int ret = _cap_set_proc(sc, working);
+ if (ret == 0) {
+ if (_libcap_overrode_syscalls) {
+ ret = sc->three(SYS_setuid, (long int) uid, 0, 0);
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ }
+ } else {
+ ret = setuid(uid);
+ }
+ }
+ int olderrno = errno;
+ (void) _libcap_wprctl3(sc, PR_SET_KEEPCAPS, 0, 0);
+ (void) cap_clear_flag(working, CAP_EFFECTIVE);
+ (void) _cap_set_proc(sc, working);
+ (void) cap_free(working);
+
+ errno = olderrno;
+ return ret;
+}
+
+/*
+ * cap_setuid attempts to set the uid of the process without dropping
+ * any permitted capabilities in the process. A side effect of a call
+ * to this function is that the effective set will be cleared by the
+ * time the function returns.
+ */
+int cap_setuid(uid_t uid)
+{
+ return _cap_setuid(&multithread, uid);
+}
+
+#if defined(__arm__) || defined(__i386__) || \
+ defined(__i486__) || defined(__i586__) || defined(__i686__)
+#define sys_setgroups_variant SYS_setgroups32
+#else
+#define sys_setgroups_variant SYS_setgroups
+#endif
+
+static int _cap_setgroups(struct syscaller_s *sc,
+ gid_t gid, size_t ngroups, const gid_t groups[])
+{
+ const cap_value_t raise_cap_setgid[] = {CAP_SETGID};
+ cap_t working = cap_get_proc();
+ (void) cap_set_flag(working, CAP_EFFECTIVE,
+ 1, raise_cap_setgid, CAP_SET);
+ /*
+ * Note, we are cognizant of not using glibc's setgid etc in the
+ * case that we've modified the way libcap is doing setting
+ * syscalls. This is because prctl needs to be working in a POSIX
+ * compliant way for the other functions of this file so we are
+ * all-broken or not-broken and don't allow for "sort of working".
+ */
+ int ret = _cap_set_proc(sc, working);
+ if (_libcap_overrode_syscalls) {
+ if (ret == 0) {
+ ret = sc->three(SYS_setgid, (long int) gid, 0, 0);
+ }
+ if (ret == 0) {
+ ret = sc->three(sys_setgroups_variant, (long int) ngroups,
+ (long int) groups, 0);
+ }
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ }
+ } else {
+ if (ret == 0) {
+ ret = setgid(gid);
+ }
+ if (ret == 0) {
+ ret = setgroups(ngroups, groups);
+ }
+ }
+ int olderrno = errno;
+
+ (void) cap_clear_flag(working, CAP_EFFECTIVE);
+ (void) _cap_set_proc(sc, working);
+ (void) cap_free(working);
+
+ errno = olderrno;
+ return ret;
+}
+
+/*
+ * cap_setgroups combines setting the gid with changing the set of
+ * supplemental groups for a user into one call that raises the needed
+ * capabilities to do it for the duration of the call. A side effect
+ * of a call to this function is that the effective set will be
+ * cleared by the time the function returns.
+ */
+int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[])
+{
+ return _cap_setgroups(&multithread, gid, ngroups, groups);
+}
+
+/*
+ * cap_iab_get_proc returns a cap_iab_t value initialized by the
+ * current process state related to these iab bits: the inheritable
+ * vector is copied from the process capability set, a bounding bit is
+ * recorded for every capability that cap_get_bound() reports as 0
+ * (dropped), and an ambient bit for every capability that
+ * cap_get_ambient() reports as 1 (raised).
+ *
+ * Returns NULL if either the IAB tuple or the process capability set
+ * could not be obtained. The returned value is released with
+ * cap_free().
+ */
+cap_iab_t cap_iab_get_proc(void)
+{
+    cap_iab_t iab = cap_iab_init();
+    if (iab == NULL) {
+        return NULL;
+    }
+
+    cap_t current = cap_get_proc();
+    if (current == NULL) {
+        cap_free(iab);
+        return NULL;
+    }
+    cap_iab_fill(iab, CAP_IAB_INH, current, CAP_INHERITABLE);
+    /* only needed to seed the inheritable vector - do not leak it */
+    cap_free(current);
+
+    cap_value_t c;
+    for (c = cap_max_bits(); c; ) {
+        --c;
+        int o = c >> 5;                 /* index of the 32-bit word */
+        __u32 mask = 1U << (c & 31);    /* bit within that word */
+        if (cap_get_bound(c) == 0) {
+            iab->nb[o] |= mask;
+        }
+        if (cap_get_ambient(c) == 1) {
+            iab->a[o] |= mask;
+        }
+    }
+    return iab;
+}
+
+/*
+ * _cap_iab_set_proc sets the iab collection using the requested
+ * syscaller. Returns 0 on success and non-zero on failure.
+ *
+ * The sequence is: install the requested inheritable vector (raising
+ * CAP_SETPCAP in the effective set when the request needs it), clear
+ * the ambient set, then raise the requested ambient bits and drop the
+ * requested bounding bits. Once the process capabilities have been
+ * changed, a best-effort attempt is made - through the same syscaller
+ * - to restore the original effective flags while keeping the new
+ * inheritable vector.
+ */
+static int _cap_iab_set_proc(struct syscaller_s *sc, cap_iab_t iab)
+{
+    int ret, i;
+    cap_t working, temp = cap_get_proc();
+    cap_value_t c;
+    __u32 raising = 0;
+
+    if (temp == NULL) {
+        return -1;
+    }
+
+    for (i = 0; i < _LIBCAP_CAPABILITY_U32S; i++) {
+        __u32 newI = iab->i[i];
+        __u32 oldIP = temp->u[i].flat[CAP_INHERITABLE] |
+            temp->u[i].flat[CAP_PERMITTED];
+        /* any new inheritable bit, ambient bit or bounding drop counts */
+        raising |= (newI & ~oldIP) | iab->a[i] | iab->nb[i];
+        temp->u[i].flat[CAP_INHERITABLE] = newI;
+    }
+
+    working = cap_dup(temp);
+    if (working == NULL) {
+        ret = -1;
+        goto free_temp;
+    }
+    if (raising) {
+        ret = cap_set_flag(working, CAP_EFFECTIVE,
+                           1, raise_cap_setpcap, CAP_SET);
+        if (ret) {
+            goto defer;
+        }
+    }
+    if ((ret = _cap_set_proc(sc, working))) {
+        goto defer;
+    }
+    if ((ret = _cap_reset_ambient(sc))) {
+        goto done;
+    }
+
+    for (c = cap_max_bits(); c-- != 0; ) {
+        unsigned offset = c >> 5;
+        __u32 mask = 1U << (c & 31);
+        if (iab->a[offset] & mask) {
+            ret = _cap_set_ambient(sc, c, CAP_SET);
+            if (ret) {
+                goto done;
+            }
+        }
+        if (iab->nb[offset] & mask) {
+            /* drop the bounding bit */
+            ret = _cap_drop_bound(sc, c);
+            if (ret) {
+                goto done;
+            }
+        }
+    }
+
+done:
+    /*
+     * Use the caller's syscaller here too: the public cap_set_proc()
+     * always goes through the multithread syscaller, which is not
+     * what a caller that passed in a different one asked for.
+     */
+    (void) _cap_set_proc(sc, temp);
+
+defer:
+    cap_free(working);
+
+free_temp:
+    cap_free(temp);
+
+    return ret;
+}
+
+/*
+ * cap_iab_set_proc sets the iab capability vectors of the current
+ * process.
+ */
+int cap_iab_set_proc(cap_iab_t iab)
+{
+ return _cap_iab_set_proc(&multithread, iab);
+}
+
+/*
+ * cap_launcher_callback primes the launcher with a callback that will
+ * be invoked after the fork() but before any privilege has changed
+ * and before the execve(). This can be used to augment the state of
+ * the child process within the cap_launch() process. You can cancel
+ * any callback associated with a launcher by calling this function
+ * with a callback_fn value NULL.
+ *
+ * If the callback function returns anything other than 0, it is
+ * considered to have failed and the launch will be aborted - further,
+ * errno will be communicated to the parent.
+ */
+void cap_launcher_callback(cap_launch_t attr, int (callback_fn)(void *detail))
+{
+ attr->custom_setup_fn = callback_fn;
+}
+
+/*
+ * cap_launcher_setuid primes the launcher to attempt a change of uid.
+ */
+void cap_launcher_setuid(cap_launch_t attr, uid_t uid)
+{
+ attr->uid = uid;
+ attr->change_uids = 1;
+}
+
+/*
+ * cap_launcher_setgroups primes the launcher to attempt a change of
+ * gid and groups.
+ */
+void cap_launcher_setgroups(cap_launch_t attr, gid_t gid,
+ int ngroups, const gid_t *groups)
+{
+ attr->gid = gid;
+ attr->ngroups = ngroups;
+ attr->groups = groups;
+ attr->change_gids = 1;
+}
+
+/*
+ * cap_launcher_set_mode primes the launcher to attempt a change of
+ * mode.
+ */
+void cap_launcher_set_mode(cap_launch_t attr, cap_mode_t flavor)
+{
+ attr->mode = flavor;
+ attr->change_mode = 1;
+}
+
+/*
+ * cap_launcher_set_iab primes the launcher to attempt to change the iab bits of
+ * the launched child.
+ */
+cap_iab_t cap_launcher_set_iab(cap_launch_t attr, cap_iab_t iab)
+{
+ cap_iab_t old = attr->iab;
+ attr->iab = iab;
+ return old;
+}
+
+/*
+ * cap_launcher_set_chroot sets the intended chroot for the launched
+ * child.
+ */
+void cap_launcher_set_chroot(cap_launch_t attr, const char *chroot)
+{
+ attr->chroot = _libcap_strdup(chroot);
+}
+
+static int _cap_chroot(struct syscaller_s *sc, const char *root)
+{
+ const cap_value_t raise_cap_sys_chroot[] = {CAP_SYS_CHROOT};
+ cap_t working = cap_get_proc();
+ (void) cap_set_flag(working, CAP_EFFECTIVE,
+ 1, raise_cap_sys_chroot, CAP_SET);
+ int ret = _cap_set_proc(sc, working);
+ if (ret == 0) {
+ if (_libcap_overrode_syscalls) {
+ ret = sc->three(SYS_chroot, (long int) root, 0, 0);
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ }
+ } else {
+ ret = chroot(root);
+ }
+ }
+ int olderrno = errno;
+ (void) cap_clear_flag(working, CAP_EFFECTIVE);
+ (void) _cap_set_proc(sc, working);
+ (void) cap_free(working);
+
+ errno = olderrno;
+ return ret;
+}
+
+/*
+ * _cap_launch is invoked in the forked child, it cannot return but is
+ * required to exit. The configured steps run in this order: custom
+ * callback, uid change, gid/groups change, mode change, iab change,
+ * chroot, and finally execve(). If any step or the execve fails, the
+ * errno value is written to the file descriptor, fd, and the child
+ * exits with status 1.
+ */
+__attribute__ ((noreturn))
+static void _cap_launch(int fd, cap_launch_t attr, void *detail) {
+    struct syscaller_s *sc = &singlethread;
+    int my_errno;
+
+    if (attr->custom_setup_fn && attr->custom_setup_fn(detail)) {
+        goto defer;
+    }
+
+    if (attr->change_uids && _cap_setuid(sc, attr->uid)) {
+        goto defer;
+    }
+    if (attr->change_gids &&
+        _cap_setgroups(sc, attr->gid, attr->ngroups, attr->groups)) {
+        goto defer;
+    }
+    if (attr->change_mode && _cap_set_mode(sc, attr->mode)) {
+        goto defer;
+    }
+    if (attr->iab && _cap_iab_set_proc(sc, attr->iab)) {
+        goto defer;
+    }
+    if (attr->chroot != NULL && _cap_chroot(sc, attr->chroot)) {
+        goto defer;
+    }
+
+    /*
+     * Some type wrangling to work around what the kernel API really
+     * means: not "const char **".
+     */
+    const void *temp_args = attr->argv;
+    const void *temp_envp = attr->envp;
+
+    execve(attr->arg0, temp_args, temp_envp);
+    /* if the exec worked, execution will not reach here */
+
+defer:
+    /*
+     * getting here means an error has occurred and errno is
+     * communicated to the parent. Snapshot it first: a failing
+     * write() below replaces errno, and a retry must still report
+     * the original failure rather than EAGAIN/EINTR.
+     */
+    my_errno = errno;
+    for (;;) {
+        ssize_t n = write(fd, &my_errno, sizeof(my_errno));
+        if (n < 0 && (errno == EAGAIN || errno == EINTR)) {
+            continue;
+        }
+        break;
+    }
+    close(fd);
+    exit(1);
+}
+
+/*
+ * cap_launch performs a wrapped fork+exec that works in both an
+ * unthreaded environment and also where libcap is linked with
+ * psx+pthreads. The function supports dropping privilege in the
+ * forked thread, but retaining privilege in the parent thread(s).
+ *
+ * Since the ambient set is fragile with respect to changes in I or P,
+ * the function carefully orders setting of these inheritable
+ * characteristics, to make sure they stick, or return an error
+ * of -1 setting errno because the launch failed.
+ *
+ * On success the pid of the launched child is returned. A
+ * close-on-exec pipe is used to learn the outcome: end-of-file with
+ * no data means no failure was reported before every write end was
+ * closed; anything else is treated as a failed launch, in which case
+ * the child is reaped and -1 is returned.
+ */
+pid_t cap_launch(cap_launch_t attr, void *data) {
+    int my_errno;
+    int ps[2];
+
+    if (pipe2(ps, O_CLOEXEC) != 0) {
+        return -1;
+    }
+
+    pid_t child = fork();
+    my_errno = errno;
+
+    if (!child) {
+        /*
+         * The child keeps the write end: it is how a setup or
+         * execve failure is reported. Only the read end goes.
+         */
+        close(ps[0]);
+        /* noreturn from this function: */
+        _cap_launch(ps[1], attr, data);
+    }
+
+    /* parent, or fork failure: the write end is of no use here */
+    close(ps[1]);
+    if (child < 0) {
+        goto defer;
+    }
+
+    /*
+     * Extend this function's return codes to include setup failures
+     * in the child.
+     */
+    for (;;) {
+        int ignored;
+        ssize_t n = read(ps[0], &my_errno, sizeof(my_errno));
+        if (n == 0) {
+            goto defer;
+        }
+        if (n < 0 && (errno == EAGAIN || errno == EINTR)) {
+            continue;
+        }
+        waitpid(child, &ignored, 0);
+        child = -1;
+        /*
+         * NOTE(review): the errno value read from the child is
+         * replaced by ECHILD, as before - confirm whether callers
+         * should instead see the child's own errno.
+         */
+        my_errno = ECHILD;
+        break;
+    }
+
+defer:
+    close(ps[0]);
+    errno = my_errno;
+    return child;
+}
diff --git a/libcap/cap_test.c b/libcap/cap_test.c
new file mode 100644
index 0000000..4ea83c8
--- /dev/null
+++ b/libcap/cap_test.c
@@ -0,0 +1,39 @@
+#include "libcap.h"
+
+static cap_value_t top;
+
+static int cf(cap_value_t x) {
+ return top - x - 1;
+}
+
+static int test_cap_bits(void) { /* runs _binary_search against cf() for a series of targets; returns 0 if all matched, -1 otherwise */
+ static cap_value_t vs[] = {
+ 5, 6, 11, 12, 15, 16, 17, 38, 41, 63, 64, __CAP_MAXBITS+3, 0, -1
+ }; /* list is terminated by the first negative entry (-1) */
+ int failed = 0;
+ cap_value_t i;
+ for (i = 0; vs[i] >= 0; i++) {
+ cap_value_t ans;
+
+ top = i; /* NOTE(review): this uses the index i, not vs[i], so the values listed in vs[] are never the search target - confirm intent */
+ _binary_search(ans, cf, 0, __CAP_MAXBITS, 0); /* presumably stores in ans the boundary where cf() turns negative; macro is defined outside this file - verify */
+ if (ans != top) {
+ if (top > __CAP_MAXBITS && ans == __CAP_MAXBITS) { /* tolerated mismatch: target lies beyond the searched range */
+ } else {
+ printf("test_cap_bits miscompared [%d] top=%d - got=%d\n",
+ i, top, ans);
+ failed = -1;
+ }
+ }
+ }
+ return failed;
+}
+
+int main(int argc, char **argv) {
+ int result = 0;
+ result = test_cap_bits() | result;
+ if (result) {
+ printf("test FAILED\n");
+ exit(1);
+ }
+}
diff --git a/libcap/cap_text.c b/libcap/cap_text.c
index 42fb685..b0fad9d 100644
--- a/libcap/cap_text.c
+++ b/libcap/cap_text.c
@@ -1,22 +1,32 @@
/*
- * Copyright (c) 1997-8,2007-8 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2007-8,2019 Andrew G Morgan <morgan@kernel.org>
* Copyright (c) 1997 Andrew Main <zefram@dcs.warwick.ac.uk>
*
* This file deals with exchanging internal and textual
* representations of capability sets.
*/
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
+
#include <stdio.h>
#define LIBCAP_PLEASE_INCLUDE_ARRAY
#include "libcap.h"
+static char const *_cap_names[__CAP_BITS] = LIBCAP_CAP_NAMES;
+
#include <ctype.h>
#include <limits.h>
-/* Maximum output text length (16 per cap) */
-#define CAP_TEXT_SIZE (16*__CAP_MAXBITS)
+#ifdef INCLUDE_GPERF_OUTPUT
+/* we need to include it after #define _GNU_SOURCE is set */
+#include INCLUDE_GPERF_OUTPUT
+#endif
+
+/* Maximum output text length */
+#define CAP_TEXT_SIZE (__CAP_NAME_SIZE * __CAP_MAXBITS)
/*
* Parse a textual representation of capabilities, returning an internal
@@ -51,11 +61,25 @@ static char const *namcmp(char const *str, char const *nam)
return str;
}
+/*
+ * forceall forces all of the kernel named capabilities to be assigned
+ * the masked value, and zeroed otherwise. Note, if the kernel is ahead
+ * of libcap, the upper bits will be referred to by number.
+ */
static void forceall(__u32 *flat, __u32 value, unsigned blks)
{
unsigned n;
-
- for (n = blks; n--; flat[n] = value);
+ cap_value_t cmb = cap_max_bits();
+ for (n = blks; n--; ) {
+ unsigned base = 32*n;
+ __u32 mask = 0;
+ if (cmb >= base + 32) {
+ mask = ~0;
+ } else if (cmb > base) {
+ mask = (unsigned) ((1ULL << (cmb % 32)) - 1);
+ }
+ flat[n] = value & mask;
+ }
return;
}
@@ -94,13 +118,16 @@ static int lookupname(char const **strp)
}
#else /* ie., ndef GPERF_DOWNCASE */
char const *s;
- unsigned n;
-
- for (n = __CAP_BITS; n--; )
+ unsigned n = cap_max_bits();
+ if (n > __CAP_BITS) {
+ n = __CAP_BITS;
+ }
+ while (n--) {
if (_cap_names[n] && (s = namcmp(str.constp, _cap_names[n]))) {
*strp = s;
return n;
}
+ }
#endif /* def GPERF_DOWNCASE */
return -1; /* No definition available */
@@ -136,7 +163,7 @@ cap_t cap_from_text(const char *str)
errno = EINVAL;
return NULL;
}
-
+
_cap_debug("%s", str);
for (;;) {
@@ -144,7 +171,7 @@ cap_t cap_from_text(const char *str)
char op;
int flags = 0, listed=0;
- forceall(list, 0, __CAP_BLKS);
+ memset(list, 0, sizeof(__u32)*__CAP_BLKS);
/* skip leading spaces */
while (isspace((unsigned char)*str))
@@ -326,11 +353,10 @@ static int getstateflags(cap_t caps, int capno)
char *cap_to_text(cap_t caps, ssize_t *length_p)
{
char buf[CAP_TEXT_SIZE+CAP_TEXT_BUFFER_ZONE];
- char *p;
+ char *p, *base;
int histo[8];
int m, t;
unsigned n;
- unsigned cap_maxbits, cap_blks;
/* Check arguments */
if (!good_cap_t(caps)) {
@@ -338,31 +364,15 @@ char *cap_to_text(cap_t caps, ssize_t *length_p)
return NULL;
}
- switch (caps->head.version) {
- case _LINUX_CAPABILITY_VERSION_1:
- cap_blks = _LINUX_CAPABILITY_U32S_1;
- break;
- case _LINUX_CAPABILITY_VERSION_2:
- cap_blks = _LINUX_CAPABILITY_U32S_2;
- break;
- case _LINUX_CAPABILITY_VERSION_3:
- cap_blks = _LINUX_CAPABILITY_U32S_3;
- break;
- default:
- errno = EINVAL;
- return NULL;
- }
-
- cap_maxbits = 32 * cap_blks;
-
_cap_debugcap("e = ", *caps, CAP_EFFECTIVE);
_cap_debugcap("i = ", *caps, CAP_INHERITABLE);
_cap_debugcap("p = ", *caps, CAP_PERMITTED);
memset(histo, 0, sizeof(histo));
- /* default prevailing state to the upper - unnamed bits */
- for (n = cap_maxbits-1; n > __CAP_BITS; n--)
+ /* default prevailing state to the named bits */
+ cap_value_t cmb = cap_max_bits();
+ for (n = 0; n < cmb; n++)
histo[getstateflags(caps, n)]++;
/* find which combination of capability sets shares the most bits
@@ -373,57 +383,224 @@ char *cap_to_text(cap_t caps, ssize_t *length_p)
if (histo[t] >= histo[m])
m = t;
- /* capture remaining bits - selecting m from only the unnamed bits,
- we maximize the likelihood that we won't see numeric capability
- values in the text output. */
- while (n--)
- histo[getstateflags(caps, n)]++;
-
/* blank is not a valid capability set */
+ base = buf;
p = sprintf(buf, "=%s%s%s",
(m & LIBCAP_EFF) ? "e" : "",
(m & LIBCAP_INH) ? "i" : "",
(m & LIBCAP_PER) ? "p" : "" ) + buf;
- for (t = 8; t--; )
- if (t != m && histo[t]) {
- *p++ = ' ';
- for (n = 0; n < cap_maxbits; n++)
- if (getstateflags(caps, n) == t) {
- char *this_cap_name;
-
- this_cap_name = cap_to_name(n);
- if ((strlen(this_cap_name) + (p - buf)) > CAP_TEXT_SIZE) {
- cap_free(this_cap_name);
- errno = ERANGE;
- return NULL;
- }
- p += sprintf(p, "%s,", this_cap_name);
+ for (t = 8; t--; ) {
+ if (t == m || !histo[t]) {
+ continue;
+ }
+ *p++ = ' ';
+ for (n = 0; n < cmb; n++) {
+ if (getstateflags(caps, n) == t) {
+ char *this_cap_name = cap_to_name(n);
+ if ((strlen(this_cap_name) + (p - buf)) > CAP_TEXT_SIZE) {
cap_free(this_cap_name);
- }
- p--;
- n = t & ~m;
- if (n)
- p += sprintf(p, "+%s%s%s",
- (n & LIBCAP_EFF) ? "e" : "",
- (n & LIBCAP_INH) ? "i" : "",
- (n & LIBCAP_PER) ? "p" : "");
- n = ~t & m;
- if (n)
- p += sprintf(p, "-%s%s%s",
- (n & LIBCAP_EFF) ? "e" : "",
- (n & LIBCAP_INH) ? "i" : "",
- (n & LIBCAP_PER) ? "p" : "");
- if (p - buf > CAP_TEXT_SIZE) {
- errno = ERANGE;
- return NULL;
+ errno = ERANGE;
+ return NULL;
+ }
+ p += sprintf(p, "%s,", this_cap_name);
+ cap_free(this_cap_name);
+ }
+ }
+ p--;
+ n = t & ~m;
+ if (n) {
+ char op = '+';
+ if (base[0] == '=' && base[1] == ' ') {
+ /*
+ * Special case all lowered default "= foo,...+eip
+ * ..." as "foo,...=eip ...". (Equivalent but shorter.)
+ */
+ base += 2;
+ op = '=';
+ }
+ p += sprintf(p, "%c%s%s%s", op,
+ (n & LIBCAP_EFF) ? "e" : "",
+ (n & LIBCAP_INH) ? "i" : "",
+ (n & LIBCAP_PER) ? "p" : "");
+ }
+ n = ~t & m;
+ if (n) {
+ p += sprintf(p, "-%s%s%s",
+ (n & LIBCAP_EFF) ? "e" : "",
+ (n & LIBCAP_INH) ? "i" : "",
+ (n & LIBCAP_PER) ? "p" : "");
+ }
+ if (p - buf > CAP_TEXT_SIZE) {
+ errno = ERANGE;
+ return NULL;
+ }
+ }
+
+ /* capture remaining unnamed bits - which must all be +. */
+ memset(histo, 0, sizeof(histo));
+ for (n = cmb; n < __CAP_MAXBITS; n++)
+ histo[getstateflags(caps, n)]++;
+
+ for (t = 8; t-- > 1; ) {
+ if (!histo[t]) {
+ continue;
+ }
+ *p++ = ' ';
+ for (n = cmb; n < __CAP_MAXBITS; n++) {
+ if (getstateflags(caps, n) == t) {
+ char *this_cap_name = cap_to_name(n);
+ if ((strlen(this_cap_name) + (p - buf)) > CAP_TEXT_SIZE) {
+ cap_free(this_cap_name);
+ errno = ERANGE;
+ return NULL;
+ }
+ p += sprintf(p, "%s,", this_cap_name);
+ cap_free(this_cap_name);
}
}
+ p--;
+ p += sprintf(p, "+%s%s%s",
+ (t & LIBCAP_EFF) ? "e" : "",
+ (t & LIBCAP_INH) ? "i" : "",
+ (t & LIBCAP_PER) ? "p" : "");
+ if (p - buf > CAP_TEXT_SIZE) {
+ errno = ERANGE;
+ return NULL;
+ }
+ }
- _cap_debug("%s", buf);
+ _cap_debug("%s", base);
if (length_p) {
- *length_p = p - buf;
+ *length_p = p - base;
+ }
+
+ return (_libcap_strdup(base));
+}
+
+/*
+ * cap_mode_name returns a text token naming the specified mode.
+ */
+const char *cap_mode_name(cap_mode_t flavor) {
+ switch (flavor) {
+ case CAP_MODE_NOPRIV:
+ return "NOPRIV";
+ case CAP_MODE_PURE1E_INIT:
+ return "PURE1E_INIT";
+ case CAP_MODE_PURE1E:
+ return "PURE1E";
+ case CAP_MODE_UNCERTAIN:
+ return "UNCERTAIN";
+ default:
+ return "UNKNOWN";
}
+}
- return (_libcap_strdup(buf));
+/*
+ * cap_iab_to_text serializes an iab into a canonical text
+ * representation.
+ */
+char *cap_iab_to_text(cap_iab_t iab)
+{
+ char buf[CAP_TEXT_SIZE+CAP_TEXT_BUFFER_ZONE];
+ char *p = buf;
+ cap_value_t c, cmb = cap_max_bits();
+ int first = 1;
+
+ if (good_cap_iab_t(iab)) {
+ for (c = 0; c < cmb; c++) {
+ int keep = 0;
+ int o = c >> 5;
+ __u32 bit = 1U << (c & 31);
+ __u32 ib = iab->i[o] & bit;
+ __u32 ab = iab->a[o] & bit;
+ __u32 nbb = iab->nb[o] & bit;
+ if (!(nbb | ab | ib)) {
+ continue;
+ }
+ if (!first) {
+ *p++ = ',';
+ }
+ if (nbb) {
+ *p++ = '!';
+ keep = 1;
+ }
+ if (ab) {
+ *p++ = '^';
+ keep = 1;
+ } else if (nbb && ib) {
+ *p++ = '%';
+ }
+ if (keep || ib) {
+ if (c < __CAP_BITS) {
+ strcpy(p, _cap_names[c]);
+ } else {
+ sprintf(p, "%u", c);
+ }
+ p += strlen(p);
+ first = 0;
+ }
+ }
+ }
+ *p = '\0';
+ return _libcap_strdup(buf);
+}
+
+cap_iab_t cap_iab_from_text(const char *text)
+{
+ cap_iab_t iab = cap_iab_init();
+ if (text != NULL) {
+ unsigned flags;
+ for (flags = 0; *text; text++) {
+ /* consume prefixes */
+ switch (*text) {
+ case '!':
+ flags |= LIBCAP_IAB_NB_FLAG;
+ continue;
+ case '^':
+ flags |= LIBCAP_IAB_IA_FLAG;
+ continue;
+ case '%':
+ flags |= LIBCAP_IAB_I_FLAG;
+ continue;
+ default:
+ break;
+ }
+ if (!flags) {
+ flags = LIBCAP_IAB_I_FLAG;
+ }
+
+ /* consume cap name */
+ cap_value_t c = lookupname(&text);
+ if (c == -1) {
+ goto cleanup;
+ }
+ unsigned o = c >> 5;
+ __u32 mask = 1U << (c & 31);
+ if (flags & LIBCAP_IAB_I_FLAG) {
+ iab->i[o] |= mask;
+ }
+ if (flags & LIBCAP_IAB_A_FLAG) {
+ iab->a[o] |= mask;
+ }
+ if (flags & LIBCAP_IAB_NB_FLAG) {
+ iab->nb[o] |= mask;
+ }
+
+ /* rest should be end or comma */
+ if (*text == '\0') {
+ break;
+ }
+ if (*text != ',') {
+ goto cleanup;
+ }
+ flags = 0;
+ }
+ }
+ return iab;
+
+cleanup:
+ cap_free(iab);
+ errno = EINVAL;
+ return NULL;
}
diff --git a/libcap/include/sys/.gitignore b/libcap/include/sys/.gitignore
new file mode 100644
index 0000000..595fc39
--- /dev/null
+++ b/libcap/include/sys/.gitignore
@@ -0,0 +1 @@
+psx_syscall.h
diff --git a/libcap/include/sys/capability.h b/libcap/include/sys/capability.h
index dddc75b..ac13c12 100644
--- a/libcap/include/sys/capability.h
+++ b/libcap/include/sys/capability.h
@@ -2,7 +2,7 @@
* <sys/capability.h>
*
* Copyright (C) 1997 Aleph One
- * Copyright (C) 1997-8,2008 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (C) 1997,8, 2008,19,20 Andrew G. Morgan <morgan@kernel.org>
*
* defunct POSIX.1e Standard: 25.2 Capabilities <sys/capability.h>
*/
@@ -47,14 +47,45 @@ typedef struct _cap_struct *cap_t;
typedef int cap_value_t;
/*
+ * libcap initialized first unnamed capability of the running kernel.
+ * capsh includes a runtime test to flag when this is larger than
+ * what is known to libcap... Time for a new libcap release!
+ */
+extern cap_value_t cap_max_bits(void);
+
+/*
* Set identifiers
*/
typedef enum {
- CAP_EFFECTIVE=0, /* Specifies the effective flag */
- CAP_PERMITTED=1, /* Specifies the permitted flag */
- CAP_INHERITABLE=2 /* Specifies the inheritable flag */
+ CAP_EFFECTIVE = 0, /* Specifies the effective flag */
+ CAP_PERMITTED = 1, /* Specifies the permitted flag */
+ CAP_INHERITABLE = 2 /* Specifies the inheritable flag */
} cap_flag_t;
+typedef enum {
+ CAP_IAB_INH = 2,
+ CAP_IAB_AMB = 3,
+ CAP_IAB_BOUND = 4
+} cap_iab_vector_t;
+
+/*
+ * An opaque generalization of the inheritable bits that includes both
+ * what ambient bits to raise and what bounding bits to *lower* (aka
+ * drop). None of these bits once set, using cap_iab_set(), affect
+ * the running process but are consulted, through the execve() system
+ * call, by the kernel. Note, the ambient bits ('A') of the running
+ * process are fragile with respect to other aspects of the "posix"
+ * (cap_t) operations: most importantly, 'A' cannot ever hold bits not
+ * present in the intersection of 'pI' and 'pP'. The kernel
+ * immediately drops all ambient caps whenever such a situation
+ * arises. Typically, the ambient bits are used to support a naive
+ * capability inheritance model - at odds with the POSIX (sic) model
+ * of inheritance where inherited (pI) capabilities need to also be
+ * wanted by the executed binary (fI) in order to become raised
+ * through exec.
+ */
+typedef struct cap_iab_s *cap_iab_t;
+
/*
* These are the states available to each capability
*/
@@ -66,11 +97,17 @@ typedef enum {
/*
* User-space capability manipulation routines
*/
+typedef unsigned cap_mode_t;
+#define CAP_MODE_UNCERTAIN ((cap_mode_t) 0)
+#define CAP_MODE_NOPRIV ((cap_mode_t) 1)
+#define CAP_MODE_PURE1E_INIT ((cap_mode_t) 2)
+#define CAP_MODE_PURE1E ((cap_mode_t) 3)
/* libcap/cap_alloc.c */
-extern cap_t cap_dup(cap_t);
-extern int cap_free(void *);
-extern cap_t cap_init(void);
+extern cap_t cap_dup(cap_t);
+extern int cap_free(void *);
+extern cap_t cap_init(void);
+extern cap_iab_t cap_iab_init(void);
/* libcap/cap_flag.c */
extern int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
@@ -79,11 +116,19 @@ extern int cap_set_flag(cap_t, cap_flag_t, int, const cap_value_t *,
extern int cap_clear(cap_t);
extern int cap_clear_flag(cap_t, cap_flag_t);
+extern cap_flag_value_t cap_iab_get_vector(cap_iab_t, cap_iab_vector_t,
+ cap_value_t);
+extern int cap_iab_set_vector(cap_iab_t, cap_iab_vector_t, cap_value_t,
+ cap_flag_value_t);
+extern int cap_iab_fill(cap_iab_t, cap_iab_vector_t, cap_t, cap_flag_t);
+
/* libcap/cap_file.c */
extern cap_t cap_get_fd(int);
extern cap_t cap_get_file(const char *);
+extern uid_t cap_get_nsowner(cap_t);
extern int cap_set_fd(int, cap_t);
extern int cap_set_file(const char *, cap_t);
+extern int cap_set_nsowner(cap_t, uid_t);
/* libcap/cap_proc.c */
extern cap_t cap_get_proc(void);
@@ -92,9 +137,13 @@ extern int cap_set_proc(cap_t);
extern int cap_get_bound(cap_value_t);
extern int cap_drop_bound(cap_value_t);
-
#define CAP_IS_SUPPORTED(cap) (cap_get_bound(cap) >= 0)
+extern int cap_get_ambient(cap_value_t);
+extern int cap_set_ambient(cap_value_t, cap_flag_value_t);
+extern int cap_reset_ambient(void);
+#define CAP_AMBIENT_SUPPORTED() (cap_get_ambient(CAP_CHOWN) >= 0)
+
/* libcap/cap_extint.c */
extern ssize_t cap_size(cap_t);
extern ssize_t cap_copy_ext(void *, cap_t, ssize_t);
@@ -106,12 +155,53 @@ extern char * cap_to_text(cap_t, ssize_t *);
extern int cap_from_name(const char *, cap_value_t *);
extern char * cap_to_name(cap_value_t);
+extern char * cap_iab_to_text(cap_iab_t iab);
+extern cap_iab_t cap_iab_from_text(const char *text);
+
#define CAP_DIFFERS(result, flag) (((result) & (1 << (flag))) != 0)
extern int cap_compare(cap_t, cap_t);
-/* system calls - look to libc for function to system call mapping */
-extern int capset(cap_user_header_t header, cap_user_data_t data);
-extern int capget(cap_user_header_t header, const cap_user_data_t data);
+/* libcap/cap_proc.c */
+extern void cap_set_syscall(long int (*new_syscall)(long int,
+ long int, long int, long int),
+ long int (*new_syscall6)(long int,
+ long int, long int, long int,
+ long int, long int, long int));
+
+extern int cap_set_mode(cap_mode_t flavor);
+extern cap_mode_t cap_get_mode(void);
+extern const char *cap_mode_name(cap_mode_t flavor);
+
+extern unsigned cap_get_secbits(void);
+extern int cap_set_secbits(unsigned bits);
+
+extern int cap_setuid(uid_t uid);
+extern int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[]);
+
+extern cap_iab_t cap_iab_get_proc(void);
+extern int cap_iab_set_proc(cap_iab_t iab);
+
+typedef struct cap_launch_s *cap_launch_t;
+
+extern cap_launch_t cap_new_launcher(const char *arg0, const char * const *argv,
+ const char * const *envp);
+extern void cap_launcher_callback(cap_launch_t attr,
+ int (callback_fn)(void *detail));
+extern void cap_launcher_setuid(cap_launch_t attr, uid_t uid);
+extern void cap_launcher_setgroups(cap_launch_t attr, gid_t gid,
+ int ngroups, const gid_t *groups);
+extern void cap_launcher_set_mode(cap_launch_t attr, cap_mode_t flavor);
+extern cap_iab_t cap_launcher_set_iab(cap_launch_t attr, cap_iab_t iab);
+extern void cap_launcher_set_chroot(cap_launch_t attr, const char *chroot);
+extern pid_t cap_launch(cap_launch_t attr, void *data);
+
+/*
+ * system calls - look to libc for function to system call
+ * mapping. Note, libcap does not use capset directly, but permits the
+ * cap_set_syscall() to redirect the system call function.
+ */
+extern int capget(cap_user_header_t header, cap_user_data_t data);
+extern int capset(cap_user_header_t header, const cap_user_data_t data);
/* deprecated - use cap_get_pid() */
extern int capgetp(pid_t pid, cap_t cap_d);
diff --git a/libcap/include/uapi/linux/capability.h b/libcap/include/uapi/linux/capability.h
index 432e023..09b5563 100644
--- a/libcap/include/uapi/linux/capability.h
+++ b/libcap/include/uapi/linux/capability.h
@@ -7,6 +7,7 @@
*
* See here for the libcap library ("POSIX draft" compliance):
*
+ * https://git.kernel.org/pub/scm/libs/libcap/libcap.git/refs/
* http://www.kernel.org/pub/linux/libs/security/linux-privs/
*/
@@ -15,8 +16,6 @@
#include <linux/types.h>
-struct task_struct;
-
/* User-level do most of the mapping between kernel and user
capabilities based on the version tag given by the kernel. The
kernel might be somewhat backwards compatible, but don't bet on
@@ -40,13 +39,13 @@ struct task_struct;
typedef struct __user_cap_header_struct {
__u32 version;
int pid;
-} __user *cap_user_header_t;
+} *cap_user_header_t;
typedef struct __user_cap_data_struct {
__u32 effective;
__u32 permitted;
__u32 inheritable;
-} __user *cap_user_data_t;
+} *cap_user_data_t;
#define VFS_CAP_REVISION_MASK 0xFF000000
@@ -62,16 +61,32 @@ typedef struct __user_cap_data_struct {
#define VFS_CAP_U32_2 2
#define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))
+#define VFS_CAP_REVISION_3 0x03000000
+#define VFS_CAP_U32_3 VFS_CAP_U32_2
+#define XATTR_CAPS_SZ_3 (sizeof(__le32)+XATTR_CAPS_SZ_2)
+
+/*
+ * Kernel capabilities default to v2. The v3 VFS caps are only used,
+ * at present, for namespace specific filesystem capabilities.
+ */
#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2
#define VFS_CAP_U32 VFS_CAP_U32_2
#define VFS_CAP_REVISION VFS_CAP_REVISION_2
+#define _VFS_CAP_DATA_HEAD \
+ __le32 magic_etc; /* Little endian */ \
+ struct { \
+ __le32 permitted; /* Little endian */ \
+ __le32 inheritable; /* Little endian */ \
+ } data[VFS_CAP_U32]
+
struct vfs_cap_data {
- __le32 magic_etc; /* Little endian */
- struct {
- __le32 permitted; /* Little endian */
- __le32 inheritable; /* Little endian */
- } data[VFS_CAP_U32];
+ _VFS_CAP_DATA_HEAD;
+};
+
+struct vfs_ns_cap_data {
+ _VFS_CAP_DATA_HEAD;
+ __le32 rootid;
};
#ifndef __KERNEL__
@@ -207,7 +222,7 @@ struct vfs_cap_data {
#define CAP_SYS_MODULE 16
/* Allow ioperm/iopl access */
-/* Allow sending USB messages to any device via /proc/bus/usb */
+/* Allow sending USB messages to any device via /dev/bus/usb */
#define CAP_SYS_RAWIO 17
@@ -316,6 +331,8 @@ struct vfs_cap_data {
#define CAP_AUDIT_CONTROL 30
+/* Set capabilities on files. */
+
#define CAP_SETFCAP 31
/* Override MAC access.
@@ -351,8 +368,50 @@ struct vfs_cap_data {
#define CAP_AUDIT_READ 37
+/* Allow system performance and observability privileged operations using
+ * perf_events, i915_perf and other kernel subsystems. */
+
+#define CAP_PERFMON 38
+
+/*
+ * CAP_BPF allows the following BPF operations:
+ * - Creating all types of BPF maps
+ * - Advanced verifier features
+ * - Indirect variable access
+ * - Bounded loops
+ * - BPF to BPF function calls
+ * - Scalar precision tracking
+ * - Larger complexity limits
+ * - Dead code elimination
+ * - And potentially other features
+ * - Loading BPF Type Format (BTF) data
+ * - Retrieve xlated and JITed code of BPF programs
+ * - Use bpf_spin_lock() helper
+ *
+ * CAP_PERFMON relaxes the verifier checks further:
+ * - BPF progs can use of pointer-to-integer conversions
+ * - speculation attack hardening measures are bypassed
+ * - bpf_probe_read to read arbitrary kernel memory is allowed
+ * - bpf_trace_printk to print kernel memory is allowed
+ *
+ * CAP_SYS_ADMIN is required to use bpf_probe_write_user.
+ *
+ * CAP_SYS_ADMIN is required to iterate system wide loaded
+ * programs, maps, links, BTFs and convert their IDs to file descriptors.
+ *
+ * CAP_PERFMON and CAP_BPF are required to load tracing programs.
+ * CAP_NET_ADMIN and CAP_BPF are required to load networking programs.
+ */
+
+#define CAP_BPF 39
+
+/* Allow checkpoint/restore related operations */
+/* Allow PID selection during clone3() */
+/* Allow writing to ns_last_pid */
+
+#define CAP_CHECKPOINT_RESTORE 40
-#define CAP_LAST_CAP CAP_AUDIT_READ
+#define CAP_LAST_CAP CAP_CHECKPOINT_RESTORE
#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
@@ -361,7 +420,7 @@ struct vfs_cap_data {
*/
#define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */
-#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */
+#define CAP_TO_MASK(x) (1u << ((x) & 31)) /* mask for indexed __u32 */
#endif /* _UAPI_LINUX_CAPABILITY_H */
diff --git a/libcap/include/uapi/linux/prctl.h b/libcap/include/uapi/linux/prctl.h
index 289760f..1b6a009 100644
--- a/libcap/include/uapi/linux/prctl.h
+++ b/libcap/include/uapi/linux/prctl.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_PRCTL_H
#define _LINUX_PRCTL_H
+#include <linux/types.h>
+
/* Values to pass as first argument to prctl() */
#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */
@@ -119,6 +121,31 @@
# define PR_SET_MM_ENV_END 11
# define PR_SET_MM_AUXV 12
# define PR_SET_MM_EXE_FILE 13
+# define PR_SET_MM_MAP 14
+# define PR_SET_MM_MAP_SIZE 15
+
+/*
+ * This structure provides new memory descriptor
+ * map which mostly modifies /proc/pid/stat[m]
+ * output for a task. This mostly done in a
+ * sake of checkpoint/restore functionality.
+ */
+struct prctl_mm_map {
+ __u64 start_code; /* code section bounds */
+ __u64 end_code;
+ __u64 start_data; /* data section bounds */
+ __u64 end_data;
+ __u64 start_brk; /* heap for brk() syscall */
+ __u64 brk;
+ __u64 start_stack; /* stack starts at */
+ __u64 arg_start; /* command line arguments bounds */
+ __u64 arg_end;
+ __u64 env_start; /* environment variables bounds */
+ __u64 env_end;
+ __u64 *auxv; /* auxiliary vector */
+ __u32 auxv_size; /* vector size */
+ __u32 exe_fd; /* /proc/$pid/exe link file */
+};
/*
* Set specific pid that is allowed to ptrace the current task.
@@ -149,4 +176,25 @@
#define PR_GET_TID_ADDRESS 40
+#define PR_SET_THP_DISABLE 41
+#define PR_GET_THP_DISABLE 42
+
+/*
+ * Tell the kernel to start/stop helping userspace manage bounds tables.
+ */
+#define PR_MPX_ENABLE_MANAGEMENT 43
+#define PR_MPX_DISABLE_MANAGEMENT 44
+
+#define PR_SET_FP_MODE 45
+#define PR_GET_FP_MODE 46
+# define PR_FP_MODE_FR (1u << 0) /* 64b FP registers */
+# define PR_FP_MODE_FRE (1u << 1) /* 32b compatibility */
+
+/* Control the ambient capability set */
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+
#endif /* _LINUX_PRCTL_H */
diff --git a/libcap/include/uapi/linux/securebits.h b/libcap/include/uapi/linux/securebits.h
index 35ac35c..e9b1309 100644
--- a/libcap/include/uapi/linux/securebits.h
+++ b/libcap/include/uapi/linux/securebits.h
@@ -5,7 +5,7 @@
whether the setting is on or off. The other bit specify whether the
setting is locked or not. A setting which is locked cannot be
changed from user-level. */
-#define issecure_mask(X) (1 << (X))
+#define issecure_mask(X) (1u << (X))
#define SECUREBITS_DEFAULT 0x00000000
@@ -22,7 +22,7 @@
#define SECBIT_NOROOT_LOCKED (issecure_mask(SECURE_NOROOT_LOCKED))
/* When set, setuid to/from uid 0 does not trigger capability-"fixup".
- When unset, to provide compatiblility with old programs relying on
+ When unset, to provide compatibility with old programs relying on
set*uid to gain/lose privilege, transitions to/from uid 0 cause
capabilities to be gained/lost. */
#define SECURE_NO_SETUID_FIXUP 2
diff --git a/libcap/libcap.h b/libcap/libcap.h
index 2596c11..67fa0d0 100644
--- a/libcap/libcap.h
+++ b/libcap/libcap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997,2020 Andrew G Morgan <morgan@kernel.org>
*
* This file contains internal definitions for the various functions in
* this small capability library.
@@ -28,7 +28,7 @@
#ifndef _LINUX_CAPABILITY_U32S_1
# define _LINUX_CAPABILITY_U32S_1 1
-#endif /* ndef _LINUX_CAPABILITY_U32S */
+#endif /* ndef _LINUX_CAPABILITY_U32S_1 */
/*
* Do we match the local kernel?
@@ -118,6 +118,7 @@ struct _cap_struct {
struct __user_cap_data_struct set;
__u32 flat[NUMBER_OF_CAP_SETS];
} u[_LIBCAP_CAPABILITY_U32S];
+ uid_t rootid;
};
/* the maximum bits supportable */
@@ -126,13 +127,19 @@ struct _cap_struct {
/* string magic for cap_free */
#define CAP_S_MAGIC 0xCA95D0
+/* iab set magic for cap_free */
+#define CAP_IAB_MAGIC 0xCA9AB
+
+/* launcher magic for cap_free */
+#define CAP_LAUNCH_MAGIC 0xCA91A
+
/*
* kernel API cap set abstraction
*/
-#define raise_cap(x,set) u[(x)>>5].flat[set] |= (1<<((x)&31))
-#define lower_cap(x,set) u[(x)>>5].flat[set] &= ~(1<<((x)&31))
-#define isset_cap(y,x,set) ((y)->u[(x)>>5].flat[set] & (1<<((x)&31)))
+#define raise_cap(x, set) u[(x) >> 5].flat[set] |= (1u << ((x)&31))
+#define lower_cap(x, set) u[(x) >> 5].flat[set] &= ~(1u << ((x)&31))
+#define isset_cap(y, x, set) ((y)->u[(x) >> 5].flat[set] & (1u << ((x)&31)))
/*
* Private definitions for internal use by the library.
@@ -141,6 +148,8 @@ struct _cap_struct {
#define __libcap_check_magic(c,magic) ((c) && *(-1+(__u32 *)(c)) == (magic))
#define good_cap_t(c) __libcap_check_magic(c, CAP_T_MAGIC)
#define good_cap_string(c) __libcap_check_magic(c, CAP_S_MAGIC)
+#define good_cap_iab_t(c) __libcap_check_magic(c, CAP_IAB_MAGIC)
+#define good_cap_launch_t(c) __libcap_check_magic(c, CAP_LAUNCH_MAGIC)
/*
* These match CAP_DIFFERS() expectations
@@ -185,8 +194,7 @@ extern char *_libcap_strdup(const char *text);
* place them here too.
*/
-extern int capset(cap_user_header_t header, cap_user_data_t data);
-extern int capget(cap_user_header_t header, const cap_user_data_t data);
+extern int capget(cap_user_header_t header, cap_user_data_t data);
extern int capgetp(pid_t pid, cap_t cap_d);
extern int capsetp(pid_t pid, cap_t cap_d);
@@ -206,4 +214,86 @@ extern int capsetp(pid_t pid, cap_t cap_d);
*/
#define ssizeof(x) ((ssize_t) sizeof(x))
+/*
+ * Put this here as a macro so we can unit test it.
+ *
+ * _binary_search(val, fn, low, high, fallback) bisects the closed
+ * range [low, high], assuming fn(c) is non-negative for every c below
+ * some threshold and negative from the threshold on. val receives the
+ * lowest c for which fn(c) was found to be negative (high+1 when no
+ * value in the range was), except that a result of 0 is replaced by
+ * fallback.
+ *
+ * low, high and fallback are parenthesized and evaluated at most
+ * once. The locals carry a _bs_ prefix so that caller expressions
+ * that happen to be called min, max or mid are not captured.
+ */
+#define _binary_search(val, fn, low, high, fallback) do {          \
+        cap_value_t _bs_min = (low), _bs_max = (high);             \
+        while (_bs_min <= _bs_max) {                               \
+            cap_value_t _bs_mid = (_bs_min + _bs_max) / 2;         \
+            if (fn(_bs_mid) < 0) {                                 \
+                _bs_max = _bs_mid - 1;                             \
+            } else {                                               \
+                _bs_min = _bs_mid + 1;                             \
+            }                                                      \
+        }                                                          \
+        (val) = _bs_min ? _bs_min : (fallback);                    \
+    } while(0)
+
+/*
+ * cap_iab_s holds a collection of inheritable capability bits. The i
+ * bits are inheritable (these are the same as those in cap_t), the a
+ * bits are ambient bits (which cannot be a superset of i&p), and nb
+ * are the bits that will be dropped from the bounding set when
+ * applied.
+ */
+struct cap_iab_s {
+ __u32 i[_LIBCAP_CAPABILITY_U32S]; /* inheritable bits */
+ __u32 a[_LIBCAP_CAPABILITY_U32S]; /* ambient bits */
+ __u32 nb[_LIBCAP_CAPABILITY_U32S]; /* bits to drop from the bounding set */
+};
+
+#define LIBCAP_IAB_I_FLAG (1U << CAP_IAB_INH)
+#define LIBCAP_IAB_A_FLAG (1U << CAP_IAB_AMB)
+#define LIBCAP_IAB_IA_FLAG (LIBCAP_IAB_I_FLAG | LIBCAP_IAB_A_FLAG)
+#define LIBCAP_IAB_NB_FLAG (1U << CAP_IAB_BOUND)
+
+/*
+ * The following support launching another process without destroying
+ * the state of the current process. This is especially useful for
+ * multithreaded applications.
+ */
+struct cap_launch_s {
+ /*
+ * Once forked but before active privilege is changed, this
+ * function (if non-NULL) is called.
+ */
+ int (*custom_setup_fn)(void *detail);
+
+ /*
+ * user and groups to be used by the forked child.
+ */
+ int change_uids; /* NOTE(review): presumably non-zero once a uid was requested - confirm */
+ uid_t uid;
+
+ int change_gids; /* NOTE(review): presumably non-zero once gid/groups were requested - confirm */
+ gid_t gid;
+ int ngroups; /* NOTE(review): presumably the number of entries in groups - confirm */
+ const gid_t *groups;
+
+ /*
+ * mode holds the preferred capability mode. Any non-uncertain
+ * setting here will require an empty ambient set.
+ */
+ int change_mode; /* NOTE(review): presumably non-zero once a mode was requested - confirm */
+ cap_mode_t mode;
+
+ /*
+ * i,a,[n]b caps. These bitmaps hold all of the capability sets that
+ * cap_launch will affect. nb holds values to be lowered in the bounding
+ * set.
+ */
+ struct cap_iab_s *iab;
+
+ /* chroot holds a preferred chroot for the launched child. */
+ char *chroot;
+
+ /*
+ * execve style arguments
+ */
+ const char *arg0; /* NOTE(review): presumably the program path given to execve - confirm */
+ const char *const *argv;
+ const char *const *envp;
+};
+
#endif /* LIBCAP_H */
diff --git a/libcap/libcap.pc.in b/libcap/libcap.pc.in
index a28e3e4..69cd231 100644
--- a/libcap/libcap.pc.in
+++ b/libcap/libcap.pc.in
@@ -4,7 +4,7 @@ libdir=@libdir@
includedir=@includedir@
Name: libcap
-Description: libcap
+Description: libcap - linux capabilities library
Version: @VERSION@
Libs: -L${libdir} -lcap
Libs.private: @deps@
diff --git a/libcap/libpsx.pc.in b/libcap/libpsx.pc.in
new file mode 100644
index 0000000..d032b9f
--- /dev/null
+++ b/libcap/libpsx.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libpsx
+Description: libpsx - linux posix syscall API for pthreads
+Version: @VERSION@
+Libs: -L${libdir} -lpsx -lpthread -Wl,-wrap,pthread_create
+Libs.private: @deps@
+Cflags: -I${includedir}
diff --git a/pam_cap/.gitignore b/pam_cap/.gitignore
index 11806f5..05e9bbf 100644
--- a/pam_cap/.gitignore
+++ b/pam_cap/.gitignore
@@ -1,2 +1,3 @@
pam_cap.so
-testcompile
+testlink
+test_pam_cap
diff --git a/pam_cap/Makefile b/pam_cap/Makefile
index cc32fb6..56604fd 100644
--- a/pam_cap/Makefile
+++ b/pam_cap/Makefile
@@ -3,27 +3,41 @@
topdir=$(shell pwd)/..
include ../Make.Rules
-# Note (as the author of much of the Linux-PAM library, I am confident
-# that this next line does *not* require -lpam on it.) If you think it
-# does, *verify that it does*, and if you observe that it fails as
-# written (and you know why it fails), email me and explain why. Thanks!
-LDLIBS += -L../libcap -lcap
-
all: pam_cap.so
- $(MAKE) testcompile
+ $(MAKE) testlink
install: all
mkdir -p -m 0755 $(FAKEROOT)$(LIBDIR)/security
install -m 0755 pam_cap.so $(FAKEROOT)$(LIBDIR)/security
+# Note (as the author of much of the Linux-PAM library, I am confident
+# that this next line does *not* require -lpam on it.) If you think it
+# does, *verify that it does*, and if you observe that it fails as
+# written (and you know why it fails), email me and explain why. Thanks!
+
pam_cap.so: pam_cap.o
- $(LD) $(LDFLAGS) -o pam_cap.so $< $(LDLIBS)
+ $(LD) -o pam_cap.so $< $(LIBCAPLIB) $(LDFLAGS)
pam_cap.o: pam_cap.c
$(CC) $(CFLAGS) $(IPATH) -c $< -o $@
-testcompile: test.c pam_cap.o
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $+ -lpam -ldl $(LDLIBS)
+test_pam_cap: test_pam_cap.c pam_cap.c
+ $(CC) $(CFLAGS) $(IPATH) -o $@ test_pam_cap.c $(LIBCAPLIB) $(LDFLAGS) --static
+
+testlink: test.c pam_cap.o
+ $(CC) $(CFLAGS) -o $@ $+ -lpam -ldl $(LIBCAPLIB) $(LDFLAGS)
+
+# Recurse through $(MAKE), as the all: rule does, so that command line
+# flags and the jobserver reach the sub-make.
+test: pam_cap.so
+	$(MAKE) testlink
+
+sudotest: test test_pam_cap
+ sudo ./test_pam_cap root 0x0 0x0 0x0 config=./capability.conf
+ sudo ./test_pam_cap root 0x0 0x0 0x0 config=./sudotest.conf
+ sudo ./test_pam_cap alpha 0x0 0x0 0x0 config=./capability.conf
+ sudo ./test_pam_cap alpha 0x0 0x1 0x80 config=./sudotest.conf
+ sudo ./test_pam_cap beta 0x0 0x1 0x0 config=./sudotest.conf
+ sudo ./test_pam_cap gamma 0x0 0x0 0x81 config=./sudotest.conf
+ sudo ./test_pam_cap delta 0x41 0x80 0x41 config=./sudotest.conf
clean:
- rm -f *.o *.so testcompile *~
+ rm -f *.o *.so testlink test_pam_cap *~
diff --git a/pam_cap/pam_cap.c b/pam_cap/pam_cap.c
index e6ebbe9..6927f7b 100644
--- a/pam_cap/pam_cap.c
+++ b/pam_cap/pam_cap.c
@@ -1,20 +1,28 @@
/*
- * Copyright (c) 1999,2007 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1999,2007,19,20 Andrew G. Morgan <morgan@kernel.org>
*
- * The purpose of this module is to enforce inheritable capability sets
- * for a specified user.
+ * The purpose of this module is to enforce inheritable, bounding and
+ * ambient capability sets for a specified user.
*/
/* #define DEBUG */
-#include <stdio.h>
-#include <string.h>
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
#include <errno.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
#include <stdarg.h>
#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
#include <syslog.h>
-
#include <sys/capability.h>
+#include <sys/types.h>
+#include <linux/limits.h>
#include <security/pam_modules.h>
#include <security/_pam_macros.h>
@@ -22,8 +30,6 @@
#define USER_CAP_FILE "/etc/security/capability.conf"
#define CAP_FILE_BUFFER_SIZE 4096
#define CAP_FILE_DELIMITERS " \t\n"
-#define CAP_COMBINED_FORMAT "%s all-i %s+i"
-#define CAP_DROP_ALL "%s all-i"
struct pam_cap_s {
int debug;
@@ -31,25 +37,71 @@ struct pam_cap_s {
const char *conf_filename;
};
+/*
+ * load_groups obtains the list of all of the groups associated with
+ * the requested user: gid & supplemental groups.
+ *
+ * On success it returns 0, with *groups pointing at a calloc()ed array
+ * of *groups_n strdup()ed group names; the caller owns both the array
+ * and the strings. Group ids that getgrgid() cannot resolve are left
+ * out of the list.
+ *
+ * On failure (unknown user, getgrouplist() failure or out of memory)
+ * it returns -1, leaving *groups == NULL and *groups_n == 0.
+ */
+static int load_groups(const char *user, char ***groups, int *groups_n) {
+    struct passwd *pwd;
+    gid_t grps[NGROUPS_MAX];
+    int ngrps = NGROUPS_MAX;
+
+    *groups = NULL;
+    *groups_n = 0;
+
+    pwd = getpwnam(user);
+    if (pwd == NULL) {
+        return -1;
+    }
+
+    /* must include at least pwd->pw_gid, hence < 1 test. */
+    if (getgrouplist(user, pwd->pw_gid, grps, &ngrps) < 1) {
+        return -1;
+    }
+
+    char **names = calloc(ngrps, sizeof(char *));
+    if (names == NULL) {
+        return -1;
+    }
+
+    int g_n = 0, i;
+    for (i = 0; i < ngrps; i++) {
+        const struct group *g = getgrgid(grps[i]);
+        if (g == NULL) {
+            continue;
+        }
+        D(("noting [%s] is a member of [%s]", user, g->gr_name));
+        names[g_n] = strdup(g->gr_name);
+        if (names[g_n] == NULL) {
+            /*
+             * Give up rather than hand back a partial list: the
+             * caller compares every entry with strcmp().
+             */
+            while (g_n > 0) {
+                free(names[--g_n]);
+            }
+            free(names);
+            return -1;
+        }
+        g_n++;
+    }
+
+    /* only publish the list once it is complete */
+    *groups = names;
+    *groups_n = g_n;
+    return 0;
+}
+
/* obtain the inheritable capabilities for the current user */
static char *read_capabilities_for_user(const char *user, const char *source)
{
char *cap_string = NULL;
char buffer[CAP_FILE_BUFFER_SIZE], *line;
+ char **groups;
+ int groups_n;
FILE *cap_file;
+ if (load_groups(user, &groups, &groups_n)) {
+ D(("unknown user [%s]", user));
+ return NULL;
+ }
+
cap_file = fopen(source, "r");
if (cap_file == NULL) {
D(("failed to open capability file"));
- return NULL;
+ goto defer;
}
- while ((line = fgets(buffer, CAP_FILE_BUFFER_SIZE, cap_file))) {
- int found_one = 0;
+ int found_one = 0;
+ while (!found_one &&
+ (line = fgets(buffer, CAP_FILE_BUFFER_SIZE, cap_file))) {
const char *cap_text;
- cap_text = strtok(line, CAP_FILE_DELIMITERS);
+ char *next = NULL;
+ cap_text = strtok_r(line, CAP_FILE_DELIMITERS, &next);
if (cap_text == NULL) {
D(("empty line"));
@@ -60,38 +112,65 @@ static char *read_capabilities_for_user(const char *user, const char *source)
continue;
}
- while ((line = strtok(NULL, CAP_FILE_DELIMITERS))) {
-
+ /*
+ * Explore whether any of the ids are a match for the current
+ * user.
+ */
+ while ((line = strtok_r(next, CAP_FILE_DELIMITERS, &next))) {
if (strcmp("*", line) == 0) {
D(("wildcard matched"));
found_one = 1;
- cap_string = strdup(cap_text);
break;
}
if (strcmp(user, line) == 0) {
D(("exact match for user"));
found_one = 1;
- cap_string = strdup(cap_text);
break;
}
- D(("user is not [%s] - skipping", line));
- }
+            if (line[0] != '@') {
+                /*
+                 * Only "@group" tokens name a group. Move on to the
+                 * next token here: falling through would compare this
+                 * token, minus its first character, against the
+                 * user's group names.
+                 */
+                D(("user [%s] is not [%s] - skipping", user, line));
+                continue;
+            }
- cap_text = NULL;
- line = NULL;
+ int i;
+ for (i=0; i < groups_n; i++) {
+ if (!strcmp(groups[i], line+1)) {
+ D(("user group matched [%s]", line));
+ found_one = 1;
+ break;
+ }
+ }
+ if (found_one) {
+ break;
+ }
+ }
if (found_one) {
+ cap_string = strdup(cap_text);
D(("user [%s] matched - caps are [%s]", user, cap_string));
- break;
}
+
+ cap_text = NULL;
+ line = NULL;
}
fclose(cap_file);
+defer:
memset(buffer, 0, CAP_FILE_BUFFER_SIZE);
+ int i;
+ for (i = 0; i < groups_n; i++) {
+ char *g = groups[i];
+ _pam_overwrite(g);
+ _pam_drop(g);
+ }
+ if (groups != NULL) {
+ memset(groups, 0, groups_n * sizeof(char *));
+ _pam_drop(groups);
+ }
+
return cap_string;
}
@@ -100,15 +179,12 @@ static char *read_capabilities_for_user(const char *user, const char *source)
* permitted+executable sets combined with the configured inheritable
* set.
*/
-
static int set_capabilities(struct pam_cap_s *cs)
{
cap_t cap_s;
- ssize_t length = 0;
- char *conf_icaps;
- char *proc_epcaps;
- char *combined_caps;
+ char *conf_caps;
int ok = 0;
+ cap_iab_t iab;
cap_s = cap_get_proc();
if (cap_s == NULL) {
@@ -117,81 +193,55 @@ static int set_capabilities(struct pam_cap_s *cs)
return 0;
}
- conf_icaps =
- read_capabilities_for_user(cs->user,
- cs->conf_filename
- ? cs->conf_filename:USER_CAP_FILE );
- if (conf_icaps == NULL) {
+ conf_caps = read_capabilities_for_user(cs->user,
+ cs->conf_filename
+ ? cs->conf_filename:USER_CAP_FILE );
+ if (conf_caps == NULL) {
D(("no capabilities found for user [%s]", cs->user));
goto cleanup_cap_s;
}
- proc_epcaps = cap_to_text(cap_s, &length);
- if (proc_epcaps == NULL) {
- D(("unable to convert process capabilities to text"));
- goto cleanup_icaps;
+ ssize_t conf_caps_length = strlen(conf_caps);
+ if (!strcmp(conf_caps, "all")) {
+ /*
+ * all here is interpreted as no change/pass through, which is
+ * likely to be the same as none for sensible system defaults.
+ */
+ ok = 1;
+ goto cleanup_conf;
}
- /*
- * This is a pretty inefficient way to combine
- * capabilities. However, it seems to be the most straightforward
- * one, given the limitations of the POSIX.1e draft spec. The spec
- * is optimized for applications that know the capabilities they
- * want to manipulate at compile time.
- */
-
- combined_caps = malloc(1+strlen(CAP_COMBINED_FORMAT)
- +strlen(proc_epcaps)+strlen(conf_icaps));
- if (combined_caps == NULL) {
- D(("unable to combine capabilities into one string - no memory"));
- goto cleanup_epcaps;
+    if (!strcmp(conf_caps, "none")) {
+        /* clearing CAP_INHERITABLE will also clear the ambient caps,
+         * but for legacy reasons we do not alter the bounding set. */
+        cap_clear_flag(cap_s, CAP_INHERITABLE);
+        if (!cap_set_proc(cap_s)) {
+            ok = 1;
+        }
+        /* leave via cleanup_conf so that conf_caps is wiped and freed */
+        goto cleanup_conf;
+    }
- if (!strcmp(conf_icaps, "none")) {
- sprintf(combined_caps, CAP_DROP_ALL, proc_epcaps);
- } else if (!strcmp(conf_icaps, "all")) {
- /* no change */
- sprintf(combined_caps, "%s", proc_epcaps);
- } else {
- sprintf(combined_caps, CAP_COMBINED_FORMAT, proc_epcaps, conf_icaps);
- }
- D(("combined_caps=[%s]", combined_caps));
-
- cap_free(cap_s);
- cap_s = cap_from_text(combined_caps);
- _pam_overwrite(combined_caps);
- _pam_drop(combined_caps);
-
-#ifdef DEBUG
- {
- char *temp = cap_to_text(cap_s, NULL);
- D(("abbreviated caps for process will be [%s]", temp));
- cap_free(temp);
+ iab = cap_iab_from_text(conf_caps);
+ if (iab == NULL) {
+ D(("unable to parse the IAB [%s] value", conf_caps));
+ goto cleanup_conf;
}
-#endif /* DEBUG */
- if (cap_s == NULL) {
- D(("no capabilies to set"));
- } else if (cap_set_proc(cap_s) == 0) {
- D(("capabilities were set correctly"));
+ if (!cap_iab_set_proc(iab)) {
+ D(("able to set the IAB [%s] value", conf_caps));
ok = 1;
- } else {
- D(("failed to set specified capabilities: %s", strerror(errno)));
}
+ cap_free(iab);
-cleanup_epcaps:
- cap_free(proc_epcaps);
-
-cleanup_icaps:
- _pam_overwrite(conf_icaps);
- _pam_drop(conf_icaps);
+cleanup_conf:
+ memset(conf_caps, 0, conf_caps_length);
+ _pam_drop(conf_caps);
cleanup_cap_s:
if (cap_s) {
cap_free(cap_s);
cap_s = NULL;
}
-
return ok;
}
@@ -210,35 +260,34 @@ static void _pam_log(int err, const char *format, ...)
static void parse_args(int argc, const char **argv, struct pam_cap_s *pcs)
{
- int ctrl=0;
-
/* step through arguments */
- for (ctrl=0; argc-- > 0; ++argv) {
-
+ for (; argc-- > 0; ++argv) {
if (!strcmp(*argv, "debug")) {
pcs->debug = 1;
- } else if (!memcmp(*argv, "config=", 7)) {
+ } else if (!strncmp(*argv, "config=", 7)) {
pcs->conf_filename = 7 + *argv;
} else {
_pam_log(LOG_ERR, "unknown option; %s", *argv);
}
-
}
}
+/*
+ * pam_sm_authenticate parses the config file with respect to the user
+ * being authenticated and determines if they are covered by any
+ * capability inheritance rules.
+ */
int pam_sm_authenticate(pam_handle_t *pamh, int flags,
int argc, const char **argv)
{
int retval;
struct pam_cap_s pcs;
- char *conf_icaps;
+ char *conf_caps;
memset(&pcs, 0, sizeof(pcs));
-
parse_args(argc, argv, &pcs);
retval = pam_get_user(pamh, &pcs.user, NULL);
-
if (retval == PAM_CONV_AGAIN) {
D(("user conversation is not available yet"));
memset(&pcs, 0, sizeof(pcs));
@@ -251,59 +300,58 @@ int pam_sm_authenticate(pam_handle_t *pamh, int flags,
return PAM_AUTH_ERR;
}
- conf_icaps =
- read_capabilities_for_user(pcs.user,
- pcs.conf_filename
- ? pcs.conf_filename:USER_CAP_FILE );
-
+ conf_caps = read_capabilities_for_user(pcs.user,
+ pcs.conf_filename
+ ? pcs.conf_filename:USER_CAP_FILE );
memset(&pcs, 0, sizeof(pcs));
- if (conf_icaps) {
+ if (conf_caps) {
D(("it appears that there are capabilities for this user [%s]",
- conf_icaps));
+ conf_caps));
/* We could also store this as a pam_[gs]et_data item for use
by the setcred call to follow. As it is, there is a small
race associated with a redundant read. Oh well, if you
care, send me a patch.. */
- _pam_overwrite(conf_icaps);
- _pam_drop(conf_icaps);
+ _pam_overwrite(conf_caps);
+ _pam_drop(conf_caps);
return PAM_SUCCESS;
} else {
- D(("there are no capabilities restrctions on this user"));
+ D(("there are no capabilities restrictions on this user"));
return PAM_IGNORE;
}
}
+/*
+ * pam_sm_setcred applies inheritable capabilities loaded by the
+ * pam_sm_authenticate pass for the user.
+ */
int pam_sm_setcred(pam_handle_t *pamh, int flags,
int argc, const char **argv)
{
int retval;
struct pam_cap_s pcs;
- if (!(flags & PAM_ESTABLISH_CRED)) {
+ if (!(flags & (PAM_ESTABLISH_CRED | PAM_REINITIALIZE_CRED))) {
D(("we don't handle much in the way of credentials"));
return PAM_IGNORE;
}
memset(&pcs, 0, sizeof(pcs));
-
parse_args(argc, argv, &pcs);
retval = pam_get_item(pamh, PAM_USER, (const void **)&pcs.user);
if ((retval != PAM_SUCCESS) || (pcs.user == NULL) || !(pcs.user[0])) {
-
D(("user's name is not set"));
return PAM_AUTH_ERR;
}
retval = set_capabilities(&pcs);
-
memset(&pcs, 0, sizeof(pcs));
return (retval ? PAM_SUCCESS:PAM_IGNORE );
diff --git a/pam_cap/sudotest.conf b/pam_cap/sudotest.conf
new file mode 100644
index 0000000..ff528ce
--- /dev/null
+++ b/pam_cap/sudotest.conf
@@ -0,0 +1,23 @@
+# only root
+all root
+
+# this should fire for beta only
+!cap_chown beta
+
+# the next one should snag gamma since beta done
+cap_setuid,cap_chown @three
+
+# neither of these should fire
+cap_chown beta gamma
+
+# just alpha
+!cap_chown,cap_setuid @one
+
+# not this one
+^cap_setuid alpha
+
+# this should fire
+^cap_chown,^cap_setgid,!cap_setuid delta
+
+# not this one
+cap_setuid @four
diff --git a/pam_cap/test_pam_cap.c b/pam_cap/test_pam_cap.c
new file mode 100644
index 0000000..452a27f
--- /dev/null
+++ b/pam_cap/test_pam_cap.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2019 Andrew G. Morgan <morgan@kernel.org>
+ *
+ * This test inlines the pam_cap module and runs test vectors against
+ * it.
+ */
+
+#include "./pam_cap.c"
+
+/* Group names served by the getgrgid() stub; a gid indexes this table. */
+const char *test_groups[] = {
+    "root", "one", "two", "three", "four", "five", "six", "seven"
+};
+/* parenthesized so the count stays a single value in larger expressions */
+#define n_groups (sizeof(test_groups)/sizeof(*test_groups))
+
+/* User names known to the getpwnam() and getgrouplist() stubs. */
+const char *test_users[] = {
+    "root", "alpha", "beta", "gamma", "delta"
+};
+/* parenthesized so the count stays a single value in larger expressions */
+#define n_users (sizeof(test_users)/sizeof(*test_users))
+
+/* Note about memberships:
+ *
+ * user gid suppl groups
+ * root root
+ * alpha one two
+ * beta two three four
+ * gamma three four five six
+ * delta four five six seven [eight]
+ */
+
+static char *test_user;
+
+/*
+ * Stand-in for the libpam call: hands back the test_user global, and
+ * reports PAM_CONV_AGAIN for as long as no test user has been chosen.
+ * pamh and prompt are ignored.
+ */
+int pam_get_user(pam_handle_t *pamh, const char **user, const char *prompt) {
+    *user = test_user;
+    return (test_user == NULL) ? PAM_CONV_AGAIN : PAM_SUCCESS;
+}
+
+/*
+ * Stand-in for the libpam call: only PAM_USER is supported, for which
+ * *item is set to the test_user global and 0 is returned. Any other
+ * item type yields -1 with errno set to EINVAL. pamh is ignored.
+ */
+int pam_get_item(const pam_handle_t *pamh, int item_type, const void **item) {
+    if (item_type == PAM_USER) {
+        *item = test_user;
+        return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+/*
+ * Stand-in for the libc call. The user at index k of test_users is
+ * given k+1 groups with ids k, k+1, ..., 2k, which are written to
+ * groups[]. The count is stored in *ngroups and also returned. An
+ * unknown user yields -1. The group argument is ignored.
+ */
+int getgrouplist(const char *user, gid_t group, gid_t *groups, int *ngroups) {
+    int who = 0, slot;
+
+    while (who < n_users && strcmp(user, test_users[who]) != 0) {
+        who++;
+    }
+    if (who == n_users) {
+        return -1;
+    }
+
+    *ngroups = who + 1;
+    groups[0] = who;
+    for (slot = 1; slot < *ngroups; slot++) {
+        groups[slot] = who + slot;
+    }
+    return *ngroups;
+}
+
+/*
+ * Stand-in for the libc call: gid is an index into test_groups. The
+ * result points at a single static record, so it is only valid until
+ * the next call. An out of range gid yields NULL with errno set to
+ * EINVAL; NULL is also returned if the name cannot be duplicated.
+ */
+static struct group gr;
+struct group *getgrgid(gid_t gid) {
+    if (gid >= n_groups) {
+        errno = EINVAL;
+        return NULL;
+    }
+    /* drop the copy made by the previous call (NULL the first time) */
+    free(gr.gr_name);
+    gr.gr_name = strdup(test_groups[gid]);
+    if (gr.gr_name == NULL) {
+        return NULL;
+    }
+    return &gr;
+}
+
+/*
+ * Stand-in for the libc call: a name found in test_users yields the
+ * static pw record with pw_gid set to the index of that name; any
+ * other name yields NULL. No other field of pw is filled in.
+ */
+static struct passwd pw;
+struct passwd *getpwnam(const char *name) {
+    int who = 0;
+
+    while (who < n_users) {
+        if (!strcmp(test_users[who], name)) {
+            pw.pw_gid = who;
+            return &pw;
+        }
+        who++;
+    }
+    return NULL;
+}
+
+/* we'll use these to keep track of the three vectors - only use
+ lowest 64 bits */
+
+#define A 0
+#define B 1
+#define I 2
+
+/*
+ * load_vectors snapshots the low 64 capability values of the current
+ * process into bits[A] (ambient), bits[B] (bounding) and bits[I]
+ * (inheritable), one bit per capability value. The scan ends early at
+ * the first value for which cap_get_bound() or cap_get_flag() reports
+ * an error.
+ */
+static void load_vectors(unsigned long int bits[3]) {
+    int cap;
+
+    memset(bits, 0, 3*sizeof(unsigned long int));
+    cap_t current = cap_get_proc();
+
+    for (cap = 0; cap < 64; cap++) {
+        const unsigned long int bit = (1ULL << cap);
+        cap_flag_value_t inheritable;
+
+        int bound = cap_get_bound(cap);
+        if (bound < 0) {
+            break;
+        }
+        if (bound) {
+            bits[B] |= bit;
+        }
+
+        if (cap_get_flag(current, cap, CAP_INHERITABLE, &inheritable) != 0) {
+            break;
+        }
+        if (inheritable) {
+            bits[I] |= bit;
+        }
+
+        if (cap_get_ambient(cap) > 0) {
+            bits[A] |= bit;
+        }
+    }
+
+    cap_free(current);
+}
+
+/*
+ * args: user a b i config-args...
+ *
+ * a and i are the ambient and inheritable vectors expected once
+ * pam_sm_setcred() has run; b holds the bounding set bits that are
+ * expected to differ from their value before it ran. The exit status
+ * is 0 when all three expectations hold and 1 otherwise.
+ */
+int main(int argc, char *argv[]) {
+    unsigned long int before[3], change[3], after[3];
+
+    /* argv[1] through argv[4] are all read below, so insist on them */
+    if (argc < 5) {
+        printf("usage: test_pam_cap <user> <a> <b> <i> [config-args...]\n");
+        exit(1);
+    }
+
+    /*
+     * Start out with a cleared inheritable set.
+     */
+    cap_t orig = cap_get_proc();
+    cap_clear_flag(orig, CAP_INHERITABLE);
+    cap_set_proc(orig);
+    cap_free(orig);
+
+    change[A] = strtoul(argv[2], NULL, 0);
+    change[B] = strtoul(argv[3], NULL, 0);
+    change[I] = strtoul(argv[4], NULL, 0);
+
+    /*
+     * NOTE(review): the module arguments start at argv[4], so the i
+     * value is handed to the module as its first argument as well -
+     * confirm that this is intended.
+     */
+    void* args_for_pam = argv+4;
+
+    /* test_user is still NULL here, so no username can be obtained */
+    int status = pam_sm_authenticate(NULL, 0, argc-4,
+                                     (const char **) args_for_pam);
+    if (status != PAM_INCOMPLETE) {
+        printf("failed to recognize no username\n");
+        exit(1);
+    }
+
+    test_user = argv[1];
+
+    status = pam_sm_authenticate(NULL, 0, argc-4, (const char **) args_for_pam);
+    if (status == PAM_IGNORE) {
+        /* only root is allowed to have no matching configuration */
+        if (strcmp(test_user, "root") == 0) {
+            exit(0);
+        }
+        printf("unconfigured non-root user: %s\n", test_user);
+        exit(1);
+    }
+    if (status != PAM_SUCCESS) {
+        printf("failed to recognize username\n");
+        exit(1);
+    }
+
+    /* Now it is time to execute the credential setting */
+    load_vectors(before);
+
+    status = pam_sm_setcred(NULL, PAM_ESTABLISH_CRED, argc-4,
+                            (const char **) args_for_pam);
+
+    load_vectors(after);
+
+    printf("before: A=0x%016lx B=0x%016lx I=0x%016lx\n",
+           before[A], before[B], before[I]);
+
+    long unsigned int dA = before[A] ^ after[A];
+    long unsigned int dB = before[B] ^ after[B];
+    long unsigned int dI = before[I] ^ after[I];
+
+    printf("diff  : A=0x%016lx B=0x%016lx I=0x%016lx\n", dA, dB, dI);
+    printf("after : A=0x%016lx B=0x%016lx I=0x%016lx\n",
+           after[A], after[B], after[I]);
+
+    /* A and I are checked as absolute values, B as a set of changed bits */
+    int failure = 0;
+    if (after[A] != change[A]) {
+        printf("Ambient set error: got=0x%016lx, want=0x%016lx\n",
+               after[A], change[A]);
+        failure = 1;
+    }
+    if (dB != change[B]) {
+        printf("Bounding set error: got=0x%016lx, want=0x%016lx\n",
+               after[B], before[B] ^ change[B]);
+        failure = 1;
+    }
+    if (after[I] != change[I]) {
+        printf("Inheritable set error: got=0x%016lx, want=0x%016lx\n",
+               after[I], change[I]);
+        failure = 1;
+    }
+
+    exit(failure);
+}
diff --git a/progs/.gitignore b/progs/.gitignore
index f42095f..978229e 100644
--- a/progs/.gitignore
+++ b/progs/.gitignore
@@ -1,5 +1,7 @@
capsh
+tcapsh-static
getcap
getpcaps
setcap
verify-caps
+compare-cap
diff --git a/progs/Makefile b/progs/Makefile
index c094a24..1d7fc7a 100644
--- a/progs/Makefile
+++ b/progs/Makefile
@@ -8,18 +8,27 @@ PROGS=getpcaps capsh getcap setcap
BUILD=$(PROGS)
-ifneq ($(DYNAMIC),yes)
+all: $(BUILD)
+
+ifeq ($(DYNAMIC),yes)
+LDPATH = LD_LIBRARY_PATH=../libcap
+DEPS = ../libcap/libcap.so
+else
LDFLAGS += --static
+DEPS = ../libcap/libcap.a
endif
-LDLIBS += -L../libcap -lcap
-all: $(BUILD)
+# Build the library in its own directory. Recursing through $(MAKE)
+# keeps command line flags and the jobserver working in the sub-make.
+../libcap/libcap.a:
+	$(MAKE) -C ../libcap libcap.a
+
+../libcap/libcap.so:
+	$(MAKE) -C ../libcap libcap.so
-$(BUILD): %: %.o
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS)
+$(BUILD): %: %.o $(DEPS)
+ $(CC) $(CFLAGS) -o $@ $< $(LIBCAPLIB) $(LDFLAGS)
%.o: %.c $(INCS)
- $(CC) $(IPATH) $(CFLAGS) -c $< -o $@
+ $(CC) $(IPATH) $(CAPSH_SHELL) $(CFLAGS) -c $< -o $@
install: all
mkdir -p -m 0755 $(FAKEROOT)$(SBINDIR)
@@ -30,6 +39,14 @@ ifeq ($(RAISE_SETFCAP),yes)
$(FAKEROOT)$(SBINDIR)/setcap cap_setfcap=i $(FAKEROOT)$(SBINDIR)/setcap
endif
+test: $(PROGS)
+
+tcapsh-static: capsh.c $(DEPS)
+ $(CC) $(IPATH) $(CAPSH_SHELL) $(CFLAGS) -o $@ $< $(LIBCAPLIB) $(LDFLAGS) --static
+
+sudotest: test tcapsh-static
+ sudo $(LDPATH) ./quicktest.sh
+
clean:
$(LOCALCLEAN)
- rm -f *.o $(BUILD) tcapsh ping hack.sh
+ rm -f *.o $(BUILD) tcapsh* privileged ping hack.sh compare-cap
diff --git a/progs/capsh.c b/progs/capsh.c
index 3ceadcd..a39ceeb 100644
--- a/progs/capsh.c
+++ b/progs/capsh.c
@@ -1,33 +1,38 @@
/*
- * Copyright (c) 2008-11 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 2008-11,16,19,2020 Andrew G. Morgan <morgan@kernel.org>
*
- * This is a simple 'bash' wrapper program that can be used to
- * raise and lower both the bset and pI capabilities before invoking
- * /bin/bash (hardcoded right now).
+ * This is a multifunction shell wrapper tool that can be used to
+ * launch capable files in various ways with a variety of settings. It
+ * also supports some testing modes, which are used extensively as
+ * part of the libcap build system.
*
* The --print option can be used as a quick test whether various
* capability manipulations work as expected (or not).
*/
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
-#include <sys/prctl.h>
#include <sys/types.h>
-#include <unistd.h>
#include <pwd.h>
#include <grp.h>
#include <errno.h>
#include <ctype.h>
#include <sys/capability.h>
+#include <sys/prctl.h>
#include <sys/securebits.h>
#include <sys/wait.h>
-#include <sys/prctl.h>
+#include <unistd.h>
-#define MAX_GROUPS 100 /* max number of supplementary groups for user */
+#ifndef SHELL
+#define SHELL "/bin/bash"
+#endif /* ndef SHELL */
-static const cap_value_t raise_setpcap[1] = { CAP_SETPCAP };
-static const cap_value_t raise_chroot[1] = { CAP_SYS_CHROOT };
+#define MAX_GROUPS 100 /* max number of supplementary groups for user */
static char *binary(unsigned long value)
{
@@ -43,86 +48,372 @@ static char *binary(unsigned long value)
return string + i;
}
-int main(int argc, char *argv[], char *envp[])
+static void display_prctl_set(const char *name, int (*fn)(cap_value_t))
{
- pid_t child;
- unsigned i;
+ unsigned cap;
+ const char *sep;
+ int set;
+
+ printf("%s set =", name);
+ for (sep = "", cap=0; (set = fn(cap)) >= 0; cap++) {
+ char *ptr;
+ if (!set) {
+ continue;
+ }
- child = 0;
+ ptr = cap_to_name(cap);
+ if (ptr == NULL) {
+ printf("%s%u", sep, cap);
+ } else {
+ printf("%s%s", sep, ptr);
+ cap_free(ptr);
+ }
+ sep = ",";
+ }
+ if (!cap) {
+ printf(" <unsupported>\n");
+ } else {
+ printf("\n");
+ }
+}
- for (i=1; i<argc; ++i) {
- if (!memcmp("--drop=", argv[i], 4)) {
- char *ptr;
- cap_t orig, raised_for_setpcap;
+/* arg_print displays the current capability state of the process */
+static void arg_print(void)
+{
+ long set;
+ int status, j;
+ cap_t all;
+ char *text;
+ const char *sep;
+ struct group *g;
+ gid_t groups[MAX_GROUPS], gid;
+ uid_t uid, euid;
+ struct passwd *u, *eu;
+ cap_iab_t iab;
+
+ all = cap_get_proc();
+ text = cap_to_text(all, NULL);
+ printf("Current: %s\n", text);
+ cap_free(text);
+ cap_free(all);
+
+ display_prctl_set("Bounding", cap_get_bound);
+ display_prctl_set("Ambient", cap_get_ambient);
+ iab = cap_iab_get_proc();
+ text = cap_iab_to_text(iab);
+ printf("Current IAB: %s\n", text);
+ cap_free(text);
+ cap_free(iab);
+
+ set = cap_get_secbits();
+ if (set >= 0) {
+ const char *b = binary(set); /* verilog convention for binary string */
+ printf("Securebits: 0%lo/0x%lx/%u'b%s (no-new-privs=%d)\n", set, set,
+ (unsigned) strlen(b), b,
+ prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0, 0));
+ printf(" secure-noroot: %s (%s)\n",
+ (set & SECBIT_NOROOT) ? "yes":"no",
+ (set & SECBIT_NOROOT_LOCKED) ? "locked":"unlocked");
+ printf(" secure-no-suid-fixup: %s (%s)\n",
+ (set & SECBIT_NO_SETUID_FIXUP) ? "yes":"no",
+ (set & SECBIT_NO_SETUID_FIXUP_LOCKED) ? "locked":"unlocked");
+ printf(" secure-keep-caps: %s (%s)\n",
+ (set & SECBIT_KEEP_CAPS) ? "yes":"no",
+ (set & SECBIT_KEEP_CAPS_LOCKED) ? "locked":"unlocked");
+ if (CAP_AMBIENT_SUPPORTED()) {
+ printf(" secure-no-ambient-raise: %s (%s)\n",
+ (set & SECBIT_NO_CAP_AMBIENT_RAISE) ? "yes":"no",
+ (set & SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED) ?
+ "locked":"unlocked");
+ }
+ } else {
+ printf("[Securebits ABI not supported]\n");
+ set = prctl(PR_GET_KEEPCAPS);
+ if (set >= 0) {
+ printf(" prctl-keep-caps: %s (locking not supported)\n",
+ set ? "yes":"no");
+ } else {
+ printf("[Keepcaps ABI not supported]\n");
+ }
+ }
+ uid = getuid();
+ u = getpwuid(uid);
+ euid = geteuid();
+ eu = getpwuid(euid);
+ printf("uid=%u(%s) euid=%u(%s)\n", uid, u ? u->pw_name : "???", euid, eu ? eu->pw_name : "???");
+ gid = getgid();
+ g = getgrgid(gid);
+ printf("gid=%u(%s)\n", gid, g ? g->gr_name : "???");
+ printf("groups=");
+ status = getgroups(MAX_GROUPS, groups);
+ sep = "";
+ for (j=0; j < status; j++) {
+ g = getgrgid(groups[j]);
+ printf("%s%u(%s)", sep, groups[j], g ? g->gr_name : "???");
+ sep = ",";
+ }
+ printf("\n");
+ cap_mode_t mode = cap_get_mode();
+ printf("Guessed mode: %s (%d)\n", cap_mode_name(mode), mode);
+}
- /*
- * We need to do this here because --inh=XXX may have reset
- * orig and it isn't until we are within the --drop code that
- * we know what the prevailing (orig) pI value is.
- */
- orig = cap_get_proc();
- if (orig == NULL) {
- perror("Capabilities not available");
+static const cap_value_t raise_setpcap[1] = { CAP_SETPCAP };
+static const cap_value_t raise_chroot[1] = { CAP_SYS_CHROOT };
+
+static void push_pcap(cap_t *orig_p, cap_t *raised_for_setpcap_p)
+{
+ /*
+ * We need to do this here because --inh=XXX may have reset
+ * orig and it isn't until we are within the --drop code that
+ * we know what the prevailing (orig) pI value is.
+ */
+ *orig_p = cap_get_proc();
+ if (NULL == *orig_p) {
+ perror("Capabilities not available");
+ exit(1);
+ }
+
+ *raised_for_setpcap_p = cap_dup(*orig_p);
+ if (NULL == *raised_for_setpcap_p) {
+ fprintf(stderr, "modification requires CAP_SETPCAP\n");
+ exit(1);
+ }
+ if (cap_set_flag(*raised_for_setpcap_p, CAP_EFFECTIVE, 1,
+ raise_setpcap, CAP_SET) != 0) {
+ perror("unable to select CAP_SETPCAP");
+ exit(1);
+ }
+}
+
+static void pop_pcap(cap_t orig, cap_t raised_for_setpcap)
+{
+ cap_free(raised_for_setpcap);
+ cap_free(orig);
+}
+
+static void arg_drop(const char *arg_names)
+{
+ char *ptr;
+ cap_t orig, raised_for_setpcap;
+ char *names;
+
+ push_pcap(&orig, &raised_for_setpcap);
+ if (strcmp("all", arg_names) == 0) {
+ unsigned j = 0;
+ while (CAP_IS_SUPPORTED(j)) {
+ int status;
+ if (cap_set_proc(raised_for_setpcap) != 0) {
+ perror("unable to raise CAP_SETPCAP for BSET changes");
+ exit(1);
+ }
+ status = cap_drop_bound(j);
+ if (cap_set_proc(orig) != 0) {
+ perror("unable to lower CAP_SETPCAP post BSET change");
exit(1);
}
+ if (status != 0) {
+ char *name_ptr;
- raised_for_setpcap = cap_dup(orig);
- if (raised_for_setpcap == NULL) {
- fprintf(stderr, "BSET modification requires CAP_SETPCAP\n");
+ name_ptr = cap_to_name(j);
+ fprintf(stderr, "Unable to drop bounding capability [%s]\n",
+ name_ptr);
+ cap_free(name_ptr);
exit(1);
}
+ j++;
+ }
+ pop_pcap(orig, raised_for_setpcap);
+ return;
+ }
+
+ names = strdup(arg_names);
+ if (NULL == names) {
+ fprintf(stderr, "failed to allocate names\n");
+ exit(1);
+ }
+ for (ptr = names; (ptr = strtok(ptr, ",")); ptr = NULL) {
+ /* find name for token */
+ cap_value_t cap;
+ int status;
- if (cap_set_flag(raised_for_setpcap, CAP_EFFECTIVE, 1,
- raise_setpcap, CAP_SET) != 0) {
- perror("unable to select CAP_SETPCAP");
+ if (cap_from_name(ptr, &cap) != 0) {
+ fprintf(stderr, "capability [%s] is unknown to libcap\n", ptr);
+ exit(1);
+ }
+ if (cap_set_proc(raised_for_setpcap) != 0) {
+ perror("unable to raise CAP_SETPCAP for BSET changes");
+ exit(1);
+ }
+ status = cap_drop_bound(cap);
+ if (cap_set_proc(orig) != 0) {
+ perror("unable to lower CAP_SETPCAP post BSET change");
+ exit(1);
+ }
+ if (status != 0) {
+ fprintf(stderr, "failed to drop [%s=%u]\n", ptr, cap);
+ exit(1);
+ }
+ }
+ pop_pcap(orig, raised_for_setpcap);
+ free(names);
+}
+
+static void arg_change_amb(const char *arg_names, cap_flag_value_t set)
+{
+ char *ptr;
+ cap_t orig, raised_for_setpcap;
+ char *names;
+
+ push_pcap(&orig, &raised_for_setpcap);
+ if (strcmp("all", arg_names) == 0) {
+ unsigned j = 0;
+ while (CAP_IS_SUPPORTED(j)) {
+ int status;
+ if (cap_set_proc(raised_for_setpcap) != 0) {
+ perror("unable to raise CAP_SETPCAP for AMBIENT changes");
+ exit(1);
+ }
+ status = cap_set_ambient(j, set);
+ if (cap_set_proc(orig) != 0) {
+ perror("unable to lower CAP_SETPCAP post AMBIENT change");
+ exit(1);
+ }
+ if (status != 0) {
+ char *name_ptr;
+
+ name_ptr = cap_to_name(j);
+ fprintf(stderr, "Unable to %s ambient capability [%s]\n",
+ set == CAP_CLEAR ? "clear":"raise", name_ptr);
+ cap_free(name_ptr);
exit(1);
}
+ j++;
+ }
+ pop_pcap(orig, raised_for_setpcap);
+ return;
+ }
- if (strcmp("all", argv[i]+7) == 0) {
- unsigned j = 0;
- while (CAP_IS_SUPPORTED(j)) {
- if (cap_drop_bound(j) != 0) {
- char *name_ptr;
+ names = strdup(arg_names);
+ if (NULL == names) {
+ fprintf(stderr, "failed to allocate names\n");
+ exit(1);
+ }
+ for (ptr = names; (ptr = strtok(ptr, ",")); ptr = NULL) {
+ /* find name for token */
+ cap_value_t cap;
+ int status;
- name_ptr = cap_to_name(j);
- fprintf(stderr,
- "Unable to drop bounding capability [%s]\n",
- name_ptr);
- cap_free(name_ptr);
- exit(1);
- }
- j++;
- }
- } else {
- for (ptr = argv[i]+7; (ptr = strtok(ptr, ",")); ptr = NULL) {
- /* find name for token */
- cap_value_t cap;
- int status;
-
- if (cap_from_name(ptr, &cap) != 0) {
- fprintf(stderr,
- "capability [%s] is unknown to libcap\n",
- ptr);
- exit(1);
- }
- if (cap_set_proc(raised_for_setpcap) != 0) {
- perror("unable to raise CAP_SETPCAP for BSET changes");
- exit(1);
- }
- status = prctl(PR_CAPBSET_DROP, cap);
- if (cap_set_proc(orig) != 0) {
- perror("unable to lower CAP_SETPCAP post BSET change");
- exit(1);
- }
- if (status) {
- fprintf(stderr, "failed to drop [%s=%u]\n", ptr, cap);
- exit(1);
- }
- }
+ if (cap_from_name(ptr, &cap) != 0) {
+ fprintf(stderr, "capability [%s] is unknown to libcap\n", ptr);
+ exit(1);
+ }
+ if (cap_set_proc(raised_for_setpcap) != 0) {
+ perror("unable to raise CAP_SETPCAP for AMBIENT changes");
+ exit(1);
+ }
+ status = cap_set_ambient(cap, set);
+ if (cap_set_proc(orig) != 0) {
+ perror("unable to lower CAP_SETPCAP post AMBIENT change");
+ exit(1);
+ }
+ if (status != 0) {
+ fprintf(stderr, "failed to %s ambient [%s=%u]\n",
+ set == CAP_CLEAR ? "clear":"raise", ptr, cap);
+ exit(1);
+ }
+ }
+ pop_pcap(orig, raised_for_setpcap);
+ free(names);
+}
+
+/*
+ * find_self locates and returns the full pathname of the named binary
+ * that is running. Importantly, it looks in the context of the
+ * prevailing CHROOT. Further, it does not fail over to invoking a
+ * shell if the target binary looks like something other than an
+ * executable. If an executable is not found, the function terminates
+ * the program with an error.
+ */
+static char *find_self(const char *arg0)
+{
+ int i;
+ char *parts, *dir, *scratch;
+ const char *path;
+
+ for (i = strlen(arg0)-1; i >= 0 && arg0[i] != '/'; i--);
+ if (i >= 0) {
+ return strdup(arg0);
+ }
+
+ path = getenv("PATH");
+ if (path == NULL) {
+ fprintf(stderr, "no PATH environment variable found for re-execing\n");
+ exit(1);
+ }
+
+ parts = strdup(path);
+ scratch = malloc(2+strlen(path)+strlen(arg0));
+ if (parts == NULL || scratch == NULL) {
+ fprintf(stderr, "insufficient memory for path building\n");
+ exit(1);
+ }
+
+ for (i=0; (dir = strtok(parts, ":")); parts = NULL) {
+ sprintf(scratch, "%s/%s", dir, arg0);
+ if (access(scratch, X_OK) == 0) {
+ return scratch;
+ }
+ }
+
+ fprintf(stderr, "unable to find executable '%s' in PATH\n", arg0);
+ exit(1);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+ pid_t child;
+ unsigned i;
+ const char *shell = SHELL;
+
+ child = 0;
+
+ char *temp_name = cap_to_name(cap_max_bits() - 1);
+ if (temp_name[0] != 'c') {
+ printf("WARNING: libcap needs an update (cap=%d should have a name).\n",
+ cap_max_bits() - 1);
+ }
+ cap_free(temp_name);
+
+ for (i=1; i<argc; ++i) {
+ if (!strncmp("--drop=", argv[i], 7)) {
+ arg_drop(argv[i]+7);
+ } else if (!strncmp("--dropped=", argv[i], 10)) {
+ cap_value_t cap;
+ if (cap_from_name(argv[i]+10, &cap) < 0) {
+ fprintf(stderr, "cap[%s] not recognized by library\n",
+ argv[i] + 10);
+ exit(1);
}
- cap_free(raised_for_setpcap);
- cap_free(orig);
- } else if (!memcmp("--inh=", argv[i], 6)) {
+ if (cap_get_bound(cap) > 0) {
+ fprintf(stderr, "cap[%s] raised in bounding vector\n",
+ argv[i]+10);
+ exit(1);
+ }
+ } else if (!strcmp("--has-ambient", argv[i])) {
+ if (!CAP_AMBIENT_SUPPORTED()) {
+ fprintf(stderr, "ambient set not supported\n");
+ exit(1);
+ }
+ } else if (!strncmp("--addamb=", argv[i], 9)) {
+ arg_change_amb(argv[i]+9, CAP_SET);
+ } else if (!strncmp("--delamb=", argv[i], 9)) {
+ arg_change_amb(argv[i]+9, CAP_CLEAR);
+ } else if (!strncmp("--noamb", argv[i], 7)) {
+ if (cap_reset_ambient() != 0) {
+ fprintf(stderr, "failed to reset ambient set\n");
+ exit(1);
+ }
+ } else if (!strncmp("--inh=", argv[i], 6)) {
cap_t all, raised_for_setpcap;
char *text;
char *ptr;
@@ -191,7 +482,7 @@ int main(int argc, char *argv[], char *envp[])
*/
cap_free(all);
- } else if (!memcmp("--caps=", argv[i], 7)) {
+ } else if (!strncmp("--caps=", argv[i], 7)) {
cap_t all, raised_for_setpcap;
raised_for_setpcap = cap_get_proc();
@@ -234,7 +525,51 @@ int main(int argc, char *argv[], char *envp[])
*/
cap_free(all);
- } else if (!memcmp("--keep=", argv[i], 7)) {
+ } else if (!strcmp("--modes", argv[i])) {
+ cap_mode_t c;
+ printf("Supported modes:");
+ for (c = 1; ; c++) {
+ const char *m = cap_mode_name(c);
+ if (strcmp("UNKNOWN", m) == 0) {
+ break;
+ }
+ printf(" %s", m);
+ }
+ printf("\n");
+ } else if (!strncmp("--mode=", argv[i], 7)) {
+ const char *target = argv[i]+7;
+ cap_mode_t c;
+ int found = 0;
+ for (c = 1; ; c++) {
+ const char *m = cap_mode_name(c);
+ if (!strcmp("UNKNOWN", m)) {
+ found = 0;
+ break;
+ }
+ if (!strcmp(m, target)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ printf("unsupported mode: %s\n", target);
+ exit(1);
+ }
+ int ret = cap_set_mode(c);
+ if (ret != 0) {
+ printf("failed to set mode [%s]: %s\n",
+ target, strerror(errno));
+ exit(1);
+ }
+ } else if (!strncmp("--inmode=", argv[i], 9)) {
+ const char *target = argv[i]+9;
+ cap_mode_t c = cap_get_mode();
+ const char *m = cap_mode_name(c);
+ if (strcmp(m, target)) {
+ printf("mismatched mode got=%s want=%s\n", m, target);
+ exit(1);
+ }
+ } else if (!strncmp("--keep=", argv[i], 7)) {
unsigned value;
int set;
@@ -245,7 +580,7 @@ int main(int argc, char *argv[], char *envp[])
value, strerror(errno));
exit(1);
}
- } else if (!memcmp("--chroot=", argv[i], 9)) {
+ } else if (!strncmp("--chroot=", argv[i], 9)) {
int status;
cap_t orig, raised_for_chroot;
@@ -290,20 +625,22 @@ int main(int argc, char *argv[], char *envp[])
fprintf(stderr, "Unable to chroot/chdir to [%s]", argv[i]+9);
exit(1);
}
- } else if (!memcmp("--secbits=", argv[i], 10)) {
+ } else if (!strncmp("--secbits=", argv[i], 10)) {
unsigned value;
int status;
-
value = strtoul(argv[i]+10, NULL, 0);
- status = prctl(PR_SET_SECUREBITS, value);
+ status = cap_set_secbits(value);
if (status < 0) {
fprintf(stderr, "failed to set securebits to 0%o/0x%x\n",
value, value);
exit(1);
}
- } else if (!memcmp("--forkfor=", argv[i], 10)) {
+ } else if (!strncmp("--forkfor=", argv[i], 10)) {
unsigned value;
-
+ if (child != 0) {
+ fprintf(stderr, "already forked\n");
+ exit(1);
+ }
value = strtoul(argv[i]+10, NULL, 0);
if (value == 0) {
goto usage;
@@ -315,7 +652,7 @@ int main(int argc, char *argv[], char *envp[])
sleep(value);
exit(0);
}
- } else if (!memcmp("--killit=", argv[i], 9)) {
+ } else if (!strncmp("--killit=", argv[i], 9)) {
int retval, status;
pid_t result;
unsigned value;
@@ -341,7 +678,8 @@ int main(int argc, char *argv[], char *envp[])
, value, WTERMSIG(status));
exit(1);
}
- } else if (!memcmp("--uid=", argv[i], 6)) {
+ child = 0;
+ } else if (!strncmp("--uid=", argv[i], 6)) {
unsigned value;
int status;
@@ -352,7 +690,18 @@ int main(int argc, char *argv[], char *envp[])
value, strerror(errno));
exit(1);
}
- } else if (!memcmp("--gid=", argv[i], 6)) {
+ } else if (!strncmp("--cap-uid=", argv[i], 10)) {
+ unsigned value;
+ int status;
+
+ value = strtoul(argv[i]+10, NULL, 0);
+ status = cap_setuid(value);
+ if (status < 0) {
+ fprintf(stderr, "Failed to cap_setuid(%u): %s\n",
+ value, strerror(errno));
+ exit(1);
+ }
+ } else if (!strncmp("--gid=", argv[i], 6)) {
unsigned value;
int status;
@@ -363,7 +712,7 @@ int main(int argc, char *argv[], char *envp[])
value, strerror(errno));
exit(1);
}
- } else if (!memcmp("--groups=", argv[i], 9)) {
+ } else if (!strncmp("--groups=", argv[i], 9)) {
char *ptr, *buf;
long length, max_groups;
gid_t *group_list;
@@ -408,7 +757,7 @@ int main(int argc, char *argv[], char *envp[])
exit(1);
}
free(group_list);
- } else if (!memcmp("--user=", argv[i], 7)) {
+ } else if (!strncmp("--user=", argv[i], 7)) {
struct passwd *pwd;
const char *user;
gid_t groups[MAX_GROUPS];
@@ -426,24 +775,18 @@ int main(int argc, char *argv[], char *envp[])
perror("Unable to get group list for user");
exit(1);
}
- status = setgroups(ngroups, groups);
+ status = cap_setgroups(pwd->pw_gid, ngroups, groups);
if (status != 0) {
- perror("Unable to set group list for user");
- exit(1);
- }
- status = setgid(pwd->pw_gid);
- if (status < 0) {
- fprintf(stderr, "Failed to set gid=%u(user=%s): %s\n",
- pwd->pw_gid, user, strerror(errno));
+ perror("Unable to set group list for user");
exit(1);
}
- status = setuid(pwd->pw_uid);
+ status = cap_setuid(pwd->pw_uid);
if (status < 0) {
fprintf(stderr, "Failed to set uid=%u(user=%s): %s\n",
pwd->pw_uid, user, strerror(errno));
exit(1);
}
- } else if (!memcmp("--decode=", argv[i], 9)) {
+ } else if (!strncmp("--decode=", argv[i], 9)) {
unsigned long long value;
unsigned cap;
const char *sep = "";
@@ -468,7 +811,7 @@ int main(int argc, char *argv[], char *envp[])
}
}
printf("\n");
- } else if (!memcmp("--supports=", argv[i], 11)) {
+ } else if (!strncmp("--supports=", argv[i], 11)) {
cap_value_t cap;
if (cap_from_name(argv[i] + 11, &cap) < 0) {
@@ -482,111 +825,152 @@ int main(int argc, char *argv[], char *envp[])
exit(1);
}
} else if (!strcmp("--print", argv[i])) {
- unsigned cap;
- int set, status, j;
- cap_t all;
- char *text;
- const char *sep;
- struct group *g;
- gid_t groups[MAX_GROUPS], gid;
- uid_t uid;
- struct passwd *u;
-
- all = cap_get_proc();
- text = cap_to_text(all, NULL);
- printf("Current: %s\n", text);
- cap_free(text);
- cap_free(all);
+ arg_print();
+ } else if ((!strcmp("--", argv[i])) || (!strcmp("==", argv[i]))) {
+ if (argv[i][0] == '=') {
+ argv[i] = find_self(argv[0]);
+ } else {
+ argv[i] = strdup(shell);
+ }
+ argv[argc] = NULL;
+ execve(argv[i], argv+i, envp);
+ fprintf(stderr, "execve '%s' failed!\n", argv[i]);
+ exit(1);
+ } else if (!strncmp("--shell=", argv[i], 8)) {
+ shell = argv[i]+8;
+ } else if (!strncmp("--has-p=", argv[i], 8)) {
+ cap_value_t cap;
+ cap_flag_value_t enabled;
+ cap_t orig;
- printf("Bounding set =");
- sep = "";
- for (cap=0; (set = cap_get_bound(cap)) >= 0; cap++) {
- char *ptr;
- if (!set) {
- continue;
- }
+ if (cap_from_name(argv[i]+8, &cap) < 0) {
+ fprintf(stderr, "cap[%s] not recognized by library\n",
+ argv[i] + 8);
+ exit(1);
+ }
+ orig = cap_get_proc();
+ if (cap_get_flag(orig, cap, CAP_PERMITTED, &enabled) || !enabled) {
+ fprintf(stderr, "cap[%s] not permitted\n", argv[i]+8);
+ exit(1);
+ }
+ cap_free(orig);
+ } else if (!strncmp("--has-i=", argv[i], 8)) {
+ cap_value_t cap;
+ cap_flag_value_t enabled;
+ cap_t orig;
- ptr = cap_to_name(cap);
- if (ptr == NULL) {
- printf("%s%u", sep, cap);
- } else {
- printf("%s%s", sep, ptr);
- cap_free(ptr);
- }
- sep = ",";
+ if (cap_from_name(argv[i]+8, &cap) < 0) {
+ fprintf(stderr, "cap[%s] not recognized by library\n",
+ argv[i] + 8);
+ exit(1);
}
- printf("\n");
- set = prctl(PR_GET_SECUREBITS);
- if (set >= 0) {
- const char *b;
- b = binary(set); /* use verilog convention for binary string */
- printf("Securebits: 0%o/0x%x/%u'b%s\n", set, set,
- (unsigned) strlen(b), b);
- printf(" secure-noroot: %s (%s)\n",
- (set & 1) ? "yes":"no",
- (set & 2) ? "locked":"unlocked");
- printf(" secure-no-suid-fixup: %s (%s)\n",
- (set & 4) ? "yes":"no",
- (set & 8) ? "locked":"unlocked");
- printf(" secure-keep-caps: %s (%s)\n",
- (set & 16) ? "yes":"no",
- (set & 32) ? "locked":"unlocked");
- } else {
- printf("[Securebits ABI not supported]\n");
- set = prctl(PR_GET_KEEPCAPS);
- if (set >= 0) {
- printf(" prctl-keep-caps: %s (locking not supported)\n",
- set ? "yes":"no");
- } else {
- printf("[Keepcaps ABI not supported]\n");
- }
+ orig = cap_get_proc();
+ if (cap_get_flag(orig, cap, CAP_INHERITABLE, &enabled)
+ || !enabled) {
+ fprintf(stderr, "cap[%s] not inheritable\n", argv[i]+8);
+ exit(1);
+ }
+ cap_free(orig);
+ } else if (!strncmp("--has-a=", argv[i], 8)) {
+ cap_value_t cap;
+ if (cap_from_name(argv[i]+8, &cap) < 0) {
+ fprintf(stderr, "cap[%s] not recognized by library\n",
+ argv[i] + 8);
+ exit(1);
+ }
+ if (!cap_get_ambient(cap)) {
+ fprintf(stderr, "cap[%s] not in ambient vector\n", argv[i]+8);
+ exit(1);
}
+ } else if (!strncmp("--is-uid=", argv[i], 9)) {
+ unsigned value;
+ uid_t uid;
+ value = strtoul(argv[i]+9, NULL, 0);
uid = getuid();
- u = getpwuid(uid);
- printf("uid=%u(%s)\n", getuid(), u ? u->pw_name : "???");
+ if (uid != value) {
+ fprintf(stderr, "uid: got=%d, want=%d\n", uid, value);
+ exit(1);
+ }
+ } else if (!strncmp("--is-gid=", argv[i], 9)) {
+ unsigned value;
+ gid_t gid;
+ value = strtoul(argv[i]+9, NULL, 0);
gid = getgid();
- g = getgrgid(gid);
- printf("gid=%u(%s)\n", gid, g ? g->gr_name : "???");
- printf("groups=");
- status = getgroups(MAX_GROUPS, groups);
- sep = "";
- for (j=0; j < status; j++) {
- g = getgrgid(groups[j]);
- printf("%s%u(%s)", sep, groups[j], g ? g->gr_name : "???");
- sep = ",";
+ if (gid != value) {
+ fprintf(stderr, "gid: got=%d, want=%d\n", gid, value);
+ exit(1);
}
- printf("\n");
- } else if ((!strcmp("--", argv[i])) || (!strcmp("==", argv[i]))) {
- argv[i] = strdup(argv[i][0] == '-' ? "/bin/bash" : argv[0]);
- argv[argc] = NULL;
- execve(argv[i], argv+i, envp);
- fprintf(stderr, "execve /bin/bash failed!\n");
- exit(1);
+ } else if (!strncmp("--iab=", argv[i], 6)) {
+ cap_iab_t iab = cap_iab_from_text(argv[i]+6);
+ if (iab == NULL) {
+ fprintf(stderr, "iab: '%s' malformed\n", argv[i]+6);
+ exit(1);
+ }
+ if (cap_iab_set_proc(iab)) {
+ perror("unable to set IAP vectors");
+ exit(1);
+ }
+ cap_free(iab);
+ } else if (!strcmp("--no-new-privs", argv[i])) {
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0) != 0) {
+ perror("unable to set no-new-privs");
+ exit(1);
+ }
+ } else if (!strcmp("--has-no-new-privs", argv[i])) {
+ if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0, 0) != 1) {
+ fprintf(stderr, "no-new-privs not set\n");
+ exit(1);
+ }
+ } else if (!strcmp("--license", argv[i])) {
+ printf(
+ "%s has a you choose license: BSD 3-clause or GPL2\n"
+ "Copyright (c) 2008-11,16,19,2020 Andrew G. Morgan"
+ " <morgan@kernel.org>\n", argv[0]);
+ exit(0);
} else {
usage:
printf("usage: %s [args ...]\n"
- " --help this message (or try 'man capsh')\n"
- " --print display capability relevant state\n"
- " --decode=xxx decode a hex string to a list of caps\n"
- " --supports=xxx exit 1 if capability xxx unsupported\n"
- " --drop=xxx remove xxx,.. capabilities from bset\n"
+ " --has-a=xxx exit 1 if capability xxx not ambient\n"
+ " --has-ambient exit 1 unless ambient vector supported\n"
+ " --addamb=xxx add xxx,... capabilities to ambient set\n"
+ " --cap-uid=<n> use libcap cap_setuid() to change uid\n"
" --caps=xxx set caps as per cap_from_text()\n"
- " --inh=xxx set xxx,.. inheritiable set\n"
- " --secbits=<n> write a new value for securebits\n"
- " --keep=<n> set keep-capabability bit to <n>\n"
- " --uid=<n> set uid to <n> (hint: id <username>)\n"
+ " --chroot=path chroot(2) to this path\n"
+ " --decode=xxx decode a hex string to a list of caps\n"
+ " --delamb=xxx remove xxx,... capabilities from ambient\n"
+ " --forkfor=<n> fork and make child sleep for <n> sec\n"
" --gid=<n> set gid to <n> (hint: id <username>)\n"
" --groups=g,... set the supplemental groups\n"
- " --user=<name> set uid,gid and groups to that of user\n"
- " --chroot=path chroot(2) to this path\n"
+ " --has-p=xxx exit 1 if capability xxx not permitted\n"
+ " --has-i=xxx exit 1 if capability xxx not inheritable\n"
+ " --has-no-new-privs exit 1 if privs not limited\n"
+ " --help, -h this message (or try 'man capsh')\n"
+ " --iab=... use cap_iab_from_text() to set iab\n"
+ " --inh=xxx set xxx,.. inheritable set\n"
+ " --inmode=<xxx> exit 1 if current mode is not <xxx>\n"
+ " --is-uid=<n> exit 1 if uid != <n>\n"
+ " --is-gid=<n> exit 1 if gid != <n>\n"
+ " --keep=<n> set keep-capability bit to <n>\n"
" --killit=<n> send signal(n) to child\n"
- " --forkfor=<n> fork and make child sleep for <n> sec\n"
+ " --license display license info\n"
+ " --modes list libcap named capability modes\n"
+ " --mode=<xxx> set capability mode to <xxx>\n"
+ " --no-new-privs set sticky process privilege limiter\n"
+ " --noamb reset (drop) all ambient capabilities\n"
+ " --print display capability relevant state\n"
+ " --secbits=<n> write a new value for securebits\n"
+ " --shell=/xx/yy use /xx/yy instead of " SHELL " for --\n"
+ " --supports=xxx exit 1 if capability xxx unsupported\n"
+ " --uid=<n> set uid to <n> (hint: id <username>)\n"
+ " --user=<name> set uid,gid and groups to that of user\n"
" == re-exec(capsh) with args as for --\n"
- " -- remaing arguments are for /bin/bash\n"
+ " -- remaining arguments are for " SHELL "\n"
" (without -- [%s] will simply exit(0))\n",
argv[0], argv[0]);
-
- exit(strcmp("--help", argv[i]) != 0);
+ if (strcmp("--help", argv[1]) && strcmp("-h", argv[1])) {
+ exit(1);
+ }
+ exit(0);
}
}
diff --git a/progs/getcap.c b/progs/getcap.c
index f6debc0..208bd6a 100644
--- a/progs/getcap.c
+++ b/progs/getcap.c
@@ -1,9 +1,10 @@
/*
- * Copyright (c) 1997,2007 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1997,2007 Andrew G. Morgan <morgan@kernel.org>
*
* This displays the capabilities of a given file.
*/
+#undef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500
#include <errno.h>
@@ -20,15 +21,16 @@
static int verbose = 0;
static int recursive = 0;
+static int namespace = 0;
-static void usage(void)
+static void usage(int code)
{
fprintf(stderr,
- "usage: getcap [-v] [-r] [-h] <filename> [<filename> ...]\n"
- "\n"
- "\tdisplays the capabilities on the queried file(s).\n"
+ "usage: getcap [-h] [-l] [-n] [-r] [-v] <filename> [<filename> ...]\n"
+ "\n"
+ "\tdisplays the capabilities on the queried file(s).\n"
);
- exit(1);
+ exit(code);
}
static int do_getcap(const char *fname, const struct stat *stbuf,
@@ -36,6 +38,7 @@ static int do_getcap(const char *fname, const struct stat *stbuf,
{
cap_t cap_d;
char *result;
+ uid_t rootid;
if (tflag != FTW_F) {
if (verbose) {
@@ -47,7 +50,7 @@ static int do_getcap(const char *fname, const struct stat *stbuf,
cap_d = cap_get_file(fname);
if (cap_d == NULL) {
if (errno != ENODATA) {
- fprintf(stderr, "Failed to get capabilities of file `%s' (%s)\n",
+ fprintf(stderr, "Failed to get capabilities of file '%s' (%s)\n",
fname, strerror(errno));
} else if (verbose) {
printf("%s\n", fname);
@@ -58,12 +61,17 @@ static int do_getcap(const char *fname, const struct stat *stbuf,
result = cap_to_text(cap_d, NULL);
if (!result) {
fprintf(stderr,
- "Failed to get capabilities of human readable format at `%s' (%s)\n",
+ "Failed to get capabilities of human readable format at '%s' (%s)\n",
fname, strerror(errno));
cap_free(cap_d);
return 0;
}
- printf("%s %s\n", fname, result);
+ rootid = cap_get_nsowner(cap_d);
+ if (namespace && (rootid+1 > 1)) {
+ printf("%s %s [rootid=%d]\n", fname, result, rootid);
+ } else {
+ printf("%s %s\n", fname, result);
+ }
cap_free(cap_d);
cap_free(result);
@@ -74,7 +82,7 @@ int main(int argc, char **argv)
{
int i, c;
- while ((c = getopt(argc, argv, "rvh")) > 0) {
+ while ((c = getopt(argc, argv, "rvhnl")) > 0) {
switch(c) {
case 'r':
recursive = 1;
@@ -82,13 +90,23 @@ int main(int argc, char **argv)
case 'v':
verbose = 1;
break;
+ case 'n':
+ namespace = 1;
+ break;
+ case 'h':
+ usage(0);
+ case 'l':
+ printf("%s has a you choose license: BSD 3-clause or GPL2\n"
+ "Copyright (c) 1997,2007 Andrew G. Morgan"
+ " <morgan@kernel.org>\n", argv[0]);
+ exit(0);
default:
- usage();
+ usage(1);
}
}
if (!argv[optind])
- usage();
+ usage(1);
for (i=optind; argv[i] != NULL; i++) {
struct stat stbuf;
diff --git a/progs/getpcaps.c b/progs/getpcaps.c
index e405a92..5bc511e 100644
--- a/progs/getpcaps.c
+++ b/progs/getpcaps.c
@@ -11,23 +11,28 @@
#include <stdlib.h>
#include <sys/capability.h>
-static void usage(void)
+static void usage(int code)
{
fprintf(stderr,
"usage: getcaps <pid> [<pid> ...]\n\n"
" This program displays the capabilities on the queried process(es).\n"
-" The capabilities are displayed in the cap_from_text(3) format.\n\n"
-"[Copyright (c) 1997-8,2007 Andrew G. Morgan <morgan@kernel.org>]\n"
- );
- exit(1);
+ " The capabilities are displayed in the cap_from_text(3) format.\n"
+ "\n"
+ " Optional arguments:\n"
+ " --help, -h or --usage display this message.\n"
+ " --verbose use a more verbose output format.\n"
+ " --ugly or --legacy use the archaic legacy output format.\n"
+ " --license display license info\n");
+ exit(code);
}
int main(int argc, char **argv)
{
int retval = 0;
+ int verbose = 0;
if (argc < 2) {
- usage();
+ usage(1);
}
for ( ++argv; --argc > 0; ++argv ) {
@@ -35,17 +40,39 @@ int main(int argc, char **argv)
int pid;
cap_t cap_d;
+ if (!strcmp(argv[0], "--help") || !strcmp(argv[0], "--usage") ||
+ !strcmp(argv[0], "-h")) {
+ usage(0);
+ } else if (!strcmp(argv[0], "--license")) {
+ printf("%s has a you choose license: BSD 3-clause or GPL2\n"
+"[Copyright (c) 1997-8,2007,2019 Andrew G. Morgan <morgan@kernel.org>]\n",
+ argv[0]);
+ exit(0);
+ } else if (!strcmp(argv[0], "--verbose")) {
+ verbose = 1;
+ continue;
+ } else if (!strcmp(argv[0], "--ugly") || !strcmp(argv[0], "--legacy")) {
+ verbose = 2;
+ continue;
+ }
+
pid = atoi(argv[0]);
cap_d = cap_get_pid(pid);
if (cap_d == NULL) {
- fprintf(stderr, "Failed to get cap's for proccess %d:"
+ fprintf(stderr, "Failed to get cap's for process %d:"
" (%s)\n", pid, strerror(errno));
retval = 1;
continue;
} else {
char *result = cap_to_text(cap_d, &length);
- fprintf(stderr, "Capabilities for `%s': %s\n", *argv, result);
+ if (verbose == 1) {
+ printf("Capabilities for '%s': %s\n", *argv, result);
+ } else if (verbose == 2) {
+ fprintf(stderr, "Capabilities for `%s': %s\n", *argv, result);
+ } else {
+ printf("%s: %s\n", *argv, result);
+ }
cap_free(result);
result = NULL;
cap_free(cap_d);
diff --git a/progs/quicktest.sh b/progs/quicktest.sh
index e8b2c8e..6aa2598 100755
--- a/progs/quicktest.sh
+++ b/progs/quicktest.sh
@@ -44,9 +44,15 @@ pass_capsh () {
pass_capsh --print
+# Validate that PATH expansion works
+PATH=$(/bin/pwd)/junk:$(/bin/pwd) capsh == == == --modes
+if [ $? -ne 0 ]; then
+ echo "Failed to execute capsh consecutively for capability manipulation"
+ exit 1
+fi
# Make a local non-setuid-0 version of capsh and call it privileged
-cp ./capsh ./privileged && chmod -s ./privileged
+cp ./tcapsh-static ./privileged && /bin/chmod -s ./privileged
if [ $? -ne 0 ]; then
echo "Failed to copy capsh for capability manipulation"
exit 1
@@ -64,21 +70,37 @@ if [ $? -ne 0 ]; then
exit 1
fi
+# validate libcap modes:
+pass_capsh --inh=cap_chown --mode=PURE1E --print --inmode=PURE1E
+pass_capsh --mode=NOPRIV --print --inmode=NOPRIV
+pass_capsh --mode=PURE1E --print --mode=NOPRIV --inmode=NOPRIV
+fail_capsh --mode=NOPRIV --print --mode=PURE1E
+fail_capsh --user=nobody --mode=NOPRIV --print -- ./privileged
+
+# simple IAB setting (no ambient) in pure1e mode.
+pass_capsh --mode=PURE1E --iab='!%cap_chown,cap_sys_admin'
+
# Explore keep_caps support
pass_capsh --keep=0 --keep=1 --keep=0 --keep=1 --print
-rm -f tcapsh
-cp capsh tcapsh
-chown root.root tcapsh
-chmod u+s tcapsh
-ls -l tcapsh
+/bin/rm -f tcapsh
+/bin/cp tcapsh-static tcapsh
+/bin/chown root.root tcapsh
+/bin/chmod u+s tcapsh
+/bin/ls -l tcapsh
-# leverage keep caps maintain capabilities accross a change of uid
+# leverage keep caps to maintain capabilities across a change of euid
# from setuid root to capable luser (as per wireshark/dumpcap 0.99.7)
-pass_capsh --uid=500 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_admin=ip\" --uid=500 --caps=\"cap_net_raw,cap_net_admin=pie\" --print"
+# This test is subtle. It is testing that a change to self, dropping
+# euid=0 back to that of the luser keeps capabilities.
+pass_capsh --uid=1 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_admin=ip\" --print --uid=1 --print --caps=\"cap_net_raw,cap_net_admin=pie\" --print"
+
+# This test changes to a new user. Note that we need to raise the
+# cap_setuid capability (libcap has a function for that) in this case.
+pass_capsh --uid=1 -- -c "./tcapsh --caps=\"cap_net_raw,cap_net_admin=ip cap_setuid=p\" --print --cap-uid=2 --print --caps=\"cap_net_raw,cap_net_admin=pie\" --print"
# This fails, on 2.6.24, but shouldn't
-pass_capsh --uid=500 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_admin=ip\" --uid=500 --forkfor=10 --caps= --print --killit=9 --print"
+pass_capsh --uid=1 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_admin=ip\" --uid=1 --forkfor=10 --caps= --print --killit=9 --print"
# only continue with these if --secbits is supported
./capsh --secbits=0x2f > /dev/null 2>&1
@@ -98,7 +120,7 @@ fail_capsh --secbits=32 --keep=1 --keep=0 --print
pass_capsh --secbits=10 --keep=0 --keep=1 --print
fail_capsh --secbits=47 -- -c "./tcapsh --uid=$nouid"
-rm -f tcapsh
+/bin/rm -f tcapsh
# Suppress uid=0 privilege
fail_capsh --secbits=47 --print -- -c "./capsh --uid=$nouid"
@@ -115,12 +137,10 @@ fail_capsh --drop=cap_setuid --secbits=0x2f --print -- -c "./privileged --uid=$n
# Note, the bounding set (edited with --drop) only limits p
# capabilities, not i's.
pass_capsh --secbits=47 --inh=cap_setuid,cap_setgid --drop=cap_setuid \
- --uid=500 --print -- -c "./privileged --uid=$nouid"
-
-rm -f ./privileged
+ --uid=1 --print -- -c "./privileged --uid=$nouid"
# test that we do not support capabilities on setuid shell-scripts
-cat > hack.sh <<EOF
+/bin/cat > hack.sh <<EOF
#!/bin/bash
/usr/bin/id
mypid=\$\$
@@ -134,20 +154,109 @@ else
fi
exit 0
EOF
-chmod +xs hack.sh
-./capsh --uid=500 --inh=none --print -- ./hack.sh
+/bin/chmod +xs hack.sh
+./capsh --uid=1 --inh=none --print -- ./hack.sh
status=$?
-rm -f ./hack.sh
+/bin/rm -f ./hack.sh
if [ $status -ne 0 ]; then
echo "shell scripts can have capabilities (bug)"
exit 1
fi
-# Max lockdown
+# Max lockdown (i.e., pure capability model as POSIX.1e intended).
+secbits=0x2f
+if ./capsh --has-ambient ; then
+ secbits="0xef --noamb"
+fi
pass_capsh --keep=1 --uid=$nouid --caps=cap_setpcap=ep \
- --drop=all --secbits=0x2f --caps= --print
+ --drop=all --secbits=$secbits --caps= --print
# Verify we can chroot
pass_capsh --chroot=$(/bin/pwd)
-pass_capsh --chroot=$(/bin/pwd) ==
+pass_capsh -- -c "./tcapsh-static --chroot=$(/bin/pwd) =="
fail_capsh --chroot=$(/bin/pwd) -- -c "echo oops"
+
+./capsh --has-ambient
+if [ $? -eq 0 ]; then
+ echo "test ambient capabilities"
+
+ # Ambient capabilities (any file can inherit capabilities)
+ pass_capsh --noamb
+
+ # test that shell scripts can inherit through ambient capabilities
+ /bin/cat > hack.sh <<EOF
+#!/bin/bash
+/usr/bin/id
+mypid=\$\$
+caps=\$(./getpcaps \$mypid 2>&1 | /usr/bin/cut -d: -f2)
+if [ "\$caps" != " = cap_setuid+i" ]; then
+ echo "Shell script got [\$caps]"
+ exit 0
+fi
+ls -l \$0
+echo "no capabilities [\$caps] for this shell script"
+exit 1
+EOF
+ /bin/chmod +x hack.sh
+ pass_capsh --keep=1 --uid=$nouid --inh=cap_setuid --addamb=cap_setuid -- ./hack.sh
+
+ /bin/rm -f hack.sh
+
+ # Next force the privileged binary to have an empty capability set.
+ # This is sort of the opposite of privileged - it should ensure that
+ # the file can never acquire privilege by the ambient method.
+ ./setcap = ./privileged
+ fail_capsh --keep=1 --uid=$nouid --inh=cap_setuid --addamb=cap_setuid -- -c "./privileged --print --uid=1"
+
+ # finally remove the capability from the privileged binary and try again.
+ ./setcap -r ./privileged
+ pass_capsh --keep=1 --uid=$nouid --inh=cap_setuid --addamb=cap_setuid -- -c "./privileged --print --uid=1"
+
+ # validate IAB setting with an ambient capability
+ pass_capsh --iab='!%cap_chown,^cap_setpcap,cap_sys_admin'
+ fail_capsh --mode=PURE1E --iab='!%cap_chown,^cap_sys_admin'
+fi
+/bin/rm -f ./privileged
+
+echo "testing namespaced file caps"
+
+# nsprivileged capsh will have an ns rootid value (this is
+# the same setup as an earlier test but with a ns file cap).
+rm -f nsprivileged
+cp ./tcapsh-static ./nsprivileged && /bin/chmod -s ./nsprivileged
+./setcap -n 1 all=ep ./nsprivileged
+if [ $? -eq 0 ]; then
+ ./getcap -n ./nsprivileged | fgrep "[rootid=1]"
+ if [ $? -ne 0 ]; then
+ echo "FAILED setting ns rootid on file"
+ exit 1
+ fi
+ # since this is a ns file cap and not a regular one, it should not
+ # lead to a privilege escalation outside of the namespace it
+ # refers to. We suppress uid=0 privilege and confirm this
+ # nsprivileged binary does not have the power to change uid.
+ fail_capsh --secbits=$secbits --print -- -c "./nsprivileged --uid=$nouid"
+else
+ echo "ns file caps not supported - skipping test"
+fi
+rm -f nsprivileged
+
+# If the build tree compiled the Go cap package.
+if [ -f ../go/compare-cap ]; then
+ cp ../go/compare-cap .
+ LD_LIBRARY_PATH=../libcap ./compare-cap
+ if [ $? -ne 0 ]; then
+ echo "FAILED to execute go binary"
+ exit 1
+ fi
+ LD_LIBRARY_PATH=../libcap ./compare-cap 2>&1 | grep "skipping file cap tests"
+ if [ $? -eq 0 ]; then
+ echo "FAILED not engaging file cap tests"
+ fi
+ echo "PASSED"
+else
+ echo "no Go support compiled, so skipping Go tests"
+fi
+rm -f compare-cap
+
+echo "ALL TESTS PASSED!"
diff --git a/progs/setcap.c b/progs/setcap.c
index 7304343..930429a 100644
--- a/progs/setcap.c
+++ b/progs/setcap.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997,2007-8 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1997,2007-8,2020 Andrew G. Morgan <morgan@kernel.org>
*
* This sets/verifies the capabilities of a given file.
*/
@@ -11,15 +11,24 @@
#include <sys/capability.h>
#include <unistd.h>
-static void usage(void)
+static void usage(int status)
{
fprintf(stderr,
- "usage: setcap [-q] [-v] (-r|-|<caps>) <filename> "
+ "usage: setcap [-h] [-q] [-v] [-n <rootid>] (-r|-|<caps>) <filename> "
"[ ... (-r|-|<capsN>) <filenameN> ]\n"
"\n"
" Note <filename> must be a regular (non-symlink) file.\n"
+ " -r remove capability from file\n"
+ " - read capability text from stdin\n"
+ " <capsN> cap_from_text(3) formatted file capability\n"
+ "\n"
+ " -h this message and exit status 0\n"
+ " -q quietly\n"
+ " -v validate supplied capability matches file\n"
+ " -n <rootid> write a user namespace limited capability\n"
+ " --license display the license info\n"
);
- exit(1);
+ exit(status);
}
#define MAXCAP 2048
@@ -60,12 +69,13 @@ int main(int argc, char **argv)
{
int tried_to_cap_setfcap = 0;
char buffer[MAXCAP+1];
- int retval, quiet=0, verify=0;
+ int retval, quiet = 0, verify = 0;
cap_t mycaps;
cap_value_t capflag;
+ uid_t rootid = 0, f_rootid;
- if (argc < 3) {
- usage();
+ if (argc < 2) {
+ usage(1);
}
mycaps = cap_get_proc();
@@ -82,10 +92,33 @@ int main(int argc, char **argv)
quiet = 1;
continue;
}
+ if (!strcmp("--license", *argv)) {
+ printf(
+ "%s has a you choose license: BSD 3-clause or GPL2\n"
+ "Copyright (c) 1997,2007-8,2020 Andrew G. Morgan"
+ " <morgan@kernel.org>\n", argv[0]);
+ exit(0);
+ }
+ if (!strcmp(*argv, "-h")) {
+ usage(0);
+ }
if (!strcmp(*argv, "-v")) {
verify = 1;
continue;
}
+ if (!strcmp(*argv, "-n")) {
+ if (argc < 2) {
+ fprintf(stderr, "usage: .. -n <rootid> .. - rootid!=0 file caps");
+ exit(1);
+ }
+ --argc;
+ rootid = (uid_t) atoi(*++argv);
+ if (rootid+1 < 2) {
+ fprintf(stderr, "invalid rootid!=0 of '%s'", *argv);
+ exit(1);
+ }
+ continue;
+ }
if (!strcmp(*argv, "-r")) {
cap_d = NULL;
@@ -93,7 +126,7 @@ int main(int argc, char **argv)
if (!strcmp(*argv,"-")) {
retval = read_caps(quiet, *argv, buffer);
if (retval)
- usage();
+ usage(1);
text = buffer;
} else {
text = *argv;
@@ -102,7 +135,11 @@ int main(int argc, char **argv)
cap_d = cap_from_text(text);
if (cap_d == NULL) {
perror("fatal error");
- usage();
+ usage(1);
+ }
+ if (cap_set_nsowner(cap_d, rootid)) {
+ perror("unable to set nsowner");
+ exit(1);
}
#ifdef DEBUG
{
@@ -116,7 +153,7 @@ int main(int argc, char **argv)
}
if (--argc <= 0)
- usage();
+ usage(1);
/*
* Set the filesystem capability for this file.
*/
@@ -135,10 +172,14 @@ int main(int argc, char **argv)
}
cmp = cap_compare(cap_on_file, cap_d);
+ f_rootid = cap_get_nsowner(cap_on_file);
cap_free(cap_on_file);
- if (cmp != 0) {
+ if (cmp != 0 || rootid != f_rootid) {
if (!quiet) {
+ if (rootid != f_rootid) {
+ printf("nsowner[got=%d, want=%d],", f_rootid, rootid);
+ }
printf("%s differs in [%s%s%s]\n", *argv,
CAP_DIFFERS(cmp, CAP_PERMITTED) ? "p" : "",
CAP_DIFFERS(cmp, CAP_INHERITABLE) ? "i" : "",
@@ -172,6 +213,7 @@ int main(int argc, char **argv)
if (retval != 0) {
int explained = 0;
int oerrno = errno;
+ int somebits = 0;
#ifdef linux
cap_value_t cap;
cap_flag_value_t per_state;
@@ -179,24 +221,28 @@ int main(int argc, char **argv)
for (cap = 0;
cap_get_flag(cap_d, cap, CAP_PERMITTED, &per_state) != -1;
cap++) {
- cap_flag_value_t inh_state, eff_state;
+ cap_flag_value_t inh_state, eff_state, combined;
cap_get_flag(cap_d, cap, CAP_INHERITABLE, &inh_state);
cap_get_flag(cap_d, cap, CAP_EFFECTIVE, &eff_state);
- if ((inh_state | per_state) != eff_state) {
- fprintf(stderr, "NOTE: Under Linux, effective file capabilities must either be empty, or\n"
- " exactly match the union of selected permitted and inheritable bits.\n");
+ combined = (inh_state | per_state);
+ somebits |= !!eff_state;
+ if (combined != eff_state) {
explained = 1;
break;
}
}
+ if (somebits && explained) {
+ fprintf(stderr, "NOTE: Under Linux, effective file capabilities must either be empty, or\n"
+ " exactly match the union of selected permitted and inheritable bits.\n");
+ }
#endif /* def linux */
-
+
fprintf(stderr,
"Failed to set capabilities on file `%s' (%s)\n",
argv[0], strerror(oerrno));
if (!explained) {
- usage();
+ usage(1);
}
}
}
diff --git a/psx/LICENSE b/psx/LICENSE
new file mode 100644
index 0000000..e2574a7
--- /dev/null
+++ b/psx/LICENSE
@@ -0,0 +1,385 @@
+Unless otherwise *explicitly* stated, the following text describes the
+licensed conditions under which the contents of this libcap/psx release
+may be used and distributed:
+
+-------------------------------------------------------------------------
+Redistribution and use in source and binary forms of libcap/psx, with
+or without modification, are permitted provided that the following
+conditions are met:
+
+1. Redistributions of source code must retain any existing copyright
+ notice, and this entire permission notice in its entirety,
+ including the disclaimer of warranties.
+
+2. Redistributions in binary form must reproduce all prior and current
+ copyright notices, this list of conditions, and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+3. The name of any author may not be used to endorse or promote
+ products derived from this software without their specific prior
+ written permission.
+
+ALTERNATIVELY, this product may be distributed under the terms of the
+GNU General Public License (v2.0 - see below), in which case the
+provisions of the GNU GPL are required INSTEAD OF the above
+restrictions. (This clause is necessary due to a potential conflict
+between the GNU GPL and the restrictions contained in a BSD-style
+copyright.)
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+-------------------------------------------------------------------------
+
+-------------------------
+Full text of gpl-2.0.txt:
+-------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/psx/README b/psx/README
new file mode 100644
index 0000000..cd9c651
--- /dev/null
+++ b/psx/README
@@ -0,0 +1,28 @@
+Package "psx" provides an API for invoking system calls in a way that
+each system call is mirrored on all OS threads of the combined Go/CGo
+runtime. Since the Go runtime treats OS threads as interchangeable, a
+feature like this is needed to meaningfully change process privilege
+(including dropping privilege) in a Go program running on Linux. This
+package is required by:
+
+ "kernel.org/pub/linux/libs/security/libcap/cap"
+
+When compiled CGO_ENABLED=0, the functionality requires go1.16+ to
+build. That release of Go introduced syscall.AllThreadsSyscall*()
+APIs. When compiled this way, the "psx" package functions
+psx.Syscall3() and psx.Syscall6() are aliased to
+syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6()
+respectively.
+
+When compiled CGO_ENABLED=1, the functionality is implemented by C
+code, [lib]psx, which is distributed with libcap.
+
+The official release announcement site for libcap and libpsx is:
+
+ https://sites.google.com/site/fullycapable/
+
+Like libcap/libpsx itself, the "psx" package is distributed with a
+"you choose" License. Specifically: BSD three clause, or GPL2. See the
+LICENSE file.
+
+Andrew G. Morgan <morgan@kernel.org>
diff --git a/psx/doc.go b/psx/doc.go
new file mode 100644
index 0000000..e6f9013
--- /dev/null
+++ b/psx/doc.go
@@ -0,0 +1,60 @@
+// Package psx provides support for system calls that are run
+// simultaneously on all threads under Linux.
+//
+// This property can be used to work around a historical lack of
+// native Go support for such a feature. Something that is the subject
+// of:
+//
+// https://github.com/golang/go/issues/1435
+//
+// The package works differently depending on whether or not
+// CGO_ENABLED is 0 or 1.
+//
+// In the former case, psx is a low overhead wrapper for the two
+// native go calls: syscall.AllThreadsSyscall() and
+// syscall.AllThreadsSyscall6(), introduced in go1.16. We provide
+// this wrapping to minimize client source code changes when
+// compiling with or without CGo enabled.
+//
+// In the latter case, and toolchains prior to go1.16, it works via
+// CGo wrappers for system call functions that call the C [lib]psx
+// functions of these names. This ensures that the system calls
+// execute simultaneously on all the pthreads of the Go (and CGo)
+// combined runtime.
+//
+// With CGo, the psx support works in the following way: the pthread
+// that is first asked to execute the syscall does so, and determines
+// if it succeeds or fails. If it fails, it returns immediately
+// without attempting the syscall on other pthreads. If the initial
+// attempt succeeds, however, then the runtime is stopped in order for
+// the same system call to be performed on all the remaining pthreads
+// of the runtime. Once all pthreads have completed the syscall, the
+// return codes are those obtained by the first pthread's invocation
+// of the syscall.
+//
+// Note, there is no need to use this variant of syscall where the
+// syscalls only read state from the kernel. However, since Go's
+// runtime freely migrates code execution between pthreads, support of
+// this type is required for any successful attempt to fully drop or
+// modify the privilege of a running Go program under Linux.
+//
+// More info on how Linux privilege works and examples of using this
+// package can be found here:
+//
+// https://sites.google.com/site/fullycapable
+//
+// WARNING: For older go toolchains (prior to go1.15), correct
+// compilation of this package may require an extra workaround step:
+//
+// The workaround is to build with the following CGO_LDFLAGS_ALLOW in
+// effect (here the syntax is that of bash for defining an environment
+// variable):
+//
+// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
+//
+//
+// Copyright (c) 2019,20 Andrew G. Morgan <morgan@kernel.org>
+//
+// The psx package is licensed with a (you choose) BSD 3-clause or
+// GPL2. See LICENSE file for details.
+package psx // import "kernel.org/pub/linux/libs/security/libcap/psx"
diff --git a/psx/go.mod b/psx/go.mod
new file mode 100644
index 0000000..c5f34b6
--- /dev/null
+++ b/psx/go.mod
@@ -0,0 +1,3 @@
+module kernel.org/pub/linux/libs/security/libcap/psx
+
+go 1.11
diff --git a/psx/psx.c b/psx/psx.c
new file mode 100644
index 0000000..4de3653
--- /dev/null
+++ b/psx/psx.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) 2019-21 Andrew G Morgan <morgan@kernel.org>
+ *
+ * This file contains a collection of routines that perform thread
+ * synchronization to ensure that a whole process is running as a
+ * single privilege entity - independent of the number of pthreads.
+ *
+ * The whole file would be unnecessary if glibc exported an explicit
+ * psx_syscall()-like function that leveraged the nptl:setxid
+ * mechanism to synchronize thread state over the whole process.
+ */
+#undef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 199309L
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "psx_syscall.h"
+
+/*
+ * psx_load_syscalls() can be weakly defined in dependent libraries to
+ * provide a mechanism for a library to optionally leverage this psx
+ * mechanism. Specifically, when libcap calls psx_load_syscalls() it
+ * provides a weakly declared default that maps its system calls to
+ * the regular system call functions. However, when linked with psx,
+ * this function here overrides the syscalls to be the psx ones.
+ */
+void psx_load_syscalls(
+    long int (**syscall_fn)(long int, long int, long int, long int),
+    long int (**syscall6_fn)(long int, long int, long int, long int,
+                             long int, long int, long int))
+{
+    /* Hand the caller the psx variants of both syscall entry points. */
+    *syscall6_fn = psx_syscall6;
+    *syscall_fn = psx_syscall3;
+}
+
+/*
+ * type to keep track of registered threads.
+ *
+ * Nodes form a doubly linked list rooted at psx_tracker.root. A node
+ * is allocated by psx_do_registration() and released by
+ * psx_do_unregister() (or wholesale by _psx_forked_child()).
+ */
+typedef struct registered_thread_s {
+ struct registered_thread_s *next, *prev; /* list linkage */
+ pthread_t thread; /* pthread_self() of the registering thread */
+ pthread_mutex_t mu; /* held while 'pending' and 'gone' are accessed */
+ int pending; /* set by __psx_syscall() before signalling; cleared by the handler */
+ int gone; /* set by _psx_exiting() once the thread stops participating */
+} registered_thread_t;
+
+static pthread_once_t psx_tracker_initialized = PTHREAD_ONCE_INIT;
+
+/*
+ * States of psx_tracker.state. Transitions are made with
+ * psx_new_state(), which waits for the expected prior state.
+ */
+typedef enum {
+ _PSX_IDLE = 0, /* no psx operation is in progress */
+ _PSX_SETUP = 1, /* __psx_syscall() is performing the first, local, call */
+ _PSX_SYSCALL = 2, /* __psx_syscall() is mirroring the call on other threads */
+ _PSX_CREATE = 3, /* __wrap_pthread_create() is launching a thread */
+ _PSX_INFORK = 4, /* entered by _psx_prepare_fork() */
+ _PSX_EXITING = 5, /* entered by _psx_exiting() for a dying thread */
+} psx_tracker_state_t;
+
+/*
+ * This global structure holds the global coordination state for
+ * libcap's psx_posix_syscall() support.
+ */
+static struct psx_tracker_s {
+ int has_forked; /* set by _psx_forked_child(); makes __psx_syscall() call directly */
+
+ pthread_mutex_t state_mu; /* the global lock taken by psx_lock() */
+ pthread_cond_t cond; /* this is only used to wait on 'state' changes */
+ psx_tracker_state_t state; /* changed only via psx_new_state() */
+ int initialized; /* set at the end of psx_syscall_start() */
+ int psx_sig; /* signal used to interrupt threads; chosen in psx_syscall_start() */
+
+ /* the syscall being mirrored, as recorded by __psx_immediate_syscall() */
+ struct {
+ long syscall_nr;
+ long arg1, arg2, arg3, arg4, arg5, arg6;
+ int six; /* non-zero: use the six argument form */
+ int active; /* non-zero while __psx_syscall() is signalling threads */
+ } cmd;
+
+ struct sigaction sig_action; /* the psx handler, as installed */
+ struct sigaction chained_action; /* handler found installed before psx's */
+ registered_thread_t *root; /* head of the registered thread list */
+} psx_tracker;
+
+/*
+ * psx_action_key is used for thread local storage of the thread's
+ * registration.
+ */
+pthread_key_t psx_action_key;
+
+/*
+ * psx_do_registration is called locked (from the pthread_once
+ * initializer, or while the tracker is in the _PSX_CREATE state) and
+ * creates a tracker entry for the current thread. The entry is stored
+ * under the psx_action_key TLS key, linked at the head of the
+ * psx_tracker.root list, and returned.
+ *
+ * Callers cannot proceed without a node (the thread exit path
+ * dereferences it unconditionally), so failing to allocate one is
+ * treated as fatal rather than dereferencing NULL.
+ */
+static void *psx_do_registration(void) {
+    registered_thread_t *node = calloc(1, sizeof(*node));
+    if (node == NULL) {
+        perror("unable to register psx handler");
+        _exit(1);
+    }
+    pthread_mutex_init(&node->mu, NULL);
+    node->thread = pthread_self();
+    pthread_setspecific(psx_action_key, node);
+    node->next = psx_tracker.root;
+    if (node->next) {
+        node->next->prev = node;
+    }
+    psx_tracker.root = node;
+    return node;
+}
+
+/*
+ * psx_posix_syscall_actor performs the system call on the targeted
+ * thread and signals it is no longer pending.
+ *
+ * Any delivery that does not look like one raised by __psx_syscall()
+ * (wrong signal, no command active, or not a thread-directed signal
+ * from this process) is passed on to the handler that was installed
+ * before psx took over the signal, if there was one.
+ */
+static void psx_posix_syscall_actor(int signum, siginfo_t *info, void *ignore) {
+    /* bail early if this isn't something we recognize */
+    if (signum != psx_tracker.psx_sig || !psx_tracker.cmd.active ||
+        info == NULL || info->si_code != SI_TKILL || info->si_pid != getpid()) {
+        const struct sigaction *prior = &psx_tracker.chained_action;
+
+        /*
+         * Honor how the prior handler was registered. SIG_DFL and
+         * SIG_IGN are dispositions rather than callable addresses, so
+         * they must never be invoked.
+         */
+        if (prior->sa_flags & SA_SIGINFO) {
+            if (prior->sa_sigaction != NULL) {
+                prior->sa_sigaction(signum, info, ignore);
+            }
+        } else if (prior->sa_handler != SIG_DFL &&
+                   prior->sa_handler != SIG_IGN) {
+            prior->sa_handler(signum);
+        }
+        return;
+    }
+
+    /*
+     * The mirrored syscall may fail on this thread and set errno.
+     * The interrupted code must not observe that, so remember the
+     * value it was relying on and put it back before returning.
+     */
+    int saved_errno = errno;
+
+    if (!psx_tracker.cmd.six) {
+        (void) syscall(psx_tracker.cmd.syscall_nr,
+                       psx_tracker.cmd.arg1,
+                       psx_tracker.cmd.arg2,
+                       psx_tracker.cmd.arg3);
+    } else {
+        (void) syscall(psx_tracker.cmd.syscall_nr,
+                       psx_tracker.cmd.arg1,
+                       psx_tracker.cmd.arg2,
+                       psx_tracker.cmd.arg3,
+                       psx_tracker.cmd.arg4,
+                       psx_tracker.cmd.arg5,
+                       psx_tracker.cmd.arg6);
+    }
+
+    /*
+     * This handler can only be called on registered threads which
+     * have had this specific defined at start-up. (But see the
+     * subsequent test.)
+     */
+    registered_thread_t *ref = pthread_getspecific(psx_action_key);
+    if (ref) {
+        pthread_mutex_lock(&ref->mu);
+        ref->pending = 0;
+        pthread_mutex_unlock(&ref->mu);
+    } /*
+       * else thread must be dying and its psx_action_key has already
+       * been cleaned up.
+       */
+
+    errno = saved_errno;
+}
+
+/*
+ * Some forward declarations for the initialization
+ * psx_syscall_start() routine.
+ */
+static void _psx_prepare_fork(void);
+static void _psx_fork_completed(void);
+static void _psx_forked_child(void);
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine) (void *), void *arg);
+
+/*
+ * psx requires this function to be provided by the linkage wrapping.
+ */
+extern int __real_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine) (void *), void *arg);
+
+/*
+ * psx_confirm_sigaction reconfirms that the psx handler is the first
+ * handler to respond to the psx signal. It assumes that
+ * psx_tracker.psx_sig has been set. When some other handler is found
+ * installed, it is remembered in psx_tracker.chained_action and the
+ * psx handler is installed in its place.
+ */
+static void psx_confirm_sigaction(void) {
+    sigset_t mask, orig;
+    struct sigaction existing_sa;
+
+    /*
+     * Block interrupts while potentially rewriting the handler. The
+     * calling thread's mask is changed with pthread_sigmask() because
+     * the behavior of sigprocmask() is unspecified in a multithreaded
+     * process.
+     */
+    sigemptyset(&mask);
+    sigaddset(&mask, psx_tracker.psx_sig);
+    pthread_sigmask(SIG_BLOCK, &mask, &orig);
+
+    sigaction(psx_tracker.psx_sig, NULL, &existing_sa);
+    if (existing_sa.sa_sigaction != psx_posix_syscall_actor) {
+        /* keep the displaced handler so unrecognized signals can be chained */
+        psx_tracker.chained_action = existing_sa;
+        psx_tracker.sig_action.sa_sigaction = psx_posix_syscall_actor;
+        sigemptyset(&psx_tracker.sig_action.sa_mask);
+        psx_tracker.sig_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
+        sigaction(psx_tracker.psx_sig, &psx_tracker.sig_action, NULL);
+    }
+
+    /* put back whatever mask the caller had on entry */
+    pthread_sigmask(SIG_SETMASK, &orig, NULL);
+}
+
+/*
+ * psx_syscall_start initializes the subsystem including initializing
+ * the mutex.
+ *
+ * It is run through pthread_once() from psx_lock(). It creates the
+ * state mutex, the condition variable and the TLS key, registers the
+ * fork handlers, selects and installs the psx signal handler and
+ * registers the calling thread.
+ */
+static void psx_syscall_start(void) {
+ pthread_mutex_init(&psx_tracker.state_mu, NULL);
+ pthread_cond_init(&psx_tracker.cond, NULL);
+ /* no destructor: per-thread nodes are released by psx_do_unregister() */
+ pthread_key_create(&psx_action_key, NULL);
+ pthread_atfork(_psx_prepare_fork, _psx_fork_completed, _psx_forked_child);
+
+ /*
+ * All sorts of things are assumed by Linux and glibc and/or musl
+ * about signal handlers and which can be blocked. Go has its own
+ * idiosyncrasies too. We tried SIGRTMAX until
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=210533
+ *
+ * Our current strategy is to aggressively intercept SIGSYS.
+ */
+ psx_tracker.psx_sig = SIGSYS;
+
+ /* psx_sig must be set before the handler can be confirmed */
+ psx_confirm_sigaction();
+ psx_do_registration(); // register the main thread.
+
+ /* __psx_syscall() only mirrors calls once this is set */
+ psx_tracker.initialized = 1;
+}
+
+/*
+ * This is the only way this library globally locks. Note, this is not
+ * to be confused with psx_sig (interrupt) blocking - which is
+ * performed around thread creation and when the signal handler is
+ * being confirmed.
+ *
+ * The first call also runs psx_syscall_start(), via pthread_once(),
+ * so the mutex is initialized before it is locked.
+ */
+static void psx_lock(void)
+{
+ pthread_once(&psx_tracker_initialized, psx_syscall_start);
+ pthread_mutex_lock(&psx_tracker.state_mu);
+}
+
+/*
+ * This is the only way this library unlocks. It releases the
+ * psx_tracker.state_mu mutex taken by psx_lock().
+ */
+static void psx_unlock(void)
+{
+ pthread_mutex_unlock(&psx_tracker.state_mu);
+}
+
+/*
+ * Move the tracker from state 'was' to state 'is'. With the global
+ * lock held, the caller sleeps on psx_tracker.cond until the tracker
+ * is observed in state 'was', then records the new state.
+ */
+static void psx_new_state(psx_tracker_state_t was, psx_tracker_state_t is)
+{
+    psx_lock();
+    for (;;) {
+        if (psx_tracker.state == was) {
+            break;
+        }
+        pthread_cond_wait(&psx_tracker.cond, &psx_tracker.state_mu);
+    }
+    psx_tracker.state = is;
+    /* idleness is the only thing waited for, so it is all we announce */
+    if (_PSX_IDLE == is) {
+        pthread_cond_signal(&psx_tracker.cond);
+    }
+    psx_unlock();
+}
+
+/*
+ * psx_syscall3 is a fixed three-argument function form of the
+ * psx_syscall() macro, for use where a function pointer is needed.
+ * It returns whatever psx_syscall() returns.
+ */
+long int psx_syscall3(long int syscall_nr,
+ long int arg1, long int arg2, long int arg3) {
+ return psx_syscall(syscall_nr, arg1, arg2, arg3);
+}
+
+/*
+ * psx_syscall6 is a fixed six-argument function form of the
+ * psx_syscall() macro, for use where a function pointer is needed.
+ * It returns whatever psx_syscall() returns.
+ */
+long int psx_syscall6(long int syscall_nr,
+ long int arg1, long int arg2, long int arg3,
+ long int arg4, long int arg5, long int arg6) {
+ return psx_syscall(syscall_nr, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+/*
+ * _psx_prepare_fork is the pthread_atfork() "prepare" handler
+ * registered by psx_syscall_start().
+ */
+static void _psx_prepare_fork(void) {
+ /*
+ * obtain global lock - we don't want any syscalls while the fork
+ * is occurring since it may interfere with the preparation for
+ * the fork.
+ */
+ psx_new_state(_PSX_IDLE, _PSX_INFORK);
+}
+
+/*
+ * _psx_fork_completed is the pthread_atfork() "parent" handler
+ * registered by psx_syscall_start(); it undoes _psx_prepare_fork().
+ */
+static void _psx_fork_completed(void) {
+ /*
+ * The only way we can get here is if state is _PSX_INFORK and was
+ * previously _PSX_IDLE. Now that the fork has completed, the
+ * parent can continue as if it hadn't happened - the forked child
+ * does not tie its security state to that of the parent process
+ * and threads.
+ *
+ * We don't strictly need to change the psx_tracker.state since we
+ * hold the mutex over the fork, but we do to make deadlock
+ * debugging easier.
+ */
+ psx_new_state(_PSX_INFORK, _PSX_IDLE);
+}
+
+/*
+ * _psx_forked_child is the pthread_atfork() "child" handler: it
+ * discards the thread registry inherited from the parent and flags
+ * the process so that psx syscalls are made directly from now on.
+ */
+static void _psx_forked_child(void) {
+    /*
+     * We can only arrive here with the state at _PSX_INFORK, having
+     * previously been _PSX_IDLE. None of the registered threads exist
+     * in the newly minted child process, so the tracking structure is
+     * reset to avoid any confusion. The state is deliberately left as
+     * _PSX_INFORK, which scuttles any chance of the PSX API working
+     * on more than one thread in the child. All psx_syscall()s remain
+     * supported in the fork()ed child by reverting to direct calls.
+     *
+     * We do this because the glibc man page for fork() suggests that
+     * only a subset of things will work post fork(). Specifically,
+     * only "async-signal-safe functions (see signal-safety(7)) until
+     * such time as it calls execve(2)" can be relied upon. That man
+     * page suggests that you can't expect mutexes to work: "not
+     * async-signal-safe because it uses pthread_mutex_lock(3)
+     * internally.".
+     */
+    registered_thread_t *node = psx_tracker.root;
+
+    psx_tracker.root = NULL;
+    psx_tracker.has_forked = 1;
+
+    while (node != NULL) {
+        registered_thread_t *following = node->next;
+        memset(node, 0, sizeof(*node));
+        free(node);
+        node = following;
+    }
+}
+
+/*
+ * Called locked. Unlinks node from the psx_tracker.root list, then
+ * destroys its mutex, scrubs it and frees it. The node must not be
+ * used after this returns.
+ */
+static void psx_do_unregister(registered_thread_t *node) {
+    registered_thread_t *after = node->next;
+    registered_thread_t *before = node->prev;
+
+    if (before != NULL) {
+        before->next = after;
+    }
+    if (after != NULL) {
+        after->prev = before;
+    }
+    if (node == psx_tracker.root) {
+        psx_tracker.root = after;
+    }
+
+    pthread_mutex_destroy(&node->mu);
+    memset(node, 0, sizeof(*node));
+    free(node);
+}
+
+/*
+ * psx_starter_t carries what _psx_start_fn() needs to launch the
+ * caller's thread function. It is allocated by
+ * __wrap_pthread_create() and freed by _psx_start_fn().
+ */
+typedef struct {
+ void *(*fn)(void *); /* the start_routine given to pthread_create */
+ void *arg; /* the argument to pass to fn */
+ sigset_t sigbits; /* creator's signal mask, applied to the new thread */
+} psx_starter_t;
+
+/*
+ * _psx_exiting is used to cleanup the node for the thread on its exit
+ * path. This is needed for musl libc:
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=208477
+ *
+ * and likewise for glibc too:
+ *
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=12889
+ *
+ * It is installed as a pthread cleanup handler by _psx_start_fn();
+ * node is the registered_thread_t of the exiting thread. The node is
+ * only marked as gone here, it is not freed.
+ */
+static void _psx_exiting(void *node) {
+ /*
+ * Until we are in the _PSX_EXITING state, we must not block the
+ * psx_sig interrupt for this dying thread. That is, until this
+ * exiting thread can set ref->gone to 1, this dying thread is
+ * still participating in the psx syscall distribution.
+ *
+ * See https://github.com/golang/go/issues/42494 for a situation
+ * where this code is called with psx_tracker.psx_sig blocked.
+ */
+ sigset_t sigbit, orig_sigbits;
+ sigemptyset(&sigbit);
+ /* unblocking an empty set changes nothing: this just reads the mask */
+ pthread_sigmask(SIG_UNBLOCK, &sigbit, &orig_sigbits);
+ sigaddset(&sigbit, psx_tracker.psx_sig);
+ pthread_sigmask(SIG_UNBLOCK, &sigbit, NULL);
+
+ /*
+ * With psx_tracker.psx_sig unblocked we can wait until this
+ * thread can enter the _PSX_EXITING state.
+ */
+ psx_new_state(_PSX_IDLE, _PSX_EXITING);
+
+ /*
+ * We now indicate that this thread is no longer participating in
+ * the psx mechanism.
+ */
+ registered_thread_t *ref = node;
+ pthread_mutex_lock(&ref->mu);
+ ref->gone = 1;
+ pthread_mutex_unlock(&ref->mu);
+
+ /*
+ * At this point, we can restore the calling sigmask to whatever
+ * the caller thought was appropriate for a dying thread to have.
+ */
+ pthread_sigmask(SIG_SETMASK, &orig_sigbits, NULL);
+
+ /*
+ * Allow the rest of the psx system carry on as per normal.
+ */
+ psx_new_state(_PSX_EXITING, _PSX_IDLE);
+}
+
+/*
+ * _psx_start_fn is a trampoline for the intended start function, it
+ * is called blocked (_PSX_CREATE), but releases the block before
+ * calling starter->fn. Before releasing the block, the TLS specific
+ * attributes are initialized for use by the interrupt handler under
+ * the psx mutex, so it doesn't race with an interrupt received by
+ * this thread and the interrupt handler does not need to poll for
+ * that specific attribute to be present (which is problematic during
+ * thread shutdown).
+ *
+ * data is the psx_starter_t allocated by __wrap_pthread_create(); it
+ * is consumed (scrubbed and freed) here. The return value is that of
+ * the wrapped start function.
+ */
+static void *_psx_start_fn(void *data) {
+ void *node = psx_do_registration();
+
+ /* registration is done: let the rest of the psx machinery proceed */
+ psx_new_state(_PSX_CREATE, _PSX_IDLE);
+
+ psx_starter_t *starter = data;
+ /* adopt the signal mask the creating thread had before it blocked psx_sig */
+ pthread_sigmask(SIG_SETMASK, &starter->sigbits, NULL);
+ void *(*fn)(void *) = starter->fn;
+ void *arg = starter->arg;
+
+ /* fn and arg have been copied out, so the starter can be released */
+ memset(data, 0, sizeof(*starter));
+ free(data);
+
+ void *ret;
+
+ pthread_cleanup_push(_psx_exiting, node);
+ ret = fn(arg);
+ /* a non-zero argument runs _psx_exiting() on a normal return too */
+ pthread_cleanup_pop(1);
+
+ return ret;
+}
+
+/*
+ * __wrap_pthread_create is the wrapped destination of all regular
+ * pthread_create calls. The new thread is started in _psx_start_fn(),
+ * which registers it with psx before running start_routine(arg).
+ *
+ * Returns 0 on success or an error number, as pthread_create() does.
+ */
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+                          void *(*start_routine) (void *), void *arg) {
+    psx_starter_t *starter = calloc(1, sizeof(*starter));
+    if (starter == NULL) {
+        /* pthread_create() reports insufficient resources as EAGAIN */
+        return EAGAIN;
+    }
+    starter->fn = start_routine;
+    starter->arg = arg;
+    /*
+     * Until we are in the _PSX_IDLE state and locked, we must not
+     * block the psx_sig interrupt for this parent thread. Arrange
+     * that parent thread and newly created one can restore signal
+     * mask.
+     */
+    sigset_t sigbit, orig_sigbits;
+    sigemptyset(&sigbit);
+    /* unblocking an empty set changes nothing: this just reads the mask */
+    pthread_sigmask(SIG_UNBLOCK, &sigbit, &starter->sigbits);
+    sigaddset(&sigbit, psx_tracker.psx_sig);
+    pthread_sigmask(SIG_UNBLOCK, &sigbit, &orig_sigbits);
+
+    psx_new_state(_PSX_IDLE, _PSX_CREATE);
+
+    /*
+     * until the child thread has been blessed with its own TLS
+     * specific attribute(s) we prevent either the parent thread or
+     * the new one from experiencing a PSX interrupt.
+     */
+    pthread_sigmask(SIG_BLOCK, &sigbit, NULL);
+
+    int ret = __real_pthread_create(thread, attr, _psx_start_fn, starter);
+    if (ret != 0) {
+        /*
+         * pthread_create() signals failure with a non-zero error
+         * number (it never returns -1). No thread exists to run
+         * _psx_start_fn(), so the _PSX_CREATE state is left, and the
+         * starter is released, here.
+         */
+        psx_new_state(_PSX_CREATE, _PSX_IDLE);
+        memset(starter, 0, sizeof(*starter));
+        free(starter);
+    } /* else unlock happens in _psx_start_fn */
+
+    /* the parent can once again receive psx interrupt signals */
+    pthread_sigmask(SIG_SETMASK, &orig_sigbits, NULL);
+
+    return ret;
+}
+
+/*
+ * __psx_immediate_syscall makes one syscall from the calling thread
+ * and returns its result. As a side effect the syscall number and
+ * arguments are recorded in psx_tracker.cmd. Only the first 'count'
+ * entries of arg are read, missing ones among the first three are
+ * recorded as 0, and arguments four to six are only recorded when
+ * count exceeds three.
+ */
+static long int __psx_immediate_syscall(long int syscall_nr,
+                                        int count, long int *arg) {
+    long int a1 = 0, a2 = 0, a3 = 0;
+
+    if (count > 0) {
+        a1 = arg[0];
+    }
+    if (count > 1) {
+        a2 = arg[1];
+    }
+    if (count > 2) {
+        a3 = arg[2];
+    }
+
+    psx_tracker.cmd.syscall_nr = syscall_nr;
+    psx_tracker.cmd.arg1 = a1;
+    psx_tracker.cmd.arg2 = a2;
+    psx_tracker.cmd.arg3 = a3;
+
+    if (count <= 3) {
+        /* the three argument form is sufficient */
+        psx_tracker.cmd.six = 0;
+        return syscall(syscall_nr, a1, a2, a3);
+    }
+
+    long int a4 = arg[3];
+    long int a5 = 0, a6 = 0;
+    if (count > 4) {
+        a5 = arg[4];
+    }
+    if (count > 5) {
+        a6 = arg[5];
+    }
+
+    psx_tracker.cmd.six = 1;
+    psx_tracker.cmd.arg4 = a4;
+    psx_tracker.cmd.arg5 = a5;
+    psx_tracker.cmd.arg6 = a6;
+    return syscall(syscall_nr, a1, a2, a3, a4, a5, a6);
+}
+
+/*
+ * __psx_syscall performs the syscall on the current thread and if no
+ * error is detected it ensures that the syscall is also performed on
+ * all (other) registered threads. The return code is the value for
+ * the first invocation. It uses a trick to figure out how many
+ * arguments the user has supplied. The other half of the trick is
+ * provided by the macro psx_syscall() in the <sys/psx_syscall.h>
+ * file. The trick is the 7th optional argument (8th over all) to
+ * __psx_syscall is the count of arguments supplied to psx_syscall.
+ *
+ * User:
+ * psx_syscall(nr, a, b);
+ * Expanded by macro to:
+ * __psx_syscall(nr, a, b, 6, 5, 4, 3, 2, 1, 0);
+ * The eighth arg is now ------------------------------------^
+ *
+ * A count outside 0..6 yields -1 with errno set to EINVAL.
+ */
+long int __psx_syscall(long int syscall_nr, ...) {
+ long int arg[7];
+ int i;
+
+ /* always read seven values: the last one holds the argument count */
+ va_list aptr;
+ va_start(aptr, syscall_nr);
+ for (i = 0; i < 7; i++) {
+ arg[i] = va_arg(aptr, long int);
+ }
+ va_end(aptr);
+
+ int count = arg[6];
+ if (count < 0 || count > 6) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* after a fork() the child makes its calls directly, unmirrored */
+ if (psx_tracker.has_forked) {
+ return __psx_immediate_syscall(syscall_nr, count, arg);
+ }
+
+ psx_new_state(_PSX_IDLE, _PSX_SETUP);
+ psx_confirm_sigaction();
+
+ long int ret;
+
+ /* the calling thread goes first; a failure here is not mirrored */
+ ret = __psx_immediate_syscall(syscall_nr, count, arg);
+ if (ret == -1 || !psx_tracker.initialized) {
+ psx_new_state(_PSX_SETUP, _PSX_IDLE);
+ goto defer;
+ }
+
+ /* remembered so the caller sees errno as the first call left it */
+ int restore_errno = errno;
+
+ psx_new_state(_PSX_SETUP, _PSX_SYSCALL);
+ psx_tracker.cmd.active = 1;
+
+ pthread_t self = pthread_self();
+ registered_thread_t *next = NULL, *ref;
+
+ /* first pass: mark every other thread pending and signal it */
+ psx_lock();
+ for (ref = psx_tracker.root; ref; ref = next) {
+ /* read the link now: ref may be freed by psx_do_unregister() below */
+ next = ref->next;
+ if (ref->thread == self) {
+ continue;
+ }
+ pthread_mutex_lock(&ref->mu);
+ ref->pending = 1;
+ int gone = ref->gone;
+ if (!gone) {
+ gone = pthread_kill(ref->thread, psx_tracker.psx_sig) != 0;
+ }
+ pthread_mutex_unlock(&ref->mu);
+ if (!gone) {
+ continue;
+ }
+ /*
+ * need to remove invalid thread id from linked list
+ */
+ psx_do_unregister(ref);
+ }
+ psx_unlock();
+
+ /* then poll until no remaining thread is still pending */
+ for (;;) {
+ int waiting = 0;
+ psx_lock();
+ for (ref = psx_tracker.root; ref; ref = next) {
+ next = ref->next;
+ if (ref->thread == self) {
+ continue;
+ }
+
+ pthread_mutex_lock(&ref->mu);
+ int pending = ref->pending;
+ int gone = ref->gone;
+ if (pending && !gone) {
+ /* signal 0 sends nothing: it only tests the thread id */
+ gone = (pthread_kill(ref->thread, 0) != 0);
+ }
+ pthread_mutex_unlock(&ref->mu);
+ if (!gone) {
+ waiting += pending;
+ continue;
+ }
+ /*
+ * need to remove invalid thread id from linked list
+ */
+ psx_do_unregister(ref);
+ }
+ psx_unlock();
+ if (!waiting) {
+ break;
+ }
+ sched_yield();
+ }
+
+ errno = restore_errno;
+ psx_tracker.cmd.active = 0;
+ psx_new_state(_PSX_SYSCALL, _PSX_IDLE);
+
+defer:
+ return ret;
+}
diff --git a/psx/psx.go b/psx/psx.go
new file mode 100644
index 0000000..529f19d
--- /dev/null
+++ b/psx/psx.go
@@ -0,0 +1,15 @@
+// +build linux,!cgo
+// +build go1.16
+
+package psx // import "kernel.org/pub/linux/libs/security/libcap/psx"
+
+import (
+ "syscall"
+)
+
+// Syscall3 and Syscall6 are aliases for syscall.AllThreadsSyscall*
+// when compiled CGO_ENABLED=0.
+var (
+ Syscall3 = syscall.AllThreadsSyscall
+ Syscall6 = syscall.AllThreadsSyscall6
+)
diff --git a/psx/psx_cgo.go b/psx/psx_cgo.go
new file mode 100644
index 0000000..c17b4f3
--- /dev/null
+++ b/psx/psx_cgo.go
@@ -0,0 +1,65 @@
+// +build linux,cgo
+
+package psx // import "kernel.org/pub/linux/libs/security/libcap/psx"
+
+import (
+ "runtime"
+ "syscall"
+)
+
+// #cgo LDFLAGS: -lpthread -Wl,-wrap,pthread_create
+//
+// #include <errno.h>
+// #include "psx_syscall.h"
+//
+// long __errno_too(long set_errno) {
+// long v = errno;
+// if (set_errno >= 0) {
+// errno = set_errno;
+// }
+// return v;
+// }
+import "C"
+
+// setErrno reports the C errno seen by whichever pthread runs the
+// call and, when v >= 0, overwrites that errno with v. The pthread
+// is effectively random unless the caller holds
+// runtime.LockOSThread(). It exists for testing only: psx.c is
+// expected to save and restore a non-zero errno independently of
+// what the Syscall[36]() functions observe.
+func setErrno(v int) int {
+	prior := C.__errno_too(C.long(v))
+	return int(prior)
+}
+
+// Syscall3 performs a 3 argument syscall using the libpsx C function
+// psx_syscall3(). Unlike syscall.[Raw]Syscall(), the call is executed
+// simultaneously on every pthread of the combined Go and CGo
+// runtimes. Both returned values carry the C return value; the
+// errno is only populated when that value is negative.
+func Syscall3(syscallnr, arg1, arg2, arg3 uintptr) (uintptr, uintptr, syscall.Errno) {
+	// Stay on one OS thread so the errno read back belongs to the call.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	ret := C.psx_syscall3(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3))
+	if ret >= 0 {
+		return uintptr(ret), uintptr(ret), 0
+	}
+	return uintptr(ret), uintptr(ret), syscall.Errno(C.__errno_too(-1))
+}
+
+// Syscall6 performs a 6 argument syscall using the libpsx C function
+// psx_syscall6(). Unlike syscall.[Raw]Syscall6(), the call is executed
+// simultaneously on every pthread of the combined Go and CGo
+// runtimes. Both returned values carry the C return value; the
+// errno is only populated when that value is negative.
+func Syscall6(syscallnr, arg1, arg2, arg3, arg4, arg5, arg6 uintptr) (uintptr, uintptr, syscall.Errno) {
+	// Stay on one OS thread so the errno read back belongs to the call.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	ret := C.psx_syscall6(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3), C.long(arg4), C.long(arg5), C.long(arg6))
+	if ret >= 0 {
+		return uintptr(ret), uintptr(ret), 0
+	}
+	return uintptr(ret), uintptr(ret), syscall.Errno(C.__errno_too(-1))
+}
diff --git a/psx/psx_cgo_test.go b/psx/psx_cgo_test.go
new file mode 100644
index 0000000..090a96a
--- /dev/null
+++ b/psx/psx_cgo_test.go
@@ -0,0 +1,40 @@
+// +build cgo
+
+package psx
+
+import (
+ "runtime"
+ "syscall"
+ "testing"
+)
+
+// TestErrno checks that a successful Syscall[36]() leaves the
+// prevailing C errno alone. The errno man page says errno is never
+// set to zero, so a value planted before the calls must still be
+// there afterwards.
+func TestErrno(t *testing.T) {
+	// Pin to one OS thread so the errno planted below is the one
+	// that gets read back.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	// Plant a known non-zero errno and clear it again on the way out.
+	setErrno(int(syscall.EPERM))
+	defer setErrno(0)
+
+	uid3, _, e3 := Syscall3(syscall.SYS_GETUID, 0, 0, 0)
+	if e3 != 0 {
+		t.Fatalf("psx getuid failed: %v", e3)
+	}
+	uid6, _, e6 := Syscall6(syscall.SYS_GETUID, 0, 0, 0, 0, 0, 0)
+	if e6 != 0 {
+		t.Fatalf("psx getuid failed: %v", e6)
+	}
+
+	if uid3 != uid6 {
+		t.Errorf("psx getuid failed to match v3=%d, v6=%d", uid3, uid6)
+	}
+
+	if got := setErrno(-1); got != int(syscall.EPERM) {
+		t.Errorf("psx changes prevailing errno got=%v(%d) want=%v", syscall.Errno(got), got, syscall.EPERM)
+	}
+}
diff --git a/psx/psx_syscall.h b/psx/psx_syscall.h
new file mode 100644
index 0000000..4aacfab
--- /dev/null
+++ b/psx/psx_syscall.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2019 Andrew G. Morgan <morgan@kernel.org>
+ *
+ * This header, and the -lpsx library, provide a number of things to
+ * support POSIX semantics for syscalls associated with the pthread
+ * library. Linking this code is tricky and is done as follows:
+ *
+ * ld ... -lpsx -lpthread --wrap=pthread_create
+ * or, gcc ... -lpsx -lpthread -Wl,-wrap,pthread_create
+ *
+ * glibc provides a subset of this functionality natively through the
+ * nptl:setxid mechanism and could implement psx_syscall() directly
+ * using that style of functionality but, as of 2019-11-30, the setxid
+ * mechanism is limited to 9 specific set*() syscalls that do not
+ * support the syscall6 API (needed for prctl functions and the ambient
+ * capabilities set for example).
+ */
+
+#ifndef _SYS_PSX_SYSCALL_H
+#define _SYS_PSX_SYSCALL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pthread.h>
+
+/*
+ * psx_syscall performs the specified syscall on all psx registered
+ * threads. The mechanism by which this occurs is much less efficient
+ * than a standard system call on Linux, so it should only be used
+ * when POSIX semantics are required to change process relevant
+ * security state.
+ *
+ * Glibc has native support for POSIX semantics on setgroups() and the
+ * 8 set*[gu]id() functions. So, there is no need to use psx_syscall()
+ * for these calls. This call exists for all the other system calls
+ * that need to maintain parity on all pthreads of a program.
+ *
+ * Some macrology is used to allow the caller to provide only as many
+ * arguments as needed, thus psx_syscall() cannot be used as a
+ * function pointer. For those situations, we define psx_syscall3()
+ * and psx_syscall6().
+ */
+#define psx_syscall(syscall_nr, ...) \
+ __psx_syscall(syscall_nr, __VA_ARGS__, (long int) 6, (long int) 5, \
+ (long int) 4, (long int) 3, (long int) 2, \
+ (long int) 1, (long int) 0)
+long int __psx_syscall(long int syscall_nr, ...);
+long int psx_syscall3(long int syscall_nr,
+ long int arg1, long int arg2, long int arg3);
+long int psx_syscall6(long int syscall_nr,
+ long int arg1, long int arg2, long int arg3,
+ long int arg4, long int arg5, long int arg6);
+
+/*
+ * This function should be used by systems to obtain pointers to the
+ * two syscall functions provided by the PSX library. A linkage trick
+ * is to define this function as weak in a library that can optionally
+ * use libpsx and then, should the caller link -lpsx, that library can
+ * implicitly use these POSIX semantics syscalls. See libcap for an
+ * example of this usage.
+ */
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+ long int, long int, long int),
+ long int (**syscall6_fn)(long int,
+ long int, long int, long int,
+ long int, long int, long int));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PSX_SYSCALL_H */
diff --git a/psx/psx_test.go b/psx/psx_test.go
new file mode 100644
index 0000000..4b90f63
--- /dev/null
+++ b/psx/psx_test.go
@@ -0,0 +1,68 @@
+package psx
+
+import (
+ "runtime"
+ "syscall"
+ "testing"
+)
+
+// TestSyscall3 checks Syscall3 on a call that succeeds (getpid) and
+// on one that must fail: capget with null pointers, which is
+// expected to return -1 with errno EFAULT.
+func TestSyscall3(t *testing.T) {
+	// capget(NULL, NULL) is rejected with a bad address error.
+	const wantErr = syscall.EFAULT
+
+	want := syscall.Getpid()
+	if got, _, err := Syscall3(syscall.SYS_GETPID, 0, 0, 0); err != 0 {
+		t.Errorf("failed to get PID via libpsx: %v", err)
+	} else if int(got) != want {
+		t.Errorf("pid mismatch: got=%d want=%d", got, want)
+	}
+	if got, _, err := Syscall3(syscall.SYS_CAPGET, 0, 0, 0); err != wantErr {
+		t.Errorf("malformed capget returned %d: %v (want %d: %v)", err, err, wantErr, wantErr)
+	} else if ^got != 0 {
+		// ^got is zero only when every bit is set, i.e. got is -1.
+		t.Errorf("malformed capget did not return -1, got=%d", got)
+	}
+}
+
+// TestSyscall6 checks Syscall6 on a call that succeeds (getpid) and
+// on one that must fail: capget with null pointers, which is
+// expected to return -1 with errno EFAULT.
+func TestSyscall6(t *testing.T) {
+	// capget(NULL, NULL) is rejected with a bad address error.
+	const wantErr = syscall.EFAULT
+
+	want := syscall.Getpid()
+	if got, _, err := Syscall6(syscall.SYS_GETPID, 0, 0, 0, 0, 0, 0); err != 0 {
+		t.Errorf("failed to get PID via libpsx: %v", err)
+	} else if int(got) != want {
+		t.Errorf("pid mismatch: got=%d want=%d", got, want)
+	}
+	if got, _, err := Syscall6(syscall.SYS_CAPGET, 0, 0, 0, 0, 0, 0); err != wantErr {
+		t.Errorf("malformed capget errno %d: %v (want %d: %v)", err, err, wantErr, wantErr)
+	} else if ^got != 0 {
+		// ^got is zero only when every bit is set, i.e. got is -1.
+		t.Errorf("malformed capget did not return -1, got=%d", got)
+	}
+}
+
+// killAThread locks the goroutine to a thread and exits. This has the
+// effect of making the go runtime terminate the thread.
+//
+// The goroutine only returns once c is closed (or receives a value),
+// and it never calls runtime.UnlockOSThread().
+func killAThread(c <-chan struct{}) {
+ runtime.LockOSThread()
+ <-c
+}
+
+// TestThreadChurn guards against a regression of:
+//
+// https://github.com/golang/go/issues/42494
+//
+// It runs the four combinations of "terminate a thread" and "make a
+// psx syscall", fifty iterations each.
+func TestThreadChurn(t *testing.T) {
+	const prSetKeepCaps = 8
+
+	for round := 0; round < 4; round++ {
+		kill, sysc := round&1 != 0, round&2 != 0
+		t.Logf("[%d] testing kill=%v, sysc=%v", round, kill, sysc)
+		for i := 50; i > 0; i-- {
+			if kill {
+				done := make(chan struct{})
+				go killAThread(done)
+				close(done)
+			}
+			if !sysc {
+				continue
+			}
+			keep := i & 1
+			if _, _, e := Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, uintptr(keep), 0); e != 0 {
+				t.Fatalf("[%d] psx:prctl(SET_KEEPCAPS, %d) failed: %v", i, keep, syscall.Errno(e))
+			}
+		}
+		t.Logf("[%d] PASSED kill=%v, sysc=%v", round, kill, sysc)
+	}
+}
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..ac7ffb0
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,7 @@
+noop
+psx_test
+libcap_psx_test
+libcap_launch_test
+libcap_psx_launch_test
+exploit
+noexploit
diff --git a/tests/Makefile b/tests/Makefile
new file mode 100644
index 0000000..1e7039d
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,116 @@
+#
+# NOTE the built tests are all designed to be run from this
+# working directory when built DYNAMIC=yes. That is, they
+# link to the shared libraries in ../libcap/ .
+#
+topdir=$(shell pwd)/..
+include ../Make.Rules
+#
+
+# Build every test binary. Sub-makes are started with $(MAKE) so that
+# command line options and the jobserver are passed on to them.
+all:
+	$(MAKE) libcap_launch_test
+ifeq ($(PTHREADS),yes)
+	$(MAKE) psx_test libcap_psx_test libcap_psx_launch_test
+endif
+
+install: all
+
+ifeq ($(DYNAMIC),yes)
+LINKEXTRA=-Wl,-rpath,../libcap
+DEPS=../libcap/libcap.so
+ifeq ($(PTHREADS),yes)
+DEPS += ../libcap/libpsx.so
+endif
+else
+LDFLAGS += --static
+DEPS=../libcap/libcap.a
+ifeq ($(PTHREADS),yes)
+DEPS += ../libcap/libpsx.a
+endif
+endif
+
+# Build, on demand, the libraries and helper binary the tests depend
+# on. Sub-makes are started with $(MAKE) so that command line options
+# and the jobserver are passed on to them.
+../libcap/libcap.so:
+	$(MAKE) -C ../libcap libcap.so
+
+../libcap/libcap.a:
+	$(MAKE) -C ../libcap libcap.a
+
+ifeq ($(PTHREADS),yes)
+../libcap/libpsx.so:
+	$(MAKE) -C ../libcap libpsx.so
+
+../libcap/libpsx.a:
+	$(MAKE) -C ../libcap libpsx.a
+endif
+
+../progs/tcapsh-static:
+	$(MAKE) -C ../progs tcapsh-static
+
+# Run the unprivileged tests. $(MAKE) is used so that command line
+# options and the jobserver are passed on to the sub-make.
+test:
+ifeq ($(PTHREADS),yes)
+	$(MAKE) run_psx_test run_libcap_psx_test
+endif
+
+# Run the tests that need privilege, after the unprivileged ones.
+# $(MAKE) is used so that command line options and the jobserver are
+# passed on to the sub-make.
+sudotest: test
+	$(MAKE) run_libcap_launch_test
+ifeq ($(PTHREADS),yes)
+	$(MAKE) run_libcap_psx_launch_test run_exploit_test
+endif
+
+# unprivileged
+run_psx_test: psx_test
+ ./psx_test
+
+psx_test: psx_test.c $(DEPS)
+ $(CC) $(CFLAGS) $(IPATH) $< -o $@ $(LINKEXTRA) $(LIBPSXLIB) $(LDFLAGS)
+
+run_libcap_psx_test: libcap_psx_test
+ ./libcap_psx_test
+
+libcap_psx_test: libcap_psx_test.c $(DEPS)
+ $(CC) $(CFLAGS) $(IPATH) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) $(LIBPSXLIB) $(LDFLAGS)
+
+# privileged
+run_libcap_launch_test: libcap_launch_test noop ../progs/tcapsh-static
+ sudo ./libcap_launch_test
+
+run_libcap_psx_launch_test: libcap_psx_launch_test ../progs/tcapsh-static
+ sudo ./libcap_psx_launch_test
+
+libcap_launch_test: libcap_launch_test.c $(DEPS)
+ $(CC) $(CFLAGS) $(IPATH) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) $(LDFLAGS)
+
+# This varies only slightly from the above insofar as it currently
+# only links in the pthreads fork support. TODO() we need to change
+# the source to do something interesting with pthreads.
+libcap_psx_launch_test: libcap_launch_test.c $(DEPS)
+ $(CC) $(CFLAGS) $(IPATH) -DWITH_PTHREADS $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) $(LIBPSXLIB) $(LDFLAGS)
+
+
+# This test demonstrates that libpsx is needed to secure multithreaded
+# programs that link against libcap.
+run_exploit_test: exploit noexploit
+ @echo exploit should succeed
+ sudo ./exploit ; if [ $$? -ne 0 ]; then exit 0; else exit 1 ; fi
+ @echo exploit should fail
+ sudo ./noexploit ; if [ $$? -eq 0 ]; then exit 0; else exit 1 ; fi
+
+exploit.o: exploit.c
+ $(CC) $(CFLAGS) $(IPATH) -c $<
+
+exploit: exploit.o $(DEPS)
+ $(CC) $(CFLAGS) $(IPATH) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) -lpthread $(LDFLAGS)
+
+# Note, for some reason, the order of libraries is important to avoid
+# the exploit working for dynamic linking.
+noexploit: exploit.o $(DEPS)
+ $(CC) $(CFLAGS) $(IPATH) $< -o $@ $(LINKEXTRA) $(LIBPSXLIB) $(LIBCAPLIB) $(LDFLAGS)
+
+# This one runs in a chroot with no shared library files.
+noop: noop.c
+ $(CC) $(CFLAGS) $< -o $@ --static
+
+clean:
+ rm -f psx_test libcap_psx_test libcap_launch_test *~
+ rm -f libcap_launch_test libcap_psx_launch_test core noop
+ rm -f exploit noexploit exploit.o
diff --git a/tests/exploit.c b/tests/exploit.c
new file mode 100644
index 0000000..28bac88
--- /dev/null
+++ b/tests/exploit.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2020 Andrew G Morgan <morgan@kernel.org>
+ *
+ * This program, exploit, demonstrates why libcap alone in a
+ * multithreaded C/C++ program is inherently vulnerable to privilege
+ * escalation.
+ *
+ * The code also serves as a demonstration of how linking with libpsx
+ * can eliminate this vulnerability by maintaining a process wide
+ * common security state.
+ *
+ * The basic idea (which is well known and why POSIX stipulates "posix
+ * semantics" for security relevant state at the abstraction of a
+ * process) is that, because of shared memory, if a single thread alone
+ * is vulnerable to code injection, then it can cause any other thread
+ * to execute arbitrary code. As such, if all but one thread drops
+ * privilege, privilege escalation is somewhat trivial.
+ */
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+
+/*
+ * Thread coordination. The mutex and condition variable are given
+ * their POSIX static initializers: a merely zero-filled object is not
+ * guaranteed to be a usable mutex or condition variable.
+ */
+pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+int hits; /* set to 1 by victim(), then incremented by handler() */
+
+/* evidence of highest privilege attained */
+ssize_t greatest_len; /* length of the longest capability text seen */
+char *text; /* that text, as returned by cap_to_text() */
+
+/*
+ * Signal handler - potentially watching for an opportunity to perform
+ * an exploit when invoked on a privileged thread.
+ *
+ * It fetches the capability set with cap_get_proc() and converts it to
+ * text. When that text is longer than the longest one recorded so far
+ * it replaces text/greatest_len; otherwise the new text is released.
+ * In every case hits is incremented and cond is signalled, which is
+ * what lets victim() leave its wait loop.
+ *
+ * The three parameters are required by the SA_SIGINFO handler
+ * signature and are not used.
+ */
+static void handler(int signum, siginfo_t *info, void *ignore) {
+    ssize_t length = 0;
+    char *working = NULL;
+
+    (void) signum;
+    (void) info;
+    (void) ignore;
+
+    pthread_mutex_lock(&mu);
+
+    cap_t caps = cap_get_proc();
+    if (caps != NULL) {
+        working = cap_to_text(caps, &length);
+    }
+    if (working != NULL && length > greatest_len) {
+        /*
+         * This is where the exploit code might go.
+         */
+        cap_free(text);
+        text = working;
+        greatest_len = length;
+    } else if (working != NULL) {
+        /* Not an improvement on what is recorded: do not leak it. */
+        cap_free(working);
+    }
+    if (caps != NULL) {
+        cap_free(caps);
+    }
+    hits++;
+
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&mu);
+}
+
+/*
+ * Body of the privileged thread (imagine it doing whatever needs
+ * privilege). It first publishes hits = 1 to show that it is running,
+ * then blocks on cond until hits has reached 2, and returns NULL.
+ */
+static void *victim(void *args) {
+    /* Step one: report that this thread is up. */
+    pthread_mutex_lock(&mu);
+    hits = 1;
+    printf("started privileged thread\n");
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&mu);
+
+    /* Step two: stay alive until hits has been raised to at least 2. */
+    pthread_mutex_lock(&mu);
+    for (;;) {
+        if (hits >= 2) {
+            break;
+        }
+        pthread_cond_wait(&cond, &mu);
+    }
+    pthread_mutex_unlock(&mu);
+
+    return NULL;
+}
+
+/*
+ * Starts the victim thread, then calls cap_set_proc() with an empty
+ * capability set from the main thread and confirms that the main
+ * thread's capability text has length 1. Next it installs handler()
+ * for SIGRTMIN and delivers that signal to the victim thread, waits
+ * for the thread to finish, and reports the longest capability text
+ * the handler recorded.
+ *
+ * Exit status: 1 if the handler saw more privilege than the main
+ * thread kept ("exploit succeeded") or if any setup step failed;
+ * 0 if the exploit failed.
+ */
+int main(int argc, char **argv) {
+    pthread_t peer;
+    cap_t caps = cap_init();
+    /* Zero every field that is not set explicitly below. */
+    struct sigaction sig_action = { .sa_flags = SA_SIGINFO | SA_RESTART };
+    int rc;
+
+    (void) argc;
+    (void) argv;
+
+    printf("program starting\n");
+    if (caps == NULL) {
+        perror("unable to allocate an empty capability set");
+        exit(1);
+    }
+    rc = pthread_create(&peer, NULL, victim, NULL);
+    if (rc != 0) {
+        /* pthread functions return the error number; errno is not set. */
+        fprintf(stderr, "unable to start the victim thread: error %d\n", rc);
+        exit(1);
+    }
+
+    /*
+     * Wait until the peer thread is fully up.
+     */
+    pthread_mutex_lock(&mu);
+    while (hits < 1) {
+        pthread_cond_wait(&cond, &mu);
+    }
+    pthread_mutex_unlock(&mu);
+
+    printf("dropping privilege from main process thread\n");
+
+    if (cap_set_proc(caps)) {
+        perror("unable to drop capabilities from main process thread");
+        exit(1);
+    }
+    cap_free(caps);
+
+    /* confirm the low privilege of the process' main thread */
+
+    caps = cap_get_proc();
+    text = cap_to_text(caps, &greatest_len);
+    cap_free(caps);
+
+    /* greatest_len is an ssize_t, hence %zd rather than %ld. */
+    printf("no privilege in main process thread: len:%zd, caps:\"%s\"\n",
+           greatest_len, text);
+    if (greatest_len != 1) {
+        printf("failed to lower privilege as expected\n");
+        exit(1);
+    }
+
+    /*
+     * So, we have confirmed that this running thread has no
+     * privilege. From this thread we setup an interrupt handler and
+     * then trigger it on the privileged peer thread.
+     */
+
+    sig_action.sa_sigaction = &handler;
+    sigemptyset(&sig_action.sa_mask);
+    if (sigaction(SIGRTMIN, &sig_action, NULL) != 0) {
+        perror("unable to install the signal handler");
+        exit(1);
+    }
+
+    /*
+     * If the signal cannot be delivered the victim never leaves its
+     * wait loop, so joining it below would block forever.
+     */
+    rc = pthread_kill(peer, SIGRTMIN);
+    if (rc != 0) {
+        fprintf(stderr, "unable to signal the victim thread: error %d\n", rc);
+        exit(1);
+    }
+
+    /*
+     * Wait for the thread to exit.
+     */
+    pthread_join(peer, NULL);
+
+    /*
+     * Let's see how we did with the exploit.
+     */
+
+    printf("greatest privilege in main process thread: len:%zd, caps:\"%s\"\n",
+           greatest_len, text);
+
+    cap_free(text);
+    if (greatest_len != 1) {
+        printf("exploit succeeded\n");
+        exit(1);
+    } else {
+        printf("exploit failed\n");
+    }
+    return 0;
+}
diff --git a/tests/libcap_launch_test.c b/tests/libcap_launch_test.c
new file mode 100644
index 0000000..bba38c6
--- /dev/null
+++ b/tests/libcap_launch_test.c
@@ -0,0 +1,173 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * tests for cap_launch.
+ */
+
+/* Capacity of the fixed-size groups[] and args[] arrays below. */
+#define MORE_THAN_ENOUGH 20
+/* Value of pass_on that marks the terminating entry of the test table. */
+#define NO_MORE 1
+
+/*
+ * One cap_launch() test case. main() applies a field to the launcher
+ * only when it is non-zero / non-NULL, so fields omitted from a table
+ * entry leave the corresponding launcher setting untouched.
+ */
+struct test_case_s {
+ int pass_on; /* NO_MORE on the last table entry; not set on real cases */
+ const char *chroot; /* given to cap_launcher_set_chroot() */
+ uid_t uid; /* given to cap_launcher_setuid() */
+ gid_t gid; /* given, with ngroups and groups, to cap_launcher_setgroups() */
+ int ngroups; /* number of entries used in groups[] */
+ const gid_t groups[MORE_THAN_ENOUGH];
+ const char *args[MORE_THAN_ENOUGH]; /* argv; args[0] is also the program path */
+ const char **envp; /* environment handed to cap_new_launcher() */
+ const char *iab; /* text decoded with cap_iab_from_text() */
+ cap_mode_t mode; /* given to cap_launcher_set_mode() */
+ int result; /* expected status as stored by waitpid(), compared verbatim */
+};
+
+#ifdef WITH_PTHREADS
+#include <pthread.h>
+#else /* WITH_PTHREADS */
+#endif /* WITH_PTHREADS */
+
+/*
+ * Runs every entry of the test table through cap_launch() and compares
+ * the status stored by waitpid() for the launched child with the
+ * expected value. Afterwards it checks that the capabilities of this
+ * process are the same as they were before any launch.
+ *
+ * Prints "cap_launch_test: PASSED" and returns 0 when everything
+ * matched; prints "cap_launch_test: FAILED" and exits with status 1
+ * otherwise. The launcher is released on every path through the loop.
+ */
+int main(int argc, char **argv) {
+    static struct test_case_s vs[] = {
+        {
+            .args = { "../progs/tcapsh-static", "--", "-c", "echo hello" },
+            .result = 0
+        },
+        {
+            /*
+             * Expected to fail. 256 is the raw wait status; presumably
+             * exit code 1 as encoded by Linux - confirm.
+             */
+            .args = { "../progs/tcapsh-static", "--is-uid=123" },
+            .result = 256
+        },
+        {
+            .args = { "../progs/tcapsh-static", "--is-uid=123" },
+            .result = 0,
+            .uid = 123,
+        },
+        {
+            .args = { "../progs/tcapsh-static", "--is-gid=123" },
+            .result = 0,
+            .gid = 123,
+            .ngroups = 1,
+            .groups = { 456 },
+            .iab = "",
+        },
+        {
+            .args = { "../progs/tcapsh-static", "--dropped=cap_chown",
+                      "--has-i=cap_chown" },
+            .result = 0,
+            .iab = "!%cap_chown"
+        },
+        {
+            .args = { "../progs/tcapsh-static", "--dropped=cap_chown",
+                      "--has-i=cap_chown", "--is-uid=234",
+                      "--has-a=cap_chown", "--has-p=cap_chown" },
+            .uid = 234,
+            .result = 0,
+            .iab = "!^cap_chown"
+        },
+        {
+            .args = { "../progs/tcapsh-static", "--inmode=NOPRIV",
+                      "--has-no-new-privs" },
+            .result = 0,
+            .mode = CAP_MODE_NOPRIV
+        },
+        {
+            .args = { "/noop" },
+            .result = 0,
+            .chroot = ".",
+        },
+        {
+            .pass_on = NO_MORE
+        },
+    };
+
+    (void) argc;
+    (void) argv;
+
+    cap_t orig = cap_get_proc();
+
+    int success = 1, i;
+    for (i=0; vs[i].pass_on != NO_MORE; i++) {
+        const struct test_case_s *v = &vs[i];
+        printf("[%d] test should %s\n", i,
+               v->result ? "generate error" : "work");
+        cap_launch_t attr = cap_new_launcher(v->args[0], v->args, v->envp);
+        if (attr == NULL) {
+            fprintf(stderr, "[%d] failed to create launcher", i);
+            perror(":");
+            success = 0;
+            continue;
+        }
+        if (v->chroot) {
+            cap_launcher_set_chroot(attr, v->chroot);
+        }
+        if (v->uid) {
+            cap_launcher_setuid(attr, v->uid);
+        }
+        if (v->gid) {
+            cap_launcher_setgroups(attr, v->gid, v->ngroups, v->groups);
+        }
+        if (v->iab) {
+            cap_iab_t iab = cap_iab_from_text(v->iab);
+            if (iab == NULL) {
+                fprintf(stderr, "[%d] failed to decode iab [%s]", i, v->iab);
+                perror(":");
+                success = 0;
+                cap_free(attr);
+                continue;
+            }
+            cap_iab_t old = cap_launcher_set_iab(attr, iab);
+            if (cap_free(old)) {
+                fprintf(stderr, "[%d] failed to free previous iab", i);
+                perror(":");
+                success = 0;
+                cap_free(attr);
+                continue;
+            }
+        }
+        if (v->mode) {
+            cap_launcher_set_mode(attr, v->mode);
+        }
+
+        pid_t child = cap_launch(attr, NULL);
+
+        if (child <= 0) {
+            /* Report first: the cap_free() below may overwrite errno. */
+            fprintf(stderr, "[%d] failed to launch", i);
+            perror(":");
+            success = 0;
+            cap_free(attr);
+            continue;
+        }
+        if (cap_free(attr)) {
+            fprintf(stderr, "[%d] failed to free launcher", i);
+            perror(":");
+            success = 0;
+        }
+        int result;
+        pid_t ret = waitpid(child, &result, 0);
+        if (ret != child) {
+            fprintf(stderr, "[%d] failed to wait", i);
+            perror(":");
+            success = 0;
+            continue;
+        }
+        if (result != v->result) {
+            /*
+             * No system call failed here, so errno carries no useful
+             * information and perror() is deliberately not used.
+             */
+            fprintf(stderr, "[%d] bad result: got=%d want=%d\n", i, result,
+                    v->result);
+            success = 0;
+        }
+    }
+
+    cap_t final = cap_get_proc();
+    if (cap_compare(orig, final)) {
+        char *was = cap_to_text(orig, NULL);
+        char *is = cap_to_text(final, NULL);
+        printf("cap_launch_test: orig:'%s' != final:'%s'\n", was, is);
+        cap_free(is);
+        cap_free(was);
+        success = 0;
+    }
+    cap_free(final);
+    cap_free(orig);
+
+    if (success) {
+        printf("cap_launch_test: PASSED\n");
+    } else {
+        printf("cap_launch_test: FAILED\n");
+        exit(1);
+    }
+    return 0;
+}
diff --git a/tests/libcap_psx_test.c b/tests/libcap_psx_test.c
new file mode 100644
index 0000000..9f53f06
--- /dev/null
+++ b/tests/libcap_psx_test.c
@@ -0,0 +1,50 @@
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/psx_syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * Thread body: after a short delay, fork a child and wait for it.
+ *
+ * Both sides of the fork read their capability set with cap_get_proc()
+ * and write it straight back with cap_set_proc(); the child then exits
+ * with status 0. Any failure (fork error, wrong pid from waitpid(), or
+ * a non-zero child status) prints a FAILED line and terminates the
+ * whole process with status 1. The capability set is released before
+ * the thread returns.
+ *
+ * data is unused; the return value is always NULL.
+ */
+static void *thread_fork_exit(void *data) {
+    (void) data;
+
+    usleep(1234);
+    pid_t pid = fork();
+    if (pid < 0) {
+        /* Without this check waitpid(-1, ...) would reap any child. */
+        printf("FAILED: fork failed with error: %d\n", errno);
+        exit(1);
+    }
+    cap_t start = cap_get_proc();
+    if (pid == 0) {
+        cap_set_proc(start);
+        exit(0);
+    }
+    int res;
+    if (waitpid(pid, &res, 0) != pid || res != 0) {
+        printf("FAILED: pid=%d wait returned %d and/or error: %d\n",
+               pid, res, errno);
+        exit(1);
+    }
+    cap_set_proc(start);
+    cap_free(start);
+    return NULL;
+}
+
+/*
+ * Starts ten threads that each fork a child (see thread_fork_exit())
+ * while this thread repeatedly re-applies its own capability set with
+ * cap_set_proc(). Every thread is joined before PASSED is printed, so
+ * a thread that is still forking or waiting cannot be cut short by the
+ * final exit(0) and a failure it reports cannot be lost.
+ *
+ * Exits with status 0 on success; a thread that detects a failure, or
+ * a thread that cannot be created, ends the process with status 1.
+ */
+int main(int argc, char **argv) {
+    enum { THREADS = 10 };
+    int i;
+
+    (void) argc;
+    (void) argv;
+
+    printf("hello libcap and libpsx ");
+    fflush(stdout);
+    cap_t start = cap_get_proc();
+    pthread_t threads[THREADS];
+    for (i = 0; i < THREADS; i++) {
+        int rc = pthread_create(&threads[i], NULL, thread_fork_exit, NULL);
+        if (rc != 0) {
+            printf("FAILED: unable to create thread %d: error %d\n", i, rc);
+            exit(1);
+        }
+    }
+    for (i = 0; i < THREADS; i++) {
+        printf(".");     /* because of fork, this may print double */
+        fflush(stdout);  /* try to limit the above effect */
+        cap_set_proc(start);
+        usleep(1000);
+    }
+    for (i = 0; i < THREADS; i++) {
+        pthread_join(threads[i], NULL);
+    }
+    cap_free(start);
+    printf(" PASSED\n");
+    exit(0);
+}
diff --git a/tests/noop.c b/tests/noop.c
new file mode 100644
index 0000000..ad51c89
--- /dev/null
+++ b/tests/noop.c
@@ -0,0 +1,5 @@
+#include <stdlib.h>
+
+/* Does nothing at all and terminates at once with exit status 0. */
+int main(int argc, char *argv[]) {
+    (void) argc;
+    (void) argv;
+    return 0;
+}
diff --git a/tests/psx_test.c b/tests/psx_test.c
new file mode 100644
index 0000000..7f16175
--- /dev/null
+++ b/tests/psx_test.c
@@ -0,0 +1,150 @@
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/psx_syscall.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+/*
+ * Overlay used only to print a pthread_t value as a hexadecimal
+ * number through its raw member.
+ */
+typedef union tp {
+    long long unsigned raw;
+    pthread_t pt;
+} thread_ptr;
+
+/*
+ * Prints a greeting that identifies the calling process and thread and
+ * checks that the calling thread's PR_GET_KEEPCAPS value equals kept.
+ *
+ * title  label printed for the caller (for example "main" or "thread")
+ * n      counter printed next to the title
+ * kept   the keepcaps value the caller is expected to observe
+ *
+ * On a mismatch a FAILURE line is printed and the whole process exits
+ * with status 1; otherwise the function simply returns.
+ */
+static void say_hello_expecting(const char *title, int n, int kept) {
+    int keeper = prctl(PR_GET_KEEPCAPS);
+    thread_ptr tp;
+
+    /*
+     * Clear the full width first: where pthread_t is narrower than
+     * raw, the bytes it does not cover would otherwise be printed with
+     * indeterminate contents.
+     */
+    tp.raw = 0;
+    tp.pt = pthread_self();
+
+    printf("hello [%d], %s<%d> %llx (keepcaps=%d vs. want=%d)\n",
+           getpid(), title, n, tp.raw, keeper, kept);
+    if (keeper != kept) {
+        printf("--> FAILURE %s thread=%llx has wrong keepcaps: got=%d want=%d\n",
+               title, tp.raw, keeper, kept);
+        exit(1);
+    }
+}
+
+/*
+ * Shared state, all of it protected by mu and announced through cond.
+ * The mutex and condition variable use their POSIX static
+ * initializers rather than relying on zero-filled storage.
+ */
+pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
+int global_kept = 0; /* keepcaps value every thread should observe */
+int step = 0;        /* advanced by main(); threads wait for it */
+int replies = 0;     /* threads that have checked in for this step */
+int launched = 0;    /* threads main() currently expects replies from */
+int started = 0;     /* threads that have begun running say_hello() */
+
+/*
+ * Thread body: for three consecutive steps, wait until main() has
+ * advanced step far enough, check the keepcaps value of this thread
+ * against global_kept, and report back by incrementing replies.
+ * args is unused; the return value is always NULL.
+ */
+static void *say_hello(void *args) {
+ int count = 0;
+
+ /*
+ * Announce that this thread is running and note the first step it
+ * has to wait for, which is the one after the current step.
+ */
+ pthread_mutex_lock(&mu);
+ started++;
+ int this_step = step+1;
+ pthread_cond_broadcast(&cond);
+ pthread_mutex_unlock(&mu);
+
+ pthread_mutex_lock(&mu);
+ do {
+ /* Sleep until main() has raised step to at least this_step. */
+ while (this_step > step) {
+ pthread_cond_wait(&cond, &mu);
+ }
+ /* Exits the whole process if keepcaps differs from global_kept. */
+ say_hello_expecting("thread", count, global_kept);
+
+ /* Tell main() that this thread has checked in for this step. */
+ replies++;
+ pthread_cond_broadcast(&cond);
+ pthread_mutex_unlock(&mu);
+
+ this_step++;
+ pthread_mutex_lock(&mu);
+ } while (++count != 3);
+ pthread_mutex_unlock(&mu);
+
+ return NULL;
+}
+
+/*
+ * Test driver. Run without arguments it performs ten iterations; each
+ * one flips global_kept, applies it with psx_syscall(PR_SET_KEEPCAPS)
+ * and then checks that the main thread and every launched thread
+ * report that value. The first three iterations each start one
+ * say_hello() thread (and the very first also forks a child); the next
+ * three each join one of those threads.
+ *
+ * Run with any extra argument it acts as that forked child: it prints
+ * two lines around a short sleep and exits with status 0.
+ *
+ * Exits with status 0 after printing PASSED, or with status 1 when the
+ * child's wait status is non-zero or a keepcaps check fails.
+ */
+int main(int argc, char **argv) {
+ pthread_t tid[3];
+ int i;
+ pid_t child = 0;
+ /* argv handed to execve() below; "stop" makes argc != 1 in the child. */
+ char * const stop_argv[3] = { argv[0], strdup("stop"), NULL };
+
+ /* Child mode: reached when re-executed with the extra "stop" argument. */
+ if (argc != 1) {
+ printf("child %d starting\n", getpid());
+ usleep(2000);
+ printf("child %d exiting\n", getpid());
+ exit(0);
+ }
+
+ for (i = 0; i<10; i++) {
+ printf("iteration [%d]: %d\n", getpid(), i);
+
+ /* Publish the value expected for this iteration and reset the tally. */
+ pthread_mutex_lock(&mu);
+ global_kept = !global_kept;
+ replies = 0;
+ step = i;
+ pthread_mutex_unlock(&mu);
+
+ /* Apply the new keepcaps setting through libpsx. */
+ psx_syscall(SYS_prctl, PR_SET_KEEPCAPS, global_kept);
+
+ /* Release the threads that are waiting for this step. */
+ pthread_mutex_lock(&mu);
+ step++;
+ pthread_cond_broadcast(&cond);
+ pthread_mutex_unlock(&mu);
+
+ say_hello_expecting("main", i, global_kept);
+
+ /* Wait until every currently launched thread has checked in. */
+ pthread_mutex_lock(&mu);
+ while (replies < launched) {
+ pthread_cond_wait(&cond, &mu);
+ }
+ pthread_mutex_unlock(&mu);
+
+ if (i < 3) {
+ /* Only the first pass forks; child stays non-zero afterwards. */
+ /* NOTE(review): a failed fork() (-1) is not detected here. */
+ if (!child) {
+ child = fork();
+ if (!child) {
+ usleep(2000);
+ execve(argv[0], stop_argv, NULL);
+ perror("failed to exec");
+ exit(1);
+ } else {
+ printf("pid=%d forked -> %d\n", getpid(), child);
+ }
+ }
+ launched++;
+ pthread_create(&tid[i], NULL, say_hello, NULL);
+ /* Confirm that the thread is started. */
+ pthread_mutex_lock(&mu);
+ while (started < launched) {
+ printf("[%d] started=%d vs %d\n", getpid(), started, launched);
+ pthread_cond_wait(&cond, &mu);
+ }
+ printf("[%d] started=%d vs %d\n", getpid(), started, launched);
+ pthread_cond_broadcast(&cond);
+ pthread_mutex_unlock(&mu);
+ } else if (i < 6) {
+ /* Confirm one thread has finished. */
+ /* Each thread replies three times, so tid[i-3] has just made its last. */
+ pthread_join(tid[i-3], NULL);
+ launched--;
+ }
+ }
+
+ /* Any non-zero wait status from the forked child fails the test. */
+ if (child) {
+ int status;
+ waitpid(child, &status, 0);
+ if (status) {
+ printf("child %d FAILED: %d\n", child, status);
+ exit(1);
+ }
+ }
+ printf("%s PASSED\n", argv[0]);
+ exit(0);
+}