linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Oren Laadan <orenl@librato.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@osdl.org>,
	containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-api@vger.kernel.org, Serge Hallyn <serue@us.ibm.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Pavel Emelyanov <xemul@openvz.org>,
	Alexey Dobriyan <adobriyan@gmail.com>
Subject: [RFC v17][PATCH 57/60] c/r: capabilities: define checkpoint and restore fns
Date: Wed, 22 Jul 2009 06:00:19 -0400	[thread overview]
Message-ID: <1248256822-23416-58-git-send-email-orenl@librato.com> (raw)
In-Reply-To: <1248256822-23416-1-git-send-email-orenl@librato.com>

From: Serge E. Hallyn <serue@us.ibm.com>

[ Andrew: I am punting on dealing with the subsystem cooperation
issues in this version, in favor of trying to get LSM issues
straightened out ]

An application checkpoint image will store each capability set
(and the bounding set) as a pair of __u32s.  Define checkpoint and
restore functions to translate between those and kernel_cap_t's.

Define a common function do_capset_tocred() which applies capability
set changes to a passed-in struct cred.

The restore function uses do_capset_tocred() to apply the restored
capabilities to the struct cred being crafted, subject to the
current task's (task executing sys_restart()) permissions.

Changelog:
	Jun 09: Can't choose securebits or drop bounding set if
		file capabilities aren't compiled into the kernel.
		Also just store caps in __u32s (looks cleaner).
	Jun 01: Made the checkpoint and restore functions and the
		ckpt_hdr_capabilities struct more opaque to the
		rest of the c/r code, as suggested by Andrew Morgan,
		and using naming suggested by Oren.
	Jun 01: Add commented BUILD_BUG_ON() to point out that the
		current implementation depends on 64-bit capabilities.
		(Andrew Morgan and Alexey Dobriyan).
	May 28: add helpers to c/r securebits

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
 include/linux/capability.h     |    6 ++
 include/linux/checkpoint_hdr.h |   11 +++
 kernel/capability.c            |  164 +++++++++++++++++++++++++++++++++++++---
 security/commoncap.c           |   19 +----
 4 files changed, 172 insertions(+), 28 deletions(-)

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c302110..3a74655 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -568,6 +568,12 @@ extern int capable(int cap);
 struct dentry;
 extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
 
+struct cred;
+int apply_securebits(unsigned securebits, struct cred *new);
+struct ckpt_capabilities;
+int restore_capabilities(struct ckpt_capabilities *h, struct cred *new);
+void checkpoint_capabilities(struct ckpt_capabilities *h, struct cred *cred);
+
 #endif /* __KERNEL__ */
 
 #endif /* !_LINUX_CAPABILITY_H */
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 3671e72..1f6a33d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -60,6 +60,7 @@ enum {
 	CKPT_HDR_NS,
 	CKPT_HDR_UTS_NS,
 	CKPT_HDR_IPC_NS,
+	CKPT_HDR_CAPABILITIES,
 
 	/* 201-299: reserved for arch-dependent */
 
@@ -191,6 +192,16 @@ struct ckpt_hdr_task {
 	__u64 robust_futex_list; /* a __user ptr */
 } __attribute__((aligned(8)));
 
+/* Posix capabilities */
+struct ckpt_capabilities {
+	__u32 cap_i_0, cap_i_1; /* inheritable set */
+	__u32 cap_p_0, cap_p_1; /* permitted set */
+	__u32 cap_e_0, cap_e_1; /* effective set */
+	__u32 cap_b_0, cap_b_1; /* bounding set */
+	__u32 securebits;
+	__u32 padding;
+} __attribute__((aligned(8)));
+
 /* namespaces */
 struct ckpt_hdr_task_ns {
 	struct ckpt_hdr h;
diff --git a/kernel/capability.c b/kernel/capability.c
index 4e17041..4f58454 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,8 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
+#include <linux/securebits.h>
+#include <linux/checkpoint.h>
 #include <asm/uaccess.h>
 #include "cred-internals.h"
 
@@ -217,6 +219,45 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 	return ret;
 }
 
+static int do_capset_tocred(kernel_cap_t *effective, kernel_cap_t *inheritable,
+			kernel_cap_t *permitted, struct cred *new)
+{
+	int ret;
+
+	ret = security_capset(new, current_cred(),
+			      effective, inheritable, permitted);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * for checkpoint-restart, do we want to wait until end of restart?
+	 * not sure we care */
+	audit_log_capset(current->pid, new, current_cred());
+
+	return 0;
+}
+
+static int do_capset(kernel_cap_t *effective, kernel_cap_t *inheritable,
+			kernel_cap_t *permitted)
+{
+	struct cred *new;
+	int ret;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = do_capset_tocred(effective, inheritable, permitted, new);
+	if (ret < 0)
+		goto error;
+
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
+	return ret;
+}
+
 /**
  * sys_capset - set capabilities for a process or (*) a group of processes
  * @header: pointer to struct that contains capability version and
@@ -240,7 +281,6 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
 	kernel_cap_t inheritable, permitted, effective;
-	struct cred *new;
 	int ret;
 	pid_t pid;
 
@@ -271,23 +311,125 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 		i++;
 	}
 
-	new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
+	return do_capset(&effective, &inheritable, &permitted);
 
-	ret = security_capset(new, current_cred(),
-			      &effective, &inheritable, &permitted);
+}
+
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+int apply_securebits(unsigned securebits, struct cred *new)
+{
+	if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
+	     & (new->securebits ^ securebits))				/*[1]*/
+	    || ((new->securebits & SECURE_ALL_LOCKS & ~securebits))	/*[2]*/
+	    || (securebits & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
+	    || (cap_capable(current, current_cred(), CAP_SETPCAP,
+			    SECURITY_CAP_AUDIT) != 0)			/*[4]*/
+		/*
+		 * [1] no changing of bits that are locked
+		 * [2] no unlocking of locks
+		 * [3] no setting of unsupported bits
+		 * [4] doing anything requires privilege (go read about
+		 *     the "sendmail capabilities bug")
+		 */
+	    )
+		/* any of [1]-[4] failed: refuse, leaving 'new' unmodified */
+		return -EPERM;
+	new->securebits = securebits;	/* all checks passed */
+	return 0;
+}
+
+static void do_capbset_drop(struct cred *cred, int cap)
+{
+	cap_lower(cred->cap_bset, cap);
+}
+
+static inline int restore_cap_bset(kernel_cap_t bset, struct cred *cred)
+{
+	int i, may_dropbcap = capable(CAP_SETPCAP);
+
+	for (i = 0; i <= CAP_LAST_CAP; i++) {	/* inclusive: last cap too */
+		if (cap_raised(bset, i))	/* kept in image: no drop */
+			continue;
+		if (!cap_raised(current_cred()->cap_bset, i))	/* already gone */
+			continue;
+		if (!may_dropbcap)		/* dropping needs CAP_SETPCAP */
+			return -EPERM;
+		do_capbset_drop(cred, i);
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_SECURITY_FILE_CAPABILITIES */
+
+int apply_securebits(unsigned securebits, struct cred *new)
+{
+	/* settable securebits not supported */
+	return 0;
+}
+
+static inline int restore_cap_bset(kernel_cap_t bset, struct cred *cred)
+{
+	/* bounding sets not supported */
+	return 0;
+}
+#endif /* CONFIG_SECURITY_FILE_CAPABILITIES */
+
+#ifdef CONFIG_CHECKPOINT
+static int do_restore_caps(struct ckpt_capabilities *h, struct cred *cred)
+{
+	kernel_cap_t effective, inheritable, permitted, bset;
+	int ret;
+
+	effective.cap[0] = h->cap_e_0;
+	effective.cap[1] = h->cap_e_1;
+	inheritable.cap[0] = h->cap_i_0;
+	inheritable.cap[1] = h->cap_i_1;
+	permitted.cap[0] = h->cap_p_0;
+	permitted.cap[1] = h->cap_p_1;
+	bset.cap[0] = h->cap_b_0;
+	bset.cap[1] = h->cap_b_1;
+
+	ret = do_capset_tocred(&effective, &inheritable, &permitted, cred);
 	if (ret < 0)
-		goto error;
+		return ret;
+
+	ret = restore_cap_bset(bset, cred);
+	return ret;
+}
 
-	audit_log_capset(pid, new, current_cred());
+void checkpoint_capabilities(struct ckpt_capabilities *h, struct cred *cred)
+{
+	BUILD_BUG_ON(CAP_LAST_CAP >= 64);	/* sets saved as 2 x __u32 */
+	h->securebits = cred->securebits;
+	h->cap_i_0 = cred->cap_inheritable.cap[0];
+	h->cap_i_1 = cred->cap_inheritable.cap[1];
+	h->cap_p_0 = cred->cap_permitted.cap[0];
+	h->cap_p_1 = cred->cap_permitted.cap[1];
+	h->cap_e_0 = cred->cap_effective.cap[0];
+	h->cap_e_1 = cred->cap_effective.cap[1];
+	h->cap_b_0 = cred->cap_bset.cap[0];
+	h->cap_b_1 = cred->cap_bset.cap[1];
+}
 
-	return commit_creds(new);
+/*
+ * restore_capabilities: called by restore_creds() to set the
+ * restored capabilities (if permitted) in a new struct cred which
+ * will be attached at the end of the sys_restart().
+ * struct cred *new is prepared by caller (using prepare_creds())
+ * (and aborted by caller on error)
+ * return 0 on success, < 0 on error
+ */
+int restore_capabilities(struct ckpt_capabilities *h, struct cred *new)
+{
+	int ret = do_restore_caps(h, new);
+
+	if (!ret)
+		ret = apply_securebits(h->securebits, new);
 
-error:
-	abort_creds(new);
 	return ret;
 }
+#endif /* CONFIG_CHECKPOINT */
 
 /**
  * capable - Determine if the current task has a superior capability in effect
diff --git a/security/commoncap.c b/security/commoncap.c
index 48b7e02..2456b46 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -893,24 +893,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 	 * capability-based-privilege environment.
 	 */
 	case PR_SET_SECUREBITS:
-		error = -EPERM;
-		if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
-		     & (new->securebits ^ arg2))			/*[1]*/
-		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
-		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, current_cred(), CAP_SETPCAP,
-				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
-			/*
-			 * [1] no changing of bits that are locked
-			 * [2] no unlocking of locks
-			 * [3] no setting of unsupported bits
-			 * [4] doing anything requires privilege (go read about
-			 *     the "sendmail capabilities bug")
-			 */
-		    )
-			/* cannot change a locked bit */
+		error = apply_securebits(arg2, new);
+		if (error)
 			goto error;
-		new->securebits = arg2;
 		goto changed;
 
 	case PR_GET_SECUREBITS:
-- 
1.6.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2009-07-22 10:10 UTC|newest]

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-22  9:59 [RFC v17][PATCH 00/60] Kernel based checkpoint/restart Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 01/60] c/r: extend arch_setup_additional_pages() Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 02/60] x86: ptrace debugreg checks rewrite Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 03/60] c/r: break out new_user_ns() Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 04/60] c/r: split core function out of some set*{u,g}id functions Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 05/60] cgroup freezer: Fix buggy resume test for tasks frozen with cgroup freezer Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 06/60] cgroup freezer: Update stale locking comments Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 07/60] cgroup freezer: Add CHECKPOINTING state to safeguard container checkpoint Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 08/60] cgroup freezer: interface to freeze a cgroup from within the kernel Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 09/60] Namespaces submenu Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 10/60] c/r: make file_pos_read/write() public Oren Laadan
2009-07-23  2:33   ` KAMEZAWA Hiroyuki
2009-07-22  9:59 ` [RFC v17][PATCH 11/60] pids 1/7: Factor out code to allocate pidmap page Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 12/60] pids 2/7: Have alloc_pidmap() return actual error code Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 13/60] pids 3/7: Add target_pid parameter to alloc_pidmap() Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 14/60] pids 4/7: Add target_pids parameter to alloc_pid() Oren Laadan
2009-08-03 18:22   ` Serge E. Hallyn
2009-07-22  9:59 ` [RFC v17][PATCH 15/60] pids 5/7: Add target_pids parameter to copy_process() Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 16/60] pids 6/7: Define do_fork_with_pids() Oren Laadan
2009-08-03 18:26   ` Serge E. Hallyn
2009-08-04  8:37     ` Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 17/60] pids 7/7: Define clone_with_pids syscall Oren Laadan
2009-07-29  0:44   ` Sukadev Bhattiprolu
2009-07-22  9:59 ` [RFC v17][PATCH 18/60] c/r: create syscalls: sys_checkpoint, sys_restart Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 19/60] c/r: documentation Oren Laadan
2009-07-23 14:24   ` Serge E. Hallyn
2009-07-23 15:24     ` Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 20/60] c/r: basic infrastructure for checkpoint/restart Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 21/60] c/r: x86_32 support " Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 22/60] c/r: external checkpoint of a task other than ourself Oren Laadan
2009-07-22 17:52   ` Serge E. Hallyn
2009-07-23  4:32     ` Oren Laadan
2009-07-23 13:12       ` Serge E. Hallyn
2009-07-23 14:14         ` Oren Laadan
2009-07-23 14:54       ` Serge E. Hallyn
2009-07-23 14:47   ` Serge E. Hallyn
2009-07-23 15:33     ` Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 23/60] c/r: export functionality used in next patch for restart-blocks Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 24/60] c/r: restart-blocks Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 25/60] c/r: checkpoint multiple processes Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 26/60] c/r: restart " Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 27/60] c/r: introduce PF_RESTARTING, and skip notification on exit Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 28/60] c/r: support for zombie processes Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 29/60] c/r: Save and restore the [compat_]robust_list member of the task struct Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 30/60] c/r: infrastructure for shared objects Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 31/60] c/r: detect resource leaks for whole-container checkpoint Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 32/60] c/r: introduce '->checkpoint()' method in 'struct file_operations' Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 33/60] c/r: dump open file descriptors Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 34/60] c/r: restore " Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 35/60] c/r: add generic '->checkpoint' f_op to ext fses Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 36/60] c/r: add generic '->checkpoint()' f_op to simple devices Oren Laadan
2009-07-22  9:59 ` [RFC v17][PATCH 37/60] c/r: introduce method '->checkpoint()' in struct vm_operations_struct Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 38/60] c/r: dump memory address space (private memory) Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 39/60] c/r: restore " Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 40/60] c/r: export shmem_getpage() to support shared memory Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 41/60] c/r: dump anonymous- and file-mapped- " Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 42/60] c/r: restore " Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 43/60] splice: export pipe/file-to-pipe/file functionality Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 44/60] c/r: support for open pipes Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 45/60] c/r: make ckpt_may_checkpoint_task() check each namespace individually Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 46/60] c/r: support for UTS namespace Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 47/60] deferqueue: generic queue to defer work Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 48/60] c/r (ipc): allow allocation of a desired ipc identifier Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 49/60] c/r: save and restore sysvipc namespace basics Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 50/60] c/r: support share-memory sysv-ipc Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 51/60] c/r: support message-queues sysv-ipc Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 52/60] c/r: support semaphore sysv-ipc Oren Laadan
2009-07-22 17:25   ` Cyrill Gorcunov
2009-07-23  3:46     ` Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 53/60] c/r: (s390): expose a constant for the number of words (CRs) Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 54/60] c/r: add CKPT_COPY() macro Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 55/60] c/r: define s390-specific checkpoint-restart code Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 56/60] c/r: clone_with_pids: define the s390 syscall Oren Laadan
2009-07-22 10:00 ` Oren Laadan [this message]
2009-07-22 10:00 ` [RFC v17][PATCH 58/60] c/r: checkpoint and restore task credentials Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 59/60] c/r: restore file->f_cred Oren Laadan
2009-07-22 10:00 ` [RFC v17][PATCH 60/60] c/r: checkpoint and restore (shared) task's sighand_struct Oren Laadan
2009-07-24 19:09 ` [RFC v17][PATCH 00/60] Kernel based checkpoint/restart Serge E. Hallyn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1248256822-23416-58-git-send-email-orenl@librato.com \
    --to=orenl@librato.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hpa@zytor.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=serue@us.ibm.com \
    --cc=torvalds@osdl.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox