From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, netdev@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
Trond Myklebust <trond.myklebust@fys.uio.no>,
Thomas Graf <tgraf@suug.ch>, David Miller <davem@davemloft.net>,
James Bottomley <James.Bottomley@SteelEye.com>,
Mike Christie <michaelc@cs.wisc.edu>,
Andrew Morton <akpm@linux-foundation.org>,
Daniel Phillips <phillips@google.com>
Subject: [PATCH 28/40] nfs: enable swap on NFS
Date: Fri, 04 May 2007 12:27:19 +0200 [thread overview]
Message-ID: <20070504103201.987245048@chello.nl> (raw)
In-Reply-To: <20070504102651.923946304@chello.nl>
[-- Attachment #1: nfs-swapfile.patch --]
[-- Type: text/plain, Size: 7972 bytes --]
Provide an a_ops->swapfile() implementation for NFS. This sets the NFS
socket to SOCK_VMIO, runs socket reconnect under PF_MEMALLOC, and re-sets
SOCK_VMIO before engaging the protocol ->connect() method.
PF_MEMALLOC should allow the allocation of struct socket and related objects,
and the early (re)setting of SOCK_VMIO should allow us to receive the packets
required for the TCP connection buildup.
(swapping continues over a server reset during heavy network traffic)
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
---
fs/Kconfig | 19 +++++++++++++++
fs/nfs/file.c | 10 ++++++++
include/linux/sunrpc/xprt.h | 5 +++-
net/sunrpc/sched.c | 7 ++++-
net/sunrpc/xprtsock.c | 54 ++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 92 insertions(+), 3 deletions(-)
Index: linux-2.6-git/fs/nfs/file.c
===================================================================
--- linux-2.6-git.orig/fs/nfs/file.c
+++ linux-2.6-git/fs/nfs/file.c
@@ -324,6 +324,13 @@ static int nfs_launder_page(struct page
return nfs_wb_page(page_file_mapping(page)->host, page);
}
+#ifdef CONFIG_NFS_SWAP
+static int nfs_swapfile(struct address_space *mapping, int enable)
+{
+ return xs_swapper(NFS_CLIENT(mapping->host)->cl_xprt, enable);
+}
+#endif
+
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -338,6 +345,9 @@ const struct address_space_operations nf
.direct_IO = nfs_direct_IO,
#endif
.launder_page = nfs_launder_page,
+#ifdef CONFIG_NFS_SWAP
+ .swapfile = nfs_swapfile,
+#endif
};
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
Index: linux-2.6-git/include/linux/sunrpc/xprt.h
===================================================================
--- linux-2.6-git.orig/include/linux/sunrpc/xprt.h
+++ linux-2.6-git/include/linux/sunrpc/xprt.h
@@ -151,7 +151,9 @@ struct rpc_xprt {
unsigned int max_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char shutdown : 1, /* being shut down */
- resvport : 1; /* use a reserved port */
+ resvport : 1, /* use a reserved port */
+ swapper : 1; /* we're swapping over this
+ transport */
unsigned int bind_index; /* bind function index */
/*
@@ -244,6 +246,7 @@ void xprt_disconnect(struct rpc_xprt *
*/
struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
+int xs_swapper(struct rpc_xprt *xprt, int enable);
/*
* Reserved bit positions in xprt->state
Index: linux-2.6-git/net/sunrpc/sched.c
===================================================================
--- linux-2.6-git.orig/net/sunrpc/sched.c
+++ linux-2.6-git/net/sunrpc/sched.c
@@ -755,7 +755,10 @@ static void rpc_async_schedule(struct wo
void *rpc_malloc(struct rpc_task *task, size_t size)
{
size_t *buf;
- gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT;
+
+ if (RPC_IS_SWAPPER(task))
+ gfp |= __GFP_EMERGENCY;
size += sizeof(size_t);
if (size <= RPC_BUFFER_MAXSIZE)
@@ -837,7 +840,7 @@ void rpc_init_task(struct rpc_task *task
static struct rpc_task *
rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
}
static void rpc_free_task(struct rcu_head *rcu)
Index: linux-2.6-git/net/sunrpc/xprtsock.c
===================================================================
--- linux-2.6-git.orig/net/sunrpc/xprtsock.c
+++ linux-2.6-git/net/sunrpc/xprtsock.c
@@ -1215,11 +1215,15 @@ static void xs_udp_connect_worker(struct
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
+ unsigned long pflags = current->flags;
int err, status = -EIO;
if (xprt->shutdown || !xprt_bound(xprt))
goto out;
+ if (xprt->swapper)
+ current->flags |= PF_MEMALLOC;
+
/* Start by resetting any existing state */
xs_close(xprt);
@@ -1257,6 +1261,9 @@ static void xs_udp_connect_worker(struct
transport->sock = sock;
transport->inet = sk;
+ if (xprt->swapper)
+ sk_set_vmio(sk);
+
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1264,6 +1271,7 @@ static void xs_udp_connect_worker(struct
out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
}
/*
@@ -1302,11 +1310,15 @@ static void xs_tcp_connect_worker(struct
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
+ unsigned long pflags = current->flags;
int err, status = -EIO;
if (xprt->shutdown || !xprt_bound(xprt))
goto out;
+ if (xprt->swapper)
+ current->flags |= PF_MEMALLOC;
+
if (!sock) {
/* start from scratch */
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
@@ -1356,6 +1368,10 @@ static void xs_tcp_connect_worker(struct
write_unlock_bh(&sk->sk_callback_lock);
}
+
+ if (xprt->swapper)
+ sk_set_vmio(transport->inet);
+
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
@@ -1383,6 +1399,7 @@ out:
xprt_wake_pending_tasks(xprt, status);
out_clear:
xprt_clear_connecting(xprt);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
}
/**
@@ -1642,6 +1659,43 @@ int init_socket_xprt(void)
return 0;
}
+#ifdef CONFIG_SUNRPC_SWAP
+#define RPC_BUF_RESERVE_PAGES \
+ DIV_ROUND_UP((RPC_MAX_SLOT_TABLE * \
+ kobjsize(sizeof(struct rpc_rqst))), \
+ PAGE_SIZE)
+#define RPC_RESERVE_PAGES (RPC_BUF_RESERVE_PAGES + TX_RESERVE_PAGES)
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ int err = 0;
+
+ if (enable) {
+ /*
+ * keep one extra sock reference so the reserve won't dip
+ * when the socket gets reconnected.
+ */
+ sk_adjust_memalloc(1, RPC_RESERVE_PAGES);
+ sk_set_vmio(transport->inet);
+ xprt->swapper = 1;
+ } else if (xprt->swapper) {
+ xprt->swapper = 0;
+ sk_clear_vmio(transport->inet);
+ sk_adjust_memalloc(-1, -RPC_RESERVE_PAGES);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xs_swapper);
+#endif
+
/**
* cleanup_socket_xprt - remove xprtsock's sysctls
*
Index: linux-2.6-git/fs/Kconfig
===================================================================
--- linux-2.6-git.orig/fs/Kconfig
+++ linux-2.6-git/fs/Kconfig
@@ -1621,6 +1621,18 @@ config NFS_DIRECTIO
causes open() to return EINVAL if a file residing in NFS is
opened with the O_DIRECT flag.
+config NFS_SWAP
+ bool "Provide swap over NFS support"
+ default n
+ depends on NFS_FS
+ select SUNRPC_SWAP
+ help
+ This option enables swapon to work on files located on NFS mounts.
+
+ For more details, see Documentation/vm_deadlock.txt
+
+ If unsure, say N.
+
config NFSD
tristate "NFS server support"
depends on INET
@@ -1746,6 +1758,13 @@ config SUNRPC_BIND34
If unsure, say N to get traditional behavior (version 2 rpcbind
requests only).
+config SUNRPC_SWAP
+ def_bool n
+ depends on SUNRPC
+ select SLAB_FAIR
+ select NETVM
+ select SWAP_FILE
+
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-05-04 10:27 UTC|newest]
Thread overview: 78+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-04 10:26 [PATCH 00/40] Swap over Networked storage -v12 Peter Zijlstra
2007-05-04 10:26 ` [PATCH 01/40] mm: page allocation rank Peter Zijlstra
2007-05-04 10:26 ` [PATCH 02/40] mm: slab allocation fairness Peter Zijlstra
2007-05-16 20:41 ` Christoph Lameter
2007-05-04 10:26 ` [PATCH 03/40] mm: allow PF_MEMALLOC from softirq context Peter Zijlstra
2007-05-04 10:26 ` [PATCH 04/40] mm: serialize access to min_free_kbytes Peter Zijlstra
2007-05-04 10:26 ` [PATCH 05/40] mm: emergency pool Peter Zijlstra
2007-05-04 10:26 ` [PATCH 06/40] mm: __GFP_EMERGENCY Peter Zijlstra
2007-05-04 10:26 ` [PATCH 07/40] mm: allow mempool to fall back to memalloc reserves Peter Zijlstra
2007-05-04 10:26 ` [PATCH 08/40] mm: kmem_cache_objsize Peter Zijlstra
2007-05-04 10:54 ` Pekka Enberg
2007-05-04 16:09 ` Christoph Lameter
2007-05-04 16:15 ` Peter Zijlstra
2007-05-04 16:23 ` Christoph Lameter
2007-05-04 16:30 ` Peter Zijlstra
2007-05-04 16:36 ` Christoph Lameter
2007-05-04 17:59 ` Peter Zijlstra
2007-05-04 18:04 ` Christoph Lameter
2007-05-04 18:21 ` Peter Zijlstra
2007-05-04 18:30 ` Christoph Lameter
2007-05-04 18:32 ` Peter Zijlstra
2007-05-04 18:45 ` Pekka Enberg
2007-05-04 18:47 ` Christoph Lameter
2007-05-04 18:54 ` Pekka Enberg
2007-05-04 19:59 ` Christoph Lameter
2007-05-05 9:00 ` Pekka J Enberg
2007-05-04 18:41 ` Pekka Enberg
2007-05-04 18:46 ` Christoph Lameter
2007-05-04 18:53 ` Pekka Enberg
2007-05-04 19:58 ` Christoph Lameter
2007-05-04 10:27 ` [PATCH 09/40] mm: optimize gfp_to_rank() Peter Zijlstra
2007-05-04 10:27 ` [PATCH 10/40] selinux: tag avc cache alloc as non-critical Peter Zijlstra
2007-05-04 10:27 ` [PATCH 11/40] net: wrap sk->sk_backlog_rcv() Peter Zijlstra
2007-05-04 10:27 ` [PATCH 12/40] net: packet split receive api Peter Zijlstra
2007-05-04 10:27 ` [PATCH 13/40] net: sk_allocation() - concentrate socket related allocations Peter Zijlstra
2007-05-04 10:27 ` [PATCH 14/40] netvm: link network to vm layer Peter Zijlstra
2007-05-04 10:27 ` [PATCH 15/40] netvm: INET reserves Peter Zijlstra
2007-05-04 10:27 ` [PATCH 16/40] netvm: hook skb allocation to reserves Peter Zijlstra
2007-05-04 14:07 ` Arnaldo Carvalho de Melo
2007-05-04 10:27 ` [PATCH 17/40] netvm: filter emergency skbs Peter Zijlstra
2007-05-04 10:27 ` [PATCH 18/40] netvm: prevent a TCP specific deadlock Peter Zijlstra
2007-05-04 10:27 ` [PATCH 19/40] netfilter: notify about NF_QUEUE vs emergency skbs Peter Zijlstra
2007-05-04 10:27 ` [PATCH 20/40] netvm: skb processing Peter Zijlstra
2007-05-04 10:27 ` [PATCH 21/40] uml: rename arch/um remove_mapping() Peter Zijlstra
2007-05-04 10:27 ` [PATCH 22/40] mm: prepare swap entry methods for use in page methods Peter Zijlstra
2007-05-04 10:27 ` [PATCH 23/40] mm: add support for non block device backed swap files Peter Zijlstra
2007-05-04 10:27 ` [PATCH 24/40] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2007-05-04 10:27 ` [PATCH 25/40] nfs: remove mempools Peter Zijlstra
2007-05-04 10:27 ` [PATCH 26/40] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2007-05-04 10:27 ` [PATCH 27/40] nfs: disable data cache revalidation for swapfiles Peter Zijlstra
2007-05-04 10:27 ` Peter Zijlstra [this message]
2007-05-04 10:27 ` [PATCH 29/40] nfs: fix various memory recursions possible with swap over NFS Peter Zijlstra
2007-05-04 10:27 ` [PATCH 30/40] nfs: fixup missing error code Peter Zijlstra
2007-05-04 13:10 ` Peter Staubach
2007-05-04 13:18 ` Peter Zijlstra
2007-05-04 10:27 ` [PATCH 31/40] mm: balance_dirty_pages() vs throttle_vm_writeout() deadlock Peter Zijlstra
2007-05-04 10:27 ` [PATCH 32/40] block: add a swapdev callback to the request_queue Peter Zijlstra
2007-05-04 10:27 ` [PATCH 33/40] uml: enable scsi and add iscsi config Peter Zijlstra
2007-05-04 10:27 ` [PATCH 34/40] sock: safely expose kernel sockets to userspace Peter Zijlstra
2007-05-04 10:27 ` [PATCH 35/40] From: Mike Christie <mchristi@redhat.com> Peter Zijlstra
2007-05-04 10:27 ` [PATCH 36/40] iscsi: fixup of the ep_connect patch Peter Zijlstra
2007-05-04 10:27 ` [PATCH 37/40] iscsi: ensure the iscsi kernel fd is not usable in userspace Peter Zijlstra
2007-05-04 10:27 ` [PATCH 38/40] netlink: add SOCK_VMIO support to AF_NETLINK Peter Zijlstra
2007-05-04 10:27 ` [PATCH 39/40] mm: a process flags to avoid blocking allocations Peter Zijlstra
2007-05-04 10:27 ` [PATCH 40/40] iscsi: support for swapping over iSCSI Peter Zijlstra
2007-05-04 15:22 ` [PATCH 00/40] Swap over Networked storage -v12 Daniel Walker
2007-05-04 15:38 ` Peter Zijlstra
2007-05-04 15:59 ` Daniel Walker
2007-05-04 18:09 ` Mike Snitzer
2007-05-04 19:31 ` Daniel Walker
2007-05-04 19:54 ` David Miller, Mike Snitzer
2007-05-04 21:36 ` Arnaldo Carvalho de Melo
2007-05-04 19:27 ` David Miller, Peter Zijlstra
2007-05-04 19:41 ` Peter Zijlstra
2007-05-04 20:02 ` David Miller, Peter Zijlstra
2007-05-04 20:29 ` Jeff Garzik
2007-05-05 9:43 ` Christoph Hellwig
2007-05-05 9:55 ` William Lee Irwin III
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070504103201.987245048@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=James.Bottomley@SteelEye.com \
--cc=akpm@linux-foundation.org \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=michaelc@cs.wisc.edu \
--cc=netdev@vger.kernel.org \
--cc=phillips@google.com \
--cc=tgraf@suug.ch \
--cc=trond.myklebust@fys.uio.no \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox