linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Garzik <jgarzik@pobox.com>
To: Daniel Phillips <phillips@istop.com>
Cc: netdev@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [RFC] Net vm deadlock fix (take two)
Date: Sat, 6 Aug 2005 12:07:18 -0400	[thread overview]
Message-ID: <20050806160718.GB17136@havoc.gtf.org> (raw)
In-Reply-To: <200508061722.24106.phillips@istop.com>

On Sat, Aug 06, 2005 at 05:22:23PM +1000, Daniel Phillips wrote:
> Daniel
> 
> diff -up --recursive 2.6.12.3.clean/include/linux/gfp.h 2.6.12.3/include/linux/gfp.h
> --- 2.6.12.3.clean/include/linux/gfp.h	2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/linux/gfp.h	2005-08-05 21:53:09.000000000 -0400
> @@ -39,6 +39,7 @@ struct vm_area_struct;
>  #define __GFP_COMP	0x4000u	/* Add compound page metadata */
>  #define __GFP_ZERO	0x8000u	/* Return zeroed page on success */
>  #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
> +#define __GFP_MEMALLOC  0x20000u /* Use emergency reserves */
>  
>  #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
>  #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
> diff -up --recursive 2.6.12.3.clean/include/linux/netdevice.h 2.6.12.3/include/linux/netdevice.h
> --- 2.6.12.3.clean/include/linux/netdevice.h	2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/linux/netdevice.h	2005-08-06 01:06:18.000000000 -0400
> @@ -371,6 +371,8 @@ struct net_device
>  	struct Qdisc		*qdisc_ingress;
>  	struct list_head	qdisc_list;
>  	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
> +	int			rx_reserve;
> +	int			rx_reserve_used;
>  
>  	/* ingress path synchronizer */
>  	spinlock_t		ingress_lock;
> @@ -929,6 +931,28 @@ extern void		net_disable_timestamp(void)
>  extern char *net_sysctl_strdup(const char *s);
>  #endif
>  
> +static inline struct sk_buff *__dev_memalloc_skb(struct net_device *dev,
> +	unsigned length, int gfp_mask)
> +{
> +	struct sk_buff *skb = __dev_alloc_skb(length, gfp_mask);
> +	if (skb)
> +		goto done;
> +	if (dev->rx_reserve_used >= dev->rx_reserve)
> +		return NULL;
> +	if (!__dev_alloc_skb(length, gfp_mask|__GFP_MEMALLOC))
> +		return NULL;;
> +	dev->rx_reserve_used++;

Why bother with rx_reserve at all?  Why not just let the second
allocation fail, without the rx_reserve_used test?

Additionally, I think the rx_reserve_used accounting is wrong, since I
could simply free the skb -- but doing so would cause an rx_reserve_used
leak in your code, since you only decrement the counter in the TCP IPv4
path.


> +done:
> +	skb->dev = dev;
> +	return skb;
> +}
> +
> +static inline struct sk_buff *dev_alloc_skb_reserve(struct net_device *dev,
> +	unsigned length)
> +{
> +	return __dev_memalloc_skb(dev, length, GFP_ATOMIC);
> +}

unused function


> +
>  #endif /* __KERNEL__ */
>  
>  #endif	/* _LINUX_DEV_H */
> diff -up --recursive 2.6.12.3.clean/include/net/sock.h 2.6.12.3/include/net/sock.h
> --- 2.6.12.3.clean/include/net/sock.h	2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/net/sock.h	2005-08-05 21:53:09.000000000 -0400
> @@ -382,6 +382,7 @@ enum sock_flags {
>  	SOCK_NO_LARGESEND, /* whether to sent large segments or not */
>  	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
>  	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
> +	SOCK_MEMALLOC, /* protocol can use memalloc reserve */
>  };
>  
>  static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
> @@ -399,6 +400,11 @@ static inline int sock_flag(struct sock 
>  	return test_bit(flag, &sk->sk_flags);
>  }
>  
> +static inline int is_memalloc_sock(struct sock *sk)
> +{
> +	return sock_flag(sk, SOCK_MEMALLOC);
> +}
> +
>  static inline void sk_acceptq_removed(struct sock *sk)
>  {
>  	sk->sk_ack_backlog--;
> diff -up --recursive 2.6.12.3.clean/mm/page_alloc.c 2.6.12.3/mm/page_alloc.c
> --- 2.6.12.3.clean/mm/page_alloc.c	2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/mm/page_alloc.c	2005-08-05 21:53:09.000000000 -0400
> @@ -802,8 +802,8 @@ __alloc_pages(unsigned int __nocast gfp_
>  
>  	/* This allocation should allow future memory freeing. */
>  
> -	if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
> -			&& !in_interrupt()) {
> +	if ((((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
> +			&& !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) {
>  		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
>  			/* go through the zonelist yet again, ignoring mins */
>  			for (i = 0; (z = zones[i]) != NULL; i++) {
> diff -up --recursive 2.6.12.3.clean/net/ethernet/eth.c 2.6.12.3/net/ethernet/eth.c
> --- 2.6.12.3.clean/net/ethernet/eth.c	2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/net/ethernet/eth.c	2005-08-06 02:32:02.000000000 -0400
> @@ -281,6 +281,7 @@ void ether_setup(struct net_device *dev)
>  	dev->mtu		= 1500; /* eth_mtu */
>  	dev->addr_len		= ETH_ALEN;
>  	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
> +	dev->rx_reserve		= 50;
>  	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
>  	
>  	memset(dev->broadcast,0xFF, ETH_ALEN);
> diff -up --recursive 2.6.12.3.clean/net/ipv4/tcp_ipv4.c 2.6.12.3/net/ipv4/tcp_ipv4.c
> --- 2.6.12.3.clean/net/ipv4/tcp_ipv4.c	2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/net/ipv4/tcp_ipv4.c	2005-08-06 00:45:07.000000000 -0400
> @@ -1766,6 +1766,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
>  	if (!sk)
>  		goto no_tcp_socket;
>  
> +	if (skb->dev->rx_reserve_used) {
> +		skb->dev->rx_reserve_used--; // racy

If it's racy, use atomic_t or somesuch :)

	Jeff



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2005-08-06 16:07 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-08-06  7:22 Daniel Phillips
2005-08-06 16:07 ` Jeff Garzik [this message]
2005-08-06 17:46   ` Daniel Phillips

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050806160718.GB17136@havoc.gtf.org \
    --to=jgarzik@pobox.com \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=phillips@istop.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox