From: Jeff Garzik <jgarzik@pobox.com>
To: Daniel Phillips <phillips@istop.com>
Cc: netdev@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [RFC] Net vm deadlock fix (take two)
Date: Sat, 6 Aug 2005 12:07:18 -0400 [thread overview]
Message-ID: <20050806160718.GB17136@havoc.gtf.org> (raw)
In-Reply-To: <200508061722.24106.phillips@istop.com>
On Sat, Aug 06, 2005 at 05:22:23PM +1000, Daniel Phillips wrote:
> Daniel
>
> diff -up --recursive 2.6.12.3.clean/include/linux/gfp.h 2.6.12.3/include/linux/gfp.h
> --- 2.6.12.3.clean/include/linux/gfp.h 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/linux/gfp.h 2005-08-05 21:53:09.000000000 -0400
> @@ -39,6 +39,7 @@ struct vm_area_struct;
> #define __GFP_COMP 0x4000u /* Add compound page metadata */
> #define __GFP_ZERO 0x8000u /* Return zeroed page on success */
> #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
> +#define __GFP_MEMALLOC 0x20000u /* Use emergency reserves */
>
> #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
> #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
> diff -up --recursive 2.6.12.3.clean/include/linux/netdevice.h 2.6.12.3/include/linux/netdevice.h
> --- 2.6.12.3.clean/include/linux/netdevice.h 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/linux/netdevice.h 2005-08-06 01:06:18.000000000 -0400
> @@ -371,6 +371,8 @@ struct net_device
> struct Qdisc *qdisc_ingress;
> struct list_head qdisc_list;
> unsigned long tx_queue_len; /* Max frames per queue allowed */
> + int rx_reserve;
> + int rx_reserve_used;
>
> /* ingress path synchronizer */
> spinlock_t ingress_lock;
> @@ -929,6 +931,28 @@ extern void net_disable_timestamp(void)
> extern char *net_sysctl_strdup(const char *s);
> #endif
>
> +static inline struct sk_buff *__dev_memalloc_skb(struct net_device *dev,
> + unsigned length, int gfp_mask)
> +{
> + struct sk_buff *skb = __dev_alloc_skb(length, gfp_mask);
> + if (skb)
> + goto done;
> + if (dev->rx_reserve_used >= dev->rx_reserve)
> + return NULL;
> + if (!__dev_alloc_skb(length, gfp_mask|__GFP_MEMALLOC))
> + return NULL;;
> + dev->rx_reserve_used++;
Why bother with rx_reserve at all? Why not just let the second
allocation fail, without the rx_reserve_used test?
Additionally, I think the rx_reserve_used accounting is wrong, since I
could simply free the skb -- but doing so would cause an rx_reserve_used
leak in your code, since you only decrement the counter in the TCP IPv4
path.
> +done:
> + skb->dev = dev;
> + return skb;
> +}
> +
> +static inline struct sk_buff *dev_alloc_skb_reserve(struct net_device *dev,
> + unsigned length)
> +{
> + return __dev_memalloc_skb(dev, length, GFP_ATOMIC);
> +}
unused function
> +
> #endif /* __KERNEL__ */
>
> #endif /* _LINUX_DEV_H */
> diff -up --recursive 2.6.12.3.clean/include/net/sock.h 2.6.12.3/include/net/sock.h
> --- 2.6.12.3.clean/include/net/sock.h 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/net/sock.h 2005-08-05 21:53:09.000000000 -0400
> @@ -382,6 +382,7 @@ enum sock_flags {
> SOCK_NO_LARGESEND, /* whether to sent large segments or not */
> SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
> SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
> + SOCK_MEMALLOC, /* protocol can use memalloc reserve */
> };
>
> static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
> @@ -399,6 +400,11 @@ static inline int sock_flag(struct sock
> return test_bit(flag, &sk->sk_flags);
> }
>
> +static inline int is_memalloc_sock(struct sock *sk)
> +{
> + return sock_flag(sk, SOCK_MEMALLOC);
> +}
> +
> static inline void sk_acceptq_removed(struct sock *sk)
> {
> sk->sk_ack_backlog--;
> diff -up --recursive 2.6.12.3.clean/mm/page_alloc.c 2.6.12.3/mm/page_alloc.c
> --- 2.6.12.3.clean/mm/page_alloc.c 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/mm/page_alloc.c 2005-08-05 21:53:09.000000000 -0400
> @@ -802,8 +802,8 @@ __alloc_pages(unsigned int __nocast gfp_
>
> /* This allocation should allow future memory freeing. */
>
> - if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
> - && !in_interrupt()) {
> + if ((((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
> + && !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) {
> if (!(gfp_mask & __GFP_NOMEMALLOC)) {
> /* go through the zonelist yet again, ignoring mins */
> for (i = 0; (z = zones[i]) != NULL; i++) {
> diff -up --recursive 2.6.12.3.clean/net/ethernet/eth.c 2.6.12.3/net/ethernet/eth.c
> --- 2.6.12.3.clean/net/ethernet/eth.c 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/net/ethernet/eth.c 2005-08-06 02:32:02.000000000 -0400
> @@ -281,6 +281,7 @@ void ether_setup(struct net_device *dev)
> dev->mtu = 1500; /* eth_mtu */
> dev->addr_len = ETH_ALEN;
> dev->tx_queue_len = 1000; /* Ethernet wants good queues */
> + dev->rx_reserve = 50;
> dev->flags = IFF_BROADCAST|IFF_MULTICAST;
>
> memset(dev->broadcast,0xFF, ETH_ALEN);
> diff -up --recursive 2.6.12.3.clean/net/ipv4/tcp_ipv4.c 2.6.12.3/net/ipv4/tcp_ipv4.c
> --- 2.6.12.3.clean/net/ipv4/tcp_ipv4.c 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/net/ipv4/tcp_ipv4.c 2005-08-06 00:45:07.000000000 -0400
> @@ -1766,6 +1766,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
> if (!sk)
> goto no_tcp_socket;
>
> + if (skb->dev->rx_reserve_used) {
> + skb->dev->rx_reserve_used--; // racy
If it's racy, use atomic_t or somesuch :)
Jeff
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2005-08-06 16:07 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-08-06 7:22 Daniel Phillips
2005-08-06 16:07 ` Jeff Garzik [this message]
2005-08-06 17:46 ` Daniel Phillips
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050806160718.GB17136@havoc.gtf.org \
--to=jgarzik@pobox.com \
--cc=linux-mm@kvack.org \
--cc=netdev@vger.kernel.org \
--cc=phillips@istop.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox