linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@osdl.org>
To: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Cc: David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH 1/1] network memory allocator.
Date: Tue, 15 Aug 2006 00:27:24 -0700	[thread overview]
Message-ID: <20060815002724.a635d775.akpm@osdl.org> (raw)
In-Reply-To: <20060814110359.GA27704@2ka.mipt.ru>

On Mon, 14 Aug 2006 15:04:03 +0400
Evgeniy Polyakov <johnpol@2ka.mipt.ru> wrote:

> 
> Design of allocator allows to map all node's pages into userspace thus
> allows to have true zero-copy support for both sending and receiving
> dataflows.

If the pages can be order-1 or higher then they'll need to be compound
pages so that the kernel gets the page's refcounting correct if the user
tries to perform direct-io against them (get_user_pages()).

And compound pages use ->lru for internal metadata - see prep_compound_page().

> +static avl_t avl_node_id[NR_CPUS];
> +static struct avl_node **avl_node_array[NR_CPUS];
> +static struct list_head *avl_container_array[NR_CPUS];
> +static struct avl_node *avl_root[NR_CPUS];
> +static struct avl_free_list *avl_free_list_head[NR_CPUS];
> +static spinlock_t avl_free_lock[NR_CPUS];

There will be heaps of cacheline pingpong accessing these arrays.  I'd have
though that

static struct whatever {
	avl_t avl_node_id;
	struct avl_node **avl_node_array;
	struct list_head *avl_container_array;
	struct avl_node *avl_root;
	struct avl_free_list *avl_free_list_head;
	spinlock_t avl_free_lock;
} __cacheline_aligned_in_smp whatevers[NR_CPUS];

would be better.

> +typedef unsigned long value_t;
> +typedef u16 avl_t;

Do we really need the typedefs?

If so, these have too generic names for globally-scoped identifiers.

> +static inline struct avl_node *avl_get_node(avl_t id, int cpu)
> +{
> +	avl_t idx, off;
> +
> +	if (id >= AVL_NODE_NUM)
> +		return NULL;
> +	
> +	idx = id/AVL_NODES_ON_PAGE;
> +	off = id%AVL_NODES_ON_PAGE;
> +
> +	return &(avl_node_array[cpu][idx][off]);
> +}

Too big to inline.

> +static inline struct avl_node *avl_get_node_ptr(unsigned long ptr)
> +{
> +	struct page *page = virt_to_page(ptr);
> +	struct avl_node *node = (struct avl_node *)(page->lru.next);
> +
> +	return node;
> +}

Probably OK.

> +static inline void avl_set_node_ptr(unsigned long ptr, struct avl_node *node, int order)
> +{
> +	int nr_pages = 1<<order, i;
> +	struct page *page = virt_to_page(ptr);
> +	
> +	for (i=0; i<nr_pages; ++i) {
> +		page->lru.next = (void *)node;
> +		page++;
> +	}
> +}

Too big

> +static inline int avl_get_cpu_ptr(unsigned long ptr)
> +{
> +	struct page *page = virt_to_page(ptr);
> +	int cpu = (int)(unsigned long)(page->lru.prev);
> +
> +	return cpu;
> +}

Too big

> +static inline void avl_set_cpu_ptr(unsigned long ptr, int cpu, int order)
> +{
> +	int nr_pages = 1<<order, i;
> +	struct page *page = virt_to_page(ptr);
> +			
> +	for (i=0; i<nr_pages; ++i) {
> +		page->lru.prev = (void *)(unsigned long)cpu;
> +		page++;
> +	}
> +}

Too big

> +static inline enum avl_balance avl_compare(struct avl_node *a, struct avl_node *b)
> +{
> +	if (a->value == b->value)
> +		return AVL_BALANCED;
> +	else if (a->value > b->value)
> +		return AVL_RIGHT;
> +	else
> +		return AVL_LEFT;
> +}

Might be too big.

> +static inline void avl_set_left(struct avl_node *node, avl_t val)
> +{
> +	node->left = val;
> +}
> +
> +static inline void avl_set_right(struct avl_node *node, avl_t val)
> +{
> +	node->right = val;
> +}
> +
> +static inline void avl_set_parent(struct avl_node *node, avl_t val)
> +{
> +	node->parent = val;
> +}

Not too big ;)

> +static inline void avl_rotate_complete(struct avl_node *parent, struct avl_node *node, int cpu)
> +{
> +	avl_set_parent(node, parent->parent);
> +	if (parent->parent != AVL_NODE_EMPTY) {
> +		if (parent->pos == AVL_RIGHT)
> +			avl_set_right(avl_get_node(parent->parent, cpu), node->id);
> +		else
> +			avl_set_left(avl_get_node(parent->parent, cpu), node->id);
> +	}
> +	avl_set_parent(parent, node->id);
> +}
> +
> +static inline void avl_ll(struct avl_node *node, int cpu)
> +{
> +	struct avl_node *parent = avl_get_node(node->parent, cpu);
> +	struct avl_node *left = avl_get_node(node->left, cpu);
> +	
> +	avl_rotate_complete(parent, node, cpu);
> +	avl_set_left(node, parent->id);
> +	node->pos = parent->pos;
> +	parent->pos = AVL_LEFT;
> +	if (!left) {
> +		avl_set_right(parent, AVL_NODE_EMPTY);
> +	} else {
> +		avl_set_parent(left, parent->id);
> +		left->pos = AVL_RIGHT;
> +		avl_set_right(parent, left->id);
> +	}
> +}
> +
> +static inline void avl_rr(struct avl_node *node, int cpu)
> +{
> +	struct avl_node *parent = avl_get_node(node->parent, cpu);
> +	struct avl_node *right = avl_get_node(node->right, cpu);
> +
> +	avl_rotate_complete(parent, node, cpu);
> +	avl_set_right(node, parent->id);
> +	node->pos = parent->pos;
> +	parent->pos = AVL_RIGHT;
> +	if (!right)
> +		avl_set_left(parent, AVL_NODE_EMPTY);
> +	else {
> +		avl_set_parent(right, parent->id);
> +		right->pos = AVL_LEFT;
> +		avl_set_left(parent, right->id);
> +	}
> +}
> +
> +static inline void avl_rl_balance(struct avl_node *node, int cpu)
> +{
> +	avl_rr(node, cpu);
> +	avl_ll(node, cpu);
> +}
> +
> +static inline void avl_lr_balance(struct avl_node *node, int cpu)
> +{
> +	avl_ll(node, cpu);
> +	avl_rr(node, cpu);
> +}
> +
> +static inline void avl_balance_single(struct avl_node *node, struct avl_node *parent)
> +{
> +	node->balance = parent->balance = AVL_BALANCED;
> +}
> +
> +static inline void avl_ll_balance(struct avl_node *node, int cpu)
> +{
> +	struct avl_node *parent = avl_get_node(node->parent, cpu);
> +
> +	avl_ll(node, cpu);
> +	avl_balance_single(node, parent);
> +}
> +
> +static inline void avl_rr_balance(struct avl_node *node, int cpu)
> +{
> +	struct avl_node *parent = avl_get_node(node->parent, cpu);
> +
> +	avl_rr(node, cpu);
> +	avl_balance_single(node, parent);
> +}

All way too big.

(Yes, avl_rr_balance() has a single callsite, as does avl_ll_balance(), but
they each have separate copies of the very large avl_rr())

> +	/*
> +	 * NTA steals pages and never return them back to the system.
> +	 */

That's the only code comment in this entire 882-line file?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2006-08-15  7:27 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-14 11:04 Evgeniy Polyakov
2006-08-14 11:22 ` David Miller, Evgeniy Polyakov
2006-08-14 11:32   ` Evgeniy Polyakov
2006-08-14 11:40 ` Andi Kleen
2006-08-14 11:46   ` Evgeniy Polyakov
2006-08-14 12:07 ` Keith Owens
2006-08-14 12:20   ` Evgeniy Polyakov
2006-08-14 17:42     ` Rick Jones
2006-08-14 20:15       ` David Miller, Rick Jones
2006-08-14 12:25 ` Peter Zijlstra
2006-08-14 12:35   ` Evgeniy Polyakov
2006-08-14 12:38     ` Evgeniy Polyakov
2006-08-15 10:55     ` Peter Zijlstra
2006-08-15 11:26       ` Evgeniy Polyakov
2006-08-15 12:03         ` Peter Zijlstra
2006-08-15 12:34           ` Evgeniy Polyakov
2006-08-15 13:49             ` Peter Zijlstra
2006-08-15 14:15               ` Evgeniy Polyakov
2006-08-15 14:48                 ` Peter Zijlstra
2006-08-15 15:05                   ` Evgeniy Polyakov
2006-08-15 15:07                     ` Evgeniy Polyakov
2006-08-15 17:42                     ` Peter Zijlstra
2006-08-15 17:49                       ` Evgeniy Polyakov
2006-08-16  2:52                 ` Bill Fink
2006-08-16  5:38                   ` Evgeniy Polyakov
2006-08-14 17:46 ` Rick Jones
2006-08-14 19:42   ` Evgeniy Polyakov
2006-08-15  7:27 ` Andrew Morton [this message]
2006-08-15  8:08   ` Andi Kleen
2006-08-15 10:02     ` Evgeniy Polyakov
2006-08-15 10:27       ` David Miller, Evgeniy Polyakov
2006-08-15  9:20   ` Evgeniy Polyakov
2006-08-15 20:21 ` Arnd Bergmann
2006-08-16  5:35   ` Evgeniy Polyakov
2006-08-16  8:48     ` Christoph Hellwig
2006-08-16  9:00       ` Evgeniy Polyakov
2006-08-16  9:05         ` David Miller, Evgeniy Polyakov
2006-08-16  9:10           ` Christoph Hellwig
2006-08-16  9:32             ` Evgeniy Polyakov
2006-08-16  9:38               ` Christoph Hellwig
2006-08-16  9:40                 ` David Miller, Christoph Hellwig
2006-08-16  9:44                   ` Christoph Hellwig
2006-08-16  9:42         ` Christoph Hellwig
2006-08-16 11:27         ` Arnd Bergmann
2006-08-16 12:00           ` Evgeniy Polyakov
2006-08-16 12:25       ` Andi Kleen
2006-08-18  2:25         ` Christoph Lameter
2006-08-18  9:29           ` Andi Kleen
2006-08-18  8:51             ` David Miller, Andi Kleen
2006-08-18 17:04             ` Christoph Lameter
2006-08-16  7:51 ` [PATCH2 " Evgeniy Polyakov
2006-08-16 16:57   ` Stephen Hemminger
2006-08-16 19:27     ` Evgeniy Polyakov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060815002724.a635d775.akpm@osdl.org \
    --to=akpm@osdl.org \
    --cc=davem@davemloft.net \
    --cc=johnpol@2ka.mipt.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox