linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ray Bryant <raybry@sgi.com>
To: Hirokazu Takahashi <taka@valinux.co.jp>,
	Hugh DIckins <hugh@veritas.com>, Andrew Morton <akpm@osdl.org>,
	Dave Hansen <haveblue@us.ibm.com>,
	Marcello Tosatti <marcello@cyclades.com>
Cc: Ray Bryant <raybry@sgi.com>, Ray Bryant <raybry@austin.rr.com>,
	linux-mm <linux-mm@kvack.org>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [RFC 2.6.11-rc2-mm2 7/7] mm: manual page migration -- sys_page_migrate
Date: Fri, 11 Feb 2005 19:26:20 -0800 (PST)	[thread overview]
Message-ID: <20050212032620.18524.15178.29731@tomahawk.engr.sgi.com> (raw)
In-Reply-To: <20050212032535.18524.12046.26397@tomahawk.engr.sgi.com>

This patch introduces the sys_page_migrate() system call:

sys_page_migrate(pid, va_start, va_end, count, old_nodes, new_nodes);

Its intent is to cause the pages in the range given that are found on
old_nodes[i] to be moved to new_nodes[i].  Count is the the number of
entries in these two arrays of short.

Restrictions and limitations of this version:

(1)  va_start and va_end must be mapped by the same vma.  (The user
     can read /proc/pid/maps to find out the appropriate vma ranges.)
     This could easily be generalized, but has not been done for the
     moment.

(2)  There is no capability or authority checking being done here.
     Any process can migrate any other process.  This will be fixed
     in a future version, once we agree on what the authority model
     should be.

(3)  Eventually, we plan on adding a page_migrate entry to the 
     vm_operations_struct.  The problem is, in general, that only
     the object itself knows how to migrate its pages.  For the
     moment, we are only handling the case of anonymous private
     and memory mapped files, which handles practially all known
     cases, but there are som other cases that are peculiar to
     SN2 hardware that are not handled by the present code (e. g.
     fetch & op storage).  So for now, it is sufficient for us
     to test vma->vm_ops pointer; if this is null we are in the
     anonymoust private case, elsewise we are in the mapped file
     case.  The mapped file case handles mapped files, shared
     anonymouse storage, and shared segments.


Signed-off-by:Ray Bryant <raybry@sgi.com>

Index: linux-2.6.11-rc2-mm2/arch/ia64/kernel/entry.S
===================================================================
--- linux-2.6.11-rc2-mm2.orig/arch/ia64/kernel/entry.S	2005-02-11 08:18:58.000000000 -0800
+++ linux-2.6.11-rc2-mm2/arch/ia64/kernel/entry.S	2005-02-11 16:07:27.000000000 -0800
@@ -1581,6 +1581,6 @@ sys_call_table:
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
-	data8 sys_ni_syscall
+	data8 sys_page_migrate                  // 1279
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
Index: linux-2.6.11-rc2-mm2/mm/mmigrate.c
===================================================================
--- linux-2.6.11-rc2-mm2.orig/mm/mmigrate.c	2005-02-11 16:07:27.000000000 -0800
+++ linux-2.6.11-rc2-mm2/mm/mmigrate.c	2005-02-11 16:10:13.000000000 -0800
@@ -588,6 +588,228 @@ int try_to_migrate_pages(struct list_hea
 	return nr_busy;
 }
 
+static int
+migrate_vma_common(struct list_head *page_list, short *node_map, int count)
+{
+	int pass=0, remains, migrated;
+	struct page *page;
+
+	while(pass<10) {
+
+		remains = try_to_migrate_pages(page_list, node_map);
+
+		if (remains < 0)
+			return remains;
+
+		migrated = 0;
+		if (!list_empty(page_list))
+			list_for_each_entry(page, page_list, lru)
+				migrated++;
+		else {
+			migrated = count;
+			break;
+		}
+
+		pass++;
+
+		migrated = count - migrated;
+
+		/* wait a bit and try again */
+		msleep(10);
+
+	}
+	return migrated;
+}
+
+static int
+migrate_mapped_file_vma(struct task_struct *task, struct mm_struct *mm,
+	              struct vm_area_struct *vma, size_t va_start,
+		      size_t va_end, short *node_map)
+{
+	struct page *page;
+	struct zone *zone;
+	struct address_space *as;
+	int count = 0, nid, ret;
+	LIST_HEAD(page_list);
+	long idx, start_idx, end_idx;
+
+	va_start = va_start & PAGE_MASK;
+	va_end   = va_end   & PAGE_MASK;
+	start_idx = (va_start - vma->vm_start) >> PAGE_SHIFT;
+	end_idx   = (va_end   - vma->vm_start) >> PAGE_SHIFT;
+
+	if (!vma->vm_file || !vma->vm_file->f_mapping)
+		BUG();
+
+	as = vma->vm_file->f_mapping;
+
+	for (idx = start_idx; idx <= end_idx; idx++) {
+		page = find_get_page(as, idx);
+		if (page) {
+			page_cache_release(page);
+
+			if (!page_mapcount(page) && !page->mapping)
+				BUG();
+
+			nid = page_to_nid(page);
+			if (node_map[nid] > 0) {
+				zone = page_zone(page);
+				spin_lock_irq(&zone->lru_lock);
+				if (PageLRU(page) && 
+				    __steal_page_from_lru(zone, page)) {
+					count++;
+					list_add(&page->lru, &page_list);
+				} else 
+					BUG();
+				spin_unlock_irq(&zone->lru_lock);
+			}
+		} 
+	}
+
+	ret = migrate_vma_common(&page_list, node_map, count);
+
+	return ret;
+
+}
+
+static int
+migrate_anon_private_vma(struct task_struct *task, struct mm_struct *mm,
+	              struct vm_area_struct *vma, size_t va_start,
+		      size_t va_end, short *node_map)
+{
+	struct page *page;
+	struct zone *zone;
+	unsigned long vaddr;
+	int count = 0, nid, ret;
+	LIST_HEAD(page_list);
+
+	va_start = va_start & PAGE_MASK;
+	va_end   = va_end   & PAGE_MASK;
+
+	for (vaddr=va_start; vaddr<=va_end; vaddr += PAGE_SIZE) {
+		spin_lock(&mm->page_table_lock);
+		page = follow_page(mm, vaddr, 0);
+		spin_unlock(&mm->page_table_lock);
+		/* 
+		 * follow_page has been observed to return pages with zero 
+		 * mapcount and NULL mapping.  Skip those pages as well
+		 */
+		if (page && page_mapcount(page) && page->mapping) {
+			nid = page_to_nid(page);
+			if (node_map[nid] > 0) {
+				zone = page_zone(page);
+				spin_lock_irq(&zone->lru_lock);
+				if (PageLRU(page) &&
+			     	    __steal_page_from_lru(zone, page)) {
+					count++;
+					list_add(&page->lru, &page_list);
+				} else
+					BUG();
+				spin_unlock_irq(&zone->lru_lock);
+			}
+		}
+	}
+
+	ret = migrate_vma_common(&page_list, node_map, count);
+
+	return ret;
+}
+
+void lru_add_drain_per_cpu(void *info) {
+	lru_add_drain();
+}
+
+asmlinkage long
+sys_page_migrate(const pid_t pid, size_t va_start, size_t va_end,
+		const int count, caddr_t old_nodes, caddr_t new_nodes)
+{
+	int i, ret = 0;
+	short *tmp_old_nodes;
+	short *tmp_new_nodes;
+	short *node_map;
+	struct task_struct *task;
+	struct mm_struct *mm = 0;
+	size_t size = count*sizeof(short);
+	struct vm_area_struct *vma, *vma2;
+
+
+	tmp_old_nodes = (short *) kmalloc(size, GFP_KERNEL);
+	tmp_new_nodes = (short *) kmalloc(size, GFP_KERNEL);
+	node_map = (short *) kmalloc(MAX_NUMNODES*sizeof(short), GFP_KERNEL);
+
+	if (!tmp_old_nodes || !tmp_new_nodes || !node_map) {
+		ret = -ENOMEM;
+		goto out_nodec;
+	}
+
+	if (copy_from_user(tmp_old_nodes, old_nodes, size) || 
+	    copy_from_user(tmp_new_nodes, new_nodes, size)) {
+		ret = -EFAULT;
+		goto out_nodec;
+	}
+
+	read_lock(&tasklist_lock);
+	task = find_task_by_pid(pid);
+	if (task) {
+		task_lock(task);
+		mm = task->mm;
+		if (mm)
+			atomic_inc(&mm->mm_users);
+		task_unlock(task);
+	} else {
+		ret = -ESRCH;
+		goto out_nodec;
+	}
+	read_unlock(&tasklist_lock);
+	if (!mm) {
+		ret = -EINVAL;
+		goto out_nodec;
+	}
+
+	/* 
+	 * for now, we require both the start and end addresses to
+	 * be mapped by the same vma.
+	 */
+	vma = find_vma(mm, va_start);
+	vma2 = find_vma(mm, va_end);
+	if (!vma || !vma2 || (vma != vma2)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* set up the node_map array */
+	for(i=0; i<MAX_NUMNODES; i++)
+		node_map[i] = -1;
+	for(i=0; i<count; i++)
+		node_map[tmp_old_nodes[i]] = tmp_new_nodes[i];
+
+	/* prepare for lru list manipulation */
+ 	smp_call_function(&lru_add_drain_per_cpu, NULL, 0, 1);
+	lru_add_drain();
+
+	/* actually do the migration */
+	if (vma->vm_ops)
+		ret = migrate_mapped_file_vma(task, mm, vma, va_start, va_end,
+			node_map);
+	else
+		ret = migrate_anon_private_vma(task, mm, vma, va_start, va_end,
+			node_map);
+
+out:
+	atomic_dec(&mm->mm_users);
+
+out_nodec:
+	if (tmp_old_nodes)
+		kfree(tmp_old_nodes);
+	if (tmp_new_nodes)
+		kfree(tmp_new_nodes);
+	if (node_map)
+		kfree(node_map);
+
+	return ret;
+
+}
+
 EXPORT_SYMBOL(generic_migrate_page);
 EXPORT_SYMBOL(migrate_page_common);
 EXPORT_SYMBOL(migrate_page_buffer);

-- 
Best Regards,
Ray
-----------------------------------------------
Ray Bryant                       raybry@sgi.com
The box said: "Requires Windows 98 or better",
           so I installed Linux.
-----------------------------------------------
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

  parent reply	other threads:[~2005-02-12  3:26 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-02-12  3:25 [RFC 2.6.11-rc2-mm2 0/7] mm: manual page migration -- overview Ray Bryant
2005-02-12  3:25 ` [RFC 2.6.11-rc2-mm2 1/7] mm: manual page migration -- cleanup 1 Ray Bryant
2005-02-12  3:25 ` [RFC 2.6.11-rc2-mm2 2/7] mm: manual page migration -- cleanup 2 Ray Bryant
2005-02-12  3:25 ` [RFC 2.6.11-rc2-mm2 3/7] mm: manual page migration -- cleanup 3 Ray Bryant
2005-02-12  3:26 ` [RFC 2.6.11-rc2-mm2 4/7] mm: manual page migration -- cleanup 4 Ray Bryant
2005-02-12  3:26 ` [RFC 2.6.11-rc2-mm2 5/7] mm: manual page migration -- cleanup 5 Ray Bryant
2005-02-12  3:26 ` [RFC 2.6.11-rc2-mm2 6/7] mm: manual page migration -- add node_map arg to try_to_migrate_pages() Ray Bryant
2005-02-12  3:26 ` Ray Bryant [this message]
2005-02-12  8:08   ` [RFC 2.6.11-rc2-mm2 7/7] mm: manual page migration -- sys_page_migrate Paul Jackson
2005-02-12 12:34   ` Arjan van de Ven
2005-02-12 14:48     ` Andi Kleen
2005-02-12 20:51       ` Paul Jackson
2005-02-12 21:04   ` Dave Hansen
2005-02-12 21:44     ` Paul Jackson
2005-02-14 13:52     ` Robin Holt
2005-02-14 18:50       ` Dave Hansen
2005-02-14 22:01         ` Robin Holt
2005-02-14 22:22           ` Dave Hansen
2005-02-15 10:50             ` Robin Holt
2005-02-15 15:38               ` Paul Jackson
2005-02-15 18:39               ` Dave Hansen
2005-02-15 18:54                 ` Ray Bryant
2005-02-15 15:49           ` Paul Jackson
2005-02-15 16:21             ` Robin Holt
2005-02-15 16:35               ` Paul Jackson
2005-02-15 18:59                 ` Robin Holt
2005-02-15 20:54                   ` Dave Hansen
2005-02-15 21:58                   ` Peter Chubb
2005-02-15 22:10                     ` Paul Jackson
2005-02-15 22:51                     ` Robin Holt
2005-02-15 23:00                       ` Paul Jackson
2005-02-15 23:21                     ` Ray Bryant
2005-02-15 23:51                       ` Martin J. Bligh
2005-02-16  0:38                         ` Ray Bryant
2005-02-16  0:44                           ` Andi Kleen
2005-02-16  0:54                             ` Martin J. Bligh
2005-02-16 10:02                               ` Andi Kleen
2005-02-16 15:21                                 ` Martin J. Bligh
2005-02-16 15:49                                   ` Paul Jackson
2005-02-16 16:08                                     ` Andi Kleen
2005-02-16 16:55                                       ` Martin J. Bligh
2005-02-16 23:35                                         ` Ray Bryant
2005-02-16  0:50                           ` Martin J. Bligh
2005-02-15 15:40         ` Paul Jackson
2005-02-12 11:17 ` [RFC 2.6.11-rc2-mm2 0/7] mm: manual page migration -- overview Andi Kleen
2005-02-12 12:12   ` Robin Holt
2005-02-14 19:18     ` Andi Kleen
2005-02-15  1:02       ` Steve Longerbeam
2005-02-12 15:54   ` Marcelo Tosatti
2005-02-12 16:18     ` Marcelo Tosatti
2005-02-12 21:29     ` Andi Kleen
2005-02-14 16:38       ` Robin Holt
2005-02-14 19:15         ` Andi Kleen
2005-02-14 23:49           ` Ray Bryant
2005-02-15  3:16             ` Paul Jackson
2005-02-15  9:14               ` Ray Bryant
2005-02-15 15:21                 ` Paul Jackson
2005-02-15  0:29   ` Ray Bryant
2005-02-15 11:05     ` Robin Holt
2005-02-15 17:44       ` Ray Bryant
2005-02-15 11:53     ` Andi Kleen
2005-02-15 12:15       ` Robin Holt
2005-02-15 15:07         ` Paul Jackson
2005-02-15 15:11         ` Paul Jackson
2005-02-15 18:16       ` Ray Bryant
2005-02-15 18:24         ` Andi Kleen
2005-02-15 12:14     ` [RFC 2.6.11-rc2-mm2 0/7] mm: manual page migration -- overview II Andi Kleen
2005-02-15 18:38       ` Ray Bryant
2005-02-15 21:48         ` Andi Kleen
2005-02-15 22:37           ` Paul Jackson
2005-02-16  3:44           ` Ray Bryant
2005-02-17 23:54             ` Andi Kleen
2005-02-18  8:38               ` Ray Bryant
2005-02-18 13:02                 ` Andi Kleen
2005-02-18 16:18                   ` Paul Jackson
2005-02-18 16:20                   ` Paul Jackson
2005-02-18 16:22                   ` Paul Jackson
2005-02-18 16:25                   ` Paul Jackson
2005-02-19  1:01                   ` Ray Bryant
2005-02-20 21:49                     ` Andi Kleen
2005-02-20 22:30                       ` Paul Jackson
2005-02-20 22:35                         ` Andi Kleen
2005-02-21  1:50                           ` Paul Jackson
2005-02-21  7:39                             ` Ray Bryant
2005-02-21  7:29                           ` Ray Bryant
2005-02-21  9:57                             ` Andi Kleen
2005-02-21 12:02                               ` Paul Jackson
2005-02-21  8:42                           ` Ray Bryant
2005-02-21 12:10                             ` Andi Kleen
2005-02-21 17:12                               ` Ray Bryant
2005-02-22 18:03                                 ` Andi Kleen
2005-02-23  3:33                                   ` Ray Bryant
2005-02-22  6:40                               ` Ray Bryant
2005-02-22 18:01                                 ` Andi Kleen
2005-02-22 18:45                                   ` Ray Bryant
2005-02-22 18:49                                     ` Andi Kleen
2005-02-26 18:22                                       ` Ray Bryant
2005-02-22 22:04                                   ` Ray Bryant
2005-02-22  6:44                               ` Ray Bryant
2005-02-21  4:20                       ` Ray Bryant
2005-02-18 16:58               ` Ray Bryant
2005-02-18 17:02               ` Ray Bryant
2005-02-18 17:11               ` Ray Bryant

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050212032620.18524.15178.29731@tomahawk.engr.sgi.com \
    --to=raybry@sgi.com \
    --cc=akpm@osdl.org \
    --cc=haveblue@us.ibm.com \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=marcello@cyclades.com \
    --cc=raybry@austin.rr.com \
    --cc=taka@valinux.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox