* [RFC] kmap_permanent for 2.3.25
       [not found] <Pine.LNX.4.10.9911042338540.8880-100000@chiara.csoma.elte.hu>
@ 1999-11-05  2:53 ` Kanoj Sarcar
  0 siblings, 0 replies; only message in thread
From: Kanoj Sarcar @ 1999-11-05  2:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: ebiederm+eric, hans-christoph.rohland, sct, Kanoj Sarcar, linux-mm

> 
> 
> On Thu, 4 Nov 1999, Kanoj Sarcar wrote:
> 
> > Also, I just pulled out an implementation of what you call
> > kmap_permanent that I was working on previously. I am cleaning
> > it up against 2.3.25, and will post it soon. It basically tries
> > to work off the vmalloc space. Since I spent some time on it
> > previously, I think it deserves to at least be reviewed, maybe
> > we can pick some parts of it into yours ...
> 
> Yes, definitely post it, please.
> 
> Basically I have found two major variants, and I'm not sure which one
> is the more correct. The first is to make permanent kmaps global, which
> either means vmalloc, or some dedicated virtual memory area. The
> problems here are SMP flushes and global constraints. Things can be
> optimized wrt. SMP flushes, but not eliminated. I'm more worried about
> global constraints - I don't really know whether we want to restrict
> the virtual space available to currently mapped permanent kmaps to 4M,
> 64M or 256M.
> 
> The other variant - which I implemented - adds per-thread kmaps, which
> get unmapped at schedule time (but in 99.99% of cases there are no
> permanent kmaps held). The schedule() overhead is very low, a single
> out-of-line branch:
> 
> 	if (prev->kmap_count + next->kmap_count)
> 		goto do_kmap_switch;
> 
> Anyway, both approaches have pros and cons, we will see. The interface
> should be similar/identical.
> 
> 	Ingo
> 
> 
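
Before I get to my patch: my reading of your per-thread variant is
something like the sketch below. Only kmap_count and the do_kmap_switch
label come from your snippet; the per-task table, the kmaps field in
task_struct, and the two helpers are my guesses, not code from any
posted patch.

	/* Assumed per-task state: a small fixed table of held kmaps. */
	#define MAX_TASK_KMAPS	4

	struct kmap_state {
		int kmap_count;			/* 0 for almost all tasks */
		struct page * kmap_page[MAX_TASK_KMAPS];
	};

	/*
	 * Slow path of schedule(), reached through your branch only
	 * when prev or next actually holds permanent kmaps.
	 */
	static void do_kmap_switch(struct task_struct * prev,
				   struct task_struct * next)
	{
		int i;

		/* Tear down prev's mapping windows ... */
		for (i = 0; i < prev->kmaps.kmap_count; i++)
			kunmap_task_window(prev, i);	/* hypothetical */
		/* ... and re-establish next's mappings. */
		for (i = 0; i < next->kmaps.kmap_count; i++)
			kmap_task_window(next, i);	/* hypothetical */
	}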

Okay, here's my version of the permanent global kmap implementation. I
have not stress-tested it much, other than compiling the kernel after
changing all the kmaps to unconditional kmap_permanent calls.
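
For example, a converted call site looks like this (an illustrative
caller, not one of the sites I actually changed):

	unsigned long vaddr;

	vaddr = kmap_permanent(page);	/* may sleep for virtual space */
	memset((void *) vaddr, 0, PAGE_SIZE);
	/* unmap by the returned virtual address, not by page */
	kunmap_permanent(vaddr);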

As soon as you post your version, I will send out a comparative note
on both methods, and we can decide which one we should send to Linus. 
Basically, the kmap_permanent implementation should go in right now,
without having to wait for your PCI64 changes. 

Thanks.

Kanoj


--- /usr/tmp/p_rdiff_a005AH/vmalloc.c	Thu Nov  4 18:28:05 1999
+++ mm/vmalloc.c	Thu Nov  4 17:24:31 1999
@@ -3,15 +3,370 @@
  *
  *  Copyright (C) 1993  Linus Torvalds
  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *  Support for bitmaps and dynamic page mapping added by Kanoj Sarcar
+ *			(kanoj@sgi.com) Nov 1999.
  */
 
 #include <linux/malloc.h>
 #include <linux/vmalloc.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
+#include <asm/semaphore.h>
 
 static struct vm_struct * vmlist = NULL;
 
+#define BITSPERBYTE		8
+#define	BITSPERWORD		32
+#define BITSPERLONG		(BITSPERBYTE * sizeof(long))
+#define	WORDMASK		31
+#define BITSTOWORDS(x)		((x) >> 5)
+#define rotor(size)		(rotor[0])
+#define updrotor(size, bitnum)	rotor[0] = (bitnum)
+
+static spinlock_t vm_addr_lock = SPIN_LOCK_UNLOCKED;
+static struct semaphore vm_wait_sema;
+static int vm_waiter = 0;
+static unsigned char *freemap, *stalemap;
+static long mapsize;
+static long stalehi, stalelo, freelo;
+static long rotor[1] = { 0 };
+
+/*
+ * Test a bit field of length len in bitmap bp starting at b to be all set.
+ * Return a count of the number of set bits found.
+ */
+static long bftstset(unsigned char *bp, long b, long len)
+{
+	unsigned int mask, w, *wp;
+	unsigned short i, n, flen, sflen;
+	long count;
+
+	wp = (unsigned int *)bp + BITSTOWORDS(b);
+	i = b & WORDMASK;
+
+	for (count = 0, w = *wp >> i, flen = BITSPERWORD - i;
+	     (len > 0) && (w & 1); w = *(++wp), flen = BITSPERWORD) {
+
+		for (mask = -1, i = BITSPERWORD, sflen = flen, n = 0;
+		     (len > 0) && (flen > 0) && (i > 0) && (w & 1);
+		     i >>= 1, mask >>= i) {
+
+			if ((len >= i) && (flen >= i) &&
+			    ((w & mask) == mask)) {
+				n += i;
+				len -= i;
+				flen -= i;
+				w >>= i;
+			}
+		}
+		count += n;
+		if (n < sflen)
+			break;
+	}
+	return (count);
+}
+
+/*
+ * Clear a bit field of length len in bitmap bp starting at b
+ */
+static void bfclr(unsigned char *bp, long b, long len)
+{
+	unsigned int mask, i, j, *wp;
+
+	wp = (unsigned int *)bp + BITSTOWORDS(b);
+	while (len) {
+		i = b & WORDMASK;
+		mask = -1 << i;
+		if (len < (j = BITSPERWORD - i)) {
+			mask &= mask >> (j - len);
+			*wp &= ~mask;
+			return;
+		}
+		else {
+			len -= (BITSPERWORD - i);
+			*wp &= ~mask;
+			wp++;
+			b = 0;
+		}
+	}
+}
+
+/*
+ * Test a bit field of length len in bitmap bp starting at b to be all clear.
+ * Return a count of the number of clear bits found
+ */
+static long bftstclr(unsigned char *bp, long b, long len)
+{
+	unsigned int mask, w, *wp;
+	unsigned short i, n, flen, sflen;
+	long count;
+
+	wp = (unsigned int *)bp + BITSTOWORDS(b);
+	i = b & WORDMASK;
+
+	for (count = 0, w = *wp >> i, flen = BITSPERWORD - i;
+	     (len > 0) && !(w & 1); w = *(++wp), flen = BITSPERWORD) {
+
+		for (mask = -1, i = BITSPERWORD, sflen = flen, n = 0;
+		     (len > 0) && (flen > 0) && (i > 0) && !(w & 1);
+		     i >>= 1, mask >>= i) {
+
+			if ((len >= i) && (flen >= i) &&
+			    ((w | ~mask) == ~mask)) {
+					n += i;
+					len -= i;
+					flen -= i;
+					w >>= i;
+			}
+		}
+		count += n;
+		if (n < sflen)
+			break;
+	}
+	return (count);
+}
+
+/*
+ * Set a bit field of length len in bitmap bp starting at b
+ */
+static void bfset(unsigned char *bp, long b, long len)
+{
+	unsigned int mask, i, j, *wp;
+
+	wp = (unsigned int *)bp + BITSTOWORDS(b);
+	while (len) {
+		i = b & WORDMASK;
+		mask = -1 << i;
+		if (len < (j = BITSPERWORD - i)) {
+			mask &= mask >> (j - len);
+			*wp |= mask;
+			return;
+		}
+		else {
+			len -= (BITSPERWORD - i);
+			*wp |= mask;
+			wp++;
+			b = 0;
+		}
+	}
+}
+
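+/*
+ * Prepopulate the kernel page tables covering the whole vmalloc
+ * arena, so that kmap_permanent() never needs to allocate pmd/pte
+ * pages and can run under a spinlock.
+ */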
+static void init_kptbl(void)
+{
+	unsigned long addr = VMALLOC_START;
+	unsigned long end = VMALLOC_END;
+	pgd_t * dir = pgd_offset_k(addr);
+
+	do {
+		pmd_t *pmd = pmd_alloc_kernel(dir, addr);
+
+		if (!pmd)
+			panic("init_kptbl out of memory\n");
+		{
+			unsigned long addr2 = addr & ~PGDIR_MASK;
+			unsigned long end2 = addr2 + (end - addr);
+
+			if (end2 > PGDIR_SIZE)
+				end2 = PGDIR_SIZE;
+			do {
+				pte_t * pte = pte_alloc_kernel(pmd, addr2);
+
+				if (!pte)
+					panic("init_kptbl out of memory\n");
+				addr2 = (addr2 + PMD_SIZE) & PMD_MASK;
+				pmd++;
+			} while (addr2 < end2);
+		}
+		set_pgdir(addr, *dir);
+		addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (addr && (addr < end));
+
+}
+
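+/*
+ * Called once at boot, after mem_init(): sets up the free and stale
+ * bitmaps that manage the VMALLOC_START..VMALLOC_END arena.
+ */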
+void vinit(void)
+{
+	long m, size;
+	unsigned char *p;
+
+	if (((VMALLOC_END - VMALLOC_START) % PAGE_SIZE) != 0)
+		panic("VMALLOC_START .. VMALLOC_END not page aligned\n");
+	sema_init(&vm_wait_sema, 0);
+	mapsize = ((VMALLOC_END - VMALLOC_START) / PAGE_SIZE);
+
+	/*
+	 * Let's align the maps to "unsigned long" boundaries so
+	 * that recycle_vm_area goes fast.
+	 */
+	size = (mapsize + BITSPERLONG - 1)/BITSPERLONG;
+	size *= (sizeof (unsigned long));
+	freemap = (unsigned char *)kmalloc(size, GFP_KERNEL);
+	stalemap = (unsigned char *)kmalloc(size, GFP_KERNEL);
+
+	if ((freemap == 0) || (stalemap == 0))
+		panic("can not set up vmalloc maps\n");
+	
+	m = size;
+	p = freemap;
+	while (m--) *p++ = 0xff;
+
+	m = size;
+	p = stalemap;
+	while (m--) *p++ = 0;
+
+	stalehi = -1;
+	freelo = 0;
+	stalelo = mapsize;
+
+	/*
+	 * If there are high memory pages, vmalloc space is small enough
+	 * to be contained in a few page tables. Preallocate the kernel
+	 * page tables for faster/parallel kmap_permanent.
+	 */
+	if (nr_free_highpages)
+		init_kptbl();
+	return;
+}
+
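+/*
+ * Fold the stale map back into the free map and do one global TLB
+ * flush, making lazily freed virtual addresses reusable.  Caller
+ * must hold vm_addr_lock.
+ */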
+static void recycle_vm_area(void)
+{
+	unsigned long *from, *to;
+	long i, tmp, size, first, last;
+
+	first = stalelo / BITSPERLONG;
+	last = (stalehi + BITSPERLONG - 1) / BITSPERLONG;
+	size = last - first;
+	if (size < 0)
+		return;
+	if (stalelo < freelo)
+		freelo = stalelo;
+	from = (unsigned long *)stalemap + first;
+	to = (unsigned long *)freemap + first;
+	for (i = 0; i < size; i++, from++, to++) {
+		if ((tmp = *from))
+			*to |= tmp;
+		*from = 0;
+	}
+	stalehi = -1;
+	stalelo = mapsize;
+	updrotor(1, freelo);
+	flush_tlb_all();
+	return;
+}
+
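+/*
+ * Find 'want' consecutive free pages of vmalloc space, searching
+ * forward from the rotor.  The stale map is recycled once before
+ * giving up.  Caller must hold vm_addr_lock.
+ */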
+static void * get_vm_addr(unsigned long want)
+{
+	long length, firstbit, bitlen;
+	int reps = 0;
+
+search:
+	firstbit = rotor(want);
+	bitlen = mapsize - firstbit;
+	while (bitlen >= want) {
+		if ((length = bftstset(freemap, firstbit, want)) >= want) {
+			bfclr(freemap, firstbit, want);
+			updrotor(want, firstbit + want);
+			freelo = firstbit + want;
+			return((void *)(VMALLOC_START + 
+						(PAGE_SIZE * firstbit)));
+		}
+		firstbit += length;
+		bitlen -= length;
+		length = bftstclr(freemap, firstbit, bitlen);
+		firstbit += length;
+		bitlen -= length;
+	}
+	if (reps)
+		return(NULL);
+	reps++;
+	recycle_vm_area();
+	goto search;
+}
+
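+/*
+ * Return a range of vmalloc space to the stale map and wake any
+ * waiters.  No TLB flush here; that is done in bulk when the stale
+ * map is recycled into the free map.
+ */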
+static void free_vm_addr(void * addr, unsigned long size)
+{
+	long firstbit;
+	unsigned long flags;
+
+	firstbit = ((unsigned long)addr - VMALLOC_START) / PAGE_SIZE;
+	if (firstbit < 0 || firstbit >= mapsize) {
+		printk("Trying to free_vm_addr() bad address (%p)\n", addr);
+		return;
+	}
+	spin_lock_irqsave(&vm_addr_lock, flags);
+	bfset(stalemap, firstbit, size);
+	if (stalehi < firstbit + size)
+		stalehi = firstbit + size;
+	if (stalelo > firstbit)
+		stalelo = firstbit;
+	while (vm_waiter) {
+		vm_waiter--;
+		up(&vm_wait_sema);
+	}
+	spin_unlock_irqrestore(&vm_addr_lock, flags);
+	return;
+}
+
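+/*
+ * Map a (possibly highmem) page into kernel virtual space for an
+ * arbitrary duration; the mapping persists until an explicit
+ * kunmap_permanent().  May sleep waiting for free virtual space,
+ * so it must not be called from interrupt context.
+ */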
+unsigned long kmap_permanent(struct page * page)
+{
+	unsigned long addr = 0, flags;
+	pgd_t * dir;
+	pmd_t * pmd;
+	pte_t * pte;
+
+	while (addr == 0) {
+		spin_lock_irqsave(&vm_addr_lock, flags);
+		addr = (unsigned long)get_vm_addr(1);
+		if (!addr) {
+			vm_waiter++;
+			spin_unlock_irqrestore(&vm_addr_lock, flags);
+			down(&vm_wait_sema);
+		}
+	}
+	spin_unlock_irqrestore(&vm_addr_lock, flags);
+	dir = pgd_offset_k(addr);
+	pmd = pmd_offset(dir, addr);
+	pte = pte_offset(pmd, addr);
+	set_pte(pte, mk_pte(page, PAGE_KERNEL));
+	return(addr);
+}
+
+void kunmap_permanent(unsigned long addr)
+{
+	free_vm_addr((void *)addr, 1);
+}
+
 static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size)
 {
 	pte_t * pte;
@@ -82,7 +437,6 @@
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	flush_tlb_all();
 }
 
 static inline int alloc_area_pte(pte_t * pte, unsigned long address, unsigned long size)
@@ -148,27 +502,27 @@
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	flush_tlb_all();
 	return 0;
 }
 
 struct vm_struct * get_vm_area(unsigned long size)
 {
-	unsigned long addr;
+	unsigned long addr, flags;
 	struct vm_struct **p, *tmp, *area;
 
 	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
 	if (!area)
 		return NULL;
-	addr = VMALLOC_START;
+	spin_lock_irqsave(&vm_addr_lock, flags);
+	addr = (unsigned long)get_vm_addr(size/PAGE_SIZE + 1);
+	spin_unlock_irqrestore(&vm_addr_lock, flags);
+	if (!addr) {
+		kfree(area);
+		return NULL;
+	}
 	for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
-		if (size + addr < (unsigned long) tmp->addr)
+		if ((unsigned long)tmp->addr > addr)
 			break;
-		addr = tmp->size + (unsigned long) tmp->addr;
-		if (addr > VMALLOC_END-size) {
-			kfree(area);
-			return NULL;
-		}
 	}
 	area->addr = (void *)addr;
 	area->size = size + PAGE_SIZE;
@@ -191,6 +545,7 @@
 		if (tmp->addr == addr) {
 			*p = tmp->next;
 			vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
+			free_vm_addr(addr, tmp->size/PAGE_SIZE);
 			kfree(tmp);
 			return;
 		}
--- /usr/tmp/p_rdiff_a005AQ/main.c	Thu Nov  4 18:28:20 1999
+++ init/main.c	Thu Nov  4 12:28:01 1999
@@ -25,6 +25,7 @@
 #include <linux/hdreg.h>
 #include <linux/iobuf.h>
 #include <linux/bootmem.h>
+#include <linux/vmalloc.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -495,6 +496,7 @@
 #endif
 	mem_init();
 	kmem_cache_sizes_init();
+	vinit();
 #ifdef CONFIG_PROC_FS
 	proc_root_init();
 #endif
--- /usr/tmp/p_rdiff_a005AZ/vmalloc.h	Thu Nov  4 18:28:32 1999
+++ include/linux/vmalloc.h	Thu Nov  4 16:28:08 1999
@@ -13,6 +13,7 @@
 	struct vm_struct * next;
 };
 
+void vinit(void);
 struct vm_struct * get_vm_area(unsigned long size);
 void vfree(void * addr);
 void * vmalloc(unsigned long size);