diff -ru linux-2.4.0-test10.kio.02/drivers/char/raw.c linux-2.4.0-test10.kio.03/drivers/char/raw.c
--- linux-2.4.0-test10.kio.02/drivers/char/raw.c	Thu Nov  2 12:00:35 2000
+++ linux-2.4.0-test10.kio.03/drivers/char/raw.c	Thu Nov  2 12:08:54 2000
@@ -326,6 +326,9 @@
 			size -= iobuf->retval;
 			buf += iobuf->retval;
 		}
+
+		if (rw == READ)
+			mark_dirty_kiovec(&iobuf, 1, iobuf->retval);
 
 		unmap_kiobuf(iobuf);
 		/* The unlock_kiobuf is implicit here */
diff -ru linux-2.4.0-test10.kio.02/fs/iobuf.c linux-2.4.0-test10.kio.03/fs/iobuf.c
--- linux-2.4.0-test10.kio.02/fs/iobuf.c	Thu Nov  2 12:06:59 2000
+++ linux-2.4.0-test10.kio.03/fs/iobuf.c	Thu Nov  2 12:08:54 2000
@@ -128,6 +128,7 @@
 }
 
+
 /*
  * Unmap all of the pages referenced by a kiobuf.  We release the pages,
  * and unlock them if they were locked.
  */
@@ -137,6 +138,10 @@
 {
 	int i;
 	struct page *map;
+
+	if (iobuf->map_ops && iobuf->map_ops->unmap)
+		iobuf->map_ops->unmap(iobuf);
+	iobuf->map_ops = NULL;
 
 	for (i = 0; i < iobuf->nr_pages; i++) {
 		map = iobuf->maplist[i];
@@ -151,6 +156,38 @@
 	iobuf->locked = 0;
 }
 
+void unmap_kiovec (struct kiobuf **iovec, int nr)
+{
+	for (; nr > 0; --nr)
+		unmap_kiobuf(*iovec++);
+}
+
+/* Mark all kiobufs in a vector as dirty, propagating dirty bits to all
+   pages in the vector.  "bytes" indicates how much of the data has been
+   modified.  A value less than zero means "everything". */
+
+int mark_dirty_kiovec(struct kiobuf **iovec, int nr, int bytes)
+{
+	struct kiobuf *iobuf;
+	int buf_bytes;
+	int rc, err = 0;
+
+	for (; nr > 0; --nr) {
+		iobuf = *iovec++;
+		buf_bytes = bytes;
+		if (buf_bytes < 0 || buf_bytes > iobuf->length)
+			buf_bytes = iobuf->length;
+		rc = iobuf->map_ops->mark_dirty(iobuf, bytes);
+		if (rc && !err)
+			err = rc;
+		if (bytes >= 0)
+			bytes -= buf_bytes;
+		if (!bytes)
+			break;
+	}
+	return err;
+}
+
 
 /*
  * Lock down all of the pages of a kiovec for IO.
@@ -253,5 +290,4 @@
 	}
 	return 0;
 }
-
diff -ru linux-2.4.0-test10.kio.02/include/linux/iobuf.h linux-2.4.0-test10.kio.03/include/linux/iobuf.h
--- linux-2.4.0-test10.kio.02/include/linux/iobuf.h	Thu Nov  2 12:07:27 2000
+++ linux-2.4.0-test10.kio.03/include/linux/iobuf.h	Thu Nov  2 12:08:54 2000
@@ -29,6 +29,15 @@
 #define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
 #define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
 
+struct kiobuf;
+
+/* Operations on an established mapping of a kiobuf */
+struct kiobuf_map_operations
+{
+	int (*unmap) (struct kiobuf *);
+	int (*mark_dirty) (struct kiobuf *, int);
+};
+
 /* The main kiobuf struct used for all our IO! */
 
 struct kiobuf
@@ -60,6 +69,11 @@
 
 	void (*end_io) (struct kiobuf *); /* Completion callback */
 	wait_queue_head_t wait_queue;
+
+	/* Private state for the mapping function: allows arbitrary IO
+	   routines to perform completion appropriately. */
+	void *	map_private;
+	struct kiobuf_map_operations * map_ops;
 };
 
 
@@ -80,6 +94,7 @@
 void unmap_kiobuf(struct kiobuf *iobuf);
 int lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
 int unlock_kiovec(int nr, struct kiobuf *iovec[]);
+int mark_dirty_kiovec(struct kiobuf **iovec, int nr, int bytes);
 
 /* fs/buffer.c */
diff -ru linux-2.4.0-test10.kio.02/mm/memory.c linux-2.4.0-test10.kio.03/mm/memory.c
--- linux-2.4.0-test10.kio.02/mm/memory.c	Thu Nov  2 12:39:16 2000
+++ linux-2.4.0-test10.kio.03/mm/memory.c	Thu Nov  2 12:39:54 2000
@@ -42,6 +42,8 @@
 #include <linux/swap.h>
 #include <linux/smp_lock.h>
 #include <linux/swapctl.h>
+#include <linux/file.h>
+#include <linux/slab.h>
 #include <linux/iobuf.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
@@ -414,6 +416,84 @@
 	return page;
 }
 
+
+int discard_iobuf_filp_map(struct kiobuf *iobuf)
+{
+	int i;
+	struct file **files = (struct file **) iobuf->map_private;
+	struct file *filp;
+
+	if (!files)
+		return 0;
+
+	for (i = 0; i < iobuf->nr_pages; i++) {
+		filp = *files++;
+		if (filp != NULL)
+			fput(filp);
+	}
+
+	kfree(iobuf->map_private);
+	iobuf->map_private = NULL;
+	return 0;
+}
+
+/* On completion of a successful read() into the user buffer for any
+ * number of bytes, we need to propagate the dirty state into the
+ * underlying pages.
+ *
+ * "bytes == -1" means mark all pages in the iobuf data region dirty. */
+
+int mark_user_iobuf_dirty(struct kiobuf *iobuf, int bytes)
+{
+	int first, last, i;
+	int err = 0, tmp;
+	struct file **files = (struct file **) iobuf->map_private;
+
+	if (bytes < 0 || bytes > iobuf->length)
+		bytes = iobuf->length;
+	if (!bytes)
+		return 0;
+
+	first = iobuf->offset >> PAGE_SHIFT;
+	last = (iobuf->offset + bytes - 1) >> PAGE_SHIFT;
+
+	for (i = first; i <= last; i++) {
+		struct page *page = iobuf->maplist[i];
+		struct file *filp = files[i];
+
+		if (page->mapping && page->mapping->a_ops->writepage) {
+			if (!iobuf->locked)
+				lock_page(page);
+			tmp = page->mapping->a_ops->writepage(filp, page);
+			if (!iobuf->locked)
+				UnlockPage(page);
+			if (tmp && !err)
+				err = tmp;
+		} else {
+#if 0	/* Need to educate the VM elsewhere about this!  At the very least:
+	   __free_pages_ok: need to recognise dirty pages as valid
+			    (or clean them in zap_page_range)
+	   vmscan.c: flush dirty swap-cache pages to swap on eviction */
+			SetPageDirty(page);
+#endif
+		}
+	}
+	return err;
+}
+
+/* For kiobufs, "read" means "read into memory" and hence involves
+ * dirtying the pages concerned. */
+
+struct kiobuf_map_operations usermap_read_ops = {
+	unmap:		discard_iobuf_filp_map,
+	mark_dirty:	mark_user_iobuf_dirty
+};
+
+struct kiobuf_map_operations usermap_write_ops = {};
+
+
+
 /*
  * Force in an entire range of pages from the current process's user VA,
  * and pin them in physical memory.
@@ -423,7 +503,7 @@
 int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
 {
 	unsigned long ptr, end;
-	int err;
+	int err, nrpages;
 	struct mm_struct * mm;
 	struct vm_area_struct * vma = 0;
 	struct page * map;
@@ -434,15 +514,34 @@
 	if (iobuf->nr_pages)
 		return -EINVAL;
 
+	/* If it is already mapped, we have a big problem! */
+	if (iobuf->map_ops != NULL)
+		BUG();
+
 	mm = current->mm;
 	dprintk ("map_user_kiobuf: begin\n");
 
 	ptr = va & PAGE_MASK;
 	end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
-	err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
+	nrpages = (end - ptr) >> PAGE_SHIFT;
+	err = expand_kiobuf(iobuf, nrpages);
 	if (err)
 		return err;
 
+	/* For writable mappings, we need to support marking the pages
+	   dirty later on, and that in turn requires storing the filps
+	   associated with the vma being scanned. */
+	if (datain) {
+		iobuf->map_private = kmalloc(sizeof(void *) * nrpages, GFP_KERNEL);
+		if (!iobuf->map_private)
+			return -ENOMEM;
+		memset(iobuf->map_private, 0, sizeof(void *) * nrpages);
+
+		iobuf->map_ops = &usermap_read_ops;
+	}
+	else
+		iobuf->map_ops = &usermap_write_ops;
+
 	down(&mm->mmap_sem);
 
 	err = -EFAULT;
@@ -491,9 +590,20 @@
 			printk (KERN_INFO "Mapped page missing [%d]\n", i);
 		spin_unlock(&mm->page_table_lock);
 		iobuf->maplist[i] = map;
-		iobuf->nr_pages = ++i;
 		ptr += PAGE_SIZE;
+
+		/* The page is pinned and the mm is locked, so we can
+		   safely lookup the filp for this page's writeback now
+		   if we need to. */
+		if (datain) {
+			struct file *filp = vma->vm_file;
+			if (filp)
+				get_file(filp);
+			((struct file **)iobuf->map_private)[i] = filp;
+		}
+
+		iobuf->nr_pages = ++i;
 	}
 
 	up(&mm->mmap_sem);
@@ -503,6 +613,10 @@
  out_unlock:
 	up(&mm->mmap_sem);
 	unmap_kiobuf(iobuf);
+	if (datain)
+		discard_iobuf_filp_map(iobuf);
+	iobuf->map_ops = NULL;
+
 	dprintk ("map_user_kiobuf: end %d\n", err);
 	return err;
 }
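
For reference, the driver-side calling convention this patch establishes looks
roughly like the sketch below.  This is a minimal illustration, not code from
the patch: do_device_io() is a hypothetical stand-in for the actual transfer
(the raw driver uses brw_kiovec()), and most error handling is trimmed.

/* Minimal usage sketch (not part of the patch). */

#include <linux/fs.h>
#include <linux/iobuf.h>

static int example_rw(int rw, char *buf, size_t size, loff_t offset)
{
	struct kiobuf *iobuf;
	int err;

	err = alloc_kiovec(1, &iobuf);
	if (err)
		return err;

	/* Pin the user pages; for READ, the patch also records the
	   filp behind each page so it can be dirtied on completion. */
	err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, size);
	if (err)
		goto out_free;

	err = do_device_io(rw, iobuf, offset);	/* hypothetical */

	/* The device has written into the user pages: propagate the
	   dirty state through the new map_ops->mark_dirty hook.  A
	   negative byte count means "mark everything". */
	if (rw == READ)
		mark_dirty_kiovec(&iobuf, 1, iobuf->retval);

	unmap_kiobuf(iobuf);	/* runs map_ops->unmap, dropping filps */

 out_free:
	free_kiovec(1, &iobuf);
	return err;
}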
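
The kiobuf_map_operations indirection is the extensible piece: any mapping
routine can attach its own completion behaviour through map_ops and
map_private.  As a purely hypothetical illustration (none of these names
appear in the patch), a kernel-internal mapping whose caller flushes its own
pages could plug in trivial operations:

/* Hypothetical map_ops provider: the caller owns the pages and does
 * its own flushing, so completion needs no per-page work.  (Per the
 * patch's #if 0 note, blindly calling SetPageDirty() on arbitrary
 * pages is not yet safe; the VM would first need to learn to write
 * back or discard such pages.) */

#include <linux/iobuf.h>
#include <linux/slab.h>

static int private_unmap(struct kiobuf *iobuf)
{
	kfree(iobuf->map_private);	/* whatever state we attached */
	iobuf->map_private = NULL;
	return 0;
}

static int private_mark_dirty(struct kiobuf *iobuf, int bytes)
{
	return 0;			/* caller flushes explicitly */
}

struct kiobuf_map_operations private_map_ops = {
	unmap:		private_unmap,
	mark_dirty:	private_mark_dirty,
};

Note that mark_dirty_kiovec() invokes map_ops->mark_dirty unconditionally,
while unmap_kiobuf() NULL-checks its hook, and usermap_write_ops is left
empty; so dirty-marking must only be requested for READ mappings, which is
exactly how the raw.c hunk is guarded.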