linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@us.ibm.com>
To: Daniel Phillips <phillips@arcor.de>
Cc: "Stephen C. Tweedie" <sct@redhat.com>,
	Andrew Morton <akpm@osdl.org>,
	Christoph Hellwig <hch@infradead.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	linux-mm <linux-mm@kvack.org>
Subject: Re: Non-GPL export of invalidate_mmap_range
Date: Fri, 20 Feb 2004 04:02:55 -0800	[thread overview]
Message-ID: <20040220120255.GA1269@us.ibm.com> (raw)
In-Reply-To: <200402200007.25832.phillips@arcor.de>

On Fri, Feb 20, 2004 at 12:07:25AM -0500, Daniel Phillips wrote:
> On Thursday 19 February 2004 14:47, Paul E. McKenney wrote:
> > OK, I surrender.  I got some private email agreeing with this
> > viewpoint.  Any dissenters, speak soon, or...
> 
> An implementation is going to look something like the patch below. 
> Unfortunately I don't think there is a way around passing an extra parameter
> all the way down the unmap call chain.  Doubly unfortunately, this doesn't
> give any benefit at all to anybody who doesn't use a clustered filesystem
> (which is nearly everybody) while there is a marginal cost.  Do you know a
> better way?  Anyway, this is the price of correct MAP_PRIVATE semantics for
> clustered filesystems.  At least I have quantified it so we can decide if it's
> worth it.  (My opinion: correctness is always worth it.)

"My work is done!"  ;-)

Almost, anyway.  A few comments interspersed.  This would be in
addition to invalidate_mmap_range-non-gpl-export.patch, right?

I cannot think of any reasonable alternative to passing the parameter
down either, as it certainly does not seem reasonable to duplicate the
code...

						Thanx, Paul

> Regards,
> 
> Daniel
> 
> --- 2.6.3.clean/include/linux/mm.h	2004-02-17 22:57:13.000000000 -0500
> +++ 2.6.3/include/linux/mm.h	2004-02-19 23:18:08.000000000 -0500
> @@ -434,9 +434,7 @@
>  			unsigned long size);
>  int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
>  		struct vm_area_struct *start_vma, unsigned long start_addr,
> -		unsigned long end_addr, unsigned long *nr_accounted);
> -void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> -			unsigned long address, unsigned long size);
> +		unsigned long end_addr, unsigned long *nr_accounted, int zap);

How about something like "private_too" instead of "zap"?

(Ah!  unmap_page_range() converted to static, since it is used only
in memory.c.)

>  void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr);
>  int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
>  			struct vm_area_struct *vma);
> @@ -444,8 +442,7 @@
>  			unsigned long size, pgprot_t prot);
>  
>  extern void invalidate_mmap_range(struct address_space *mapping,
> -				  loff_t const holebegin,
> -				  loff_t const holelen);
> +			  loff_t const holebegin,  loff_t const holelen, int zap);
>  extern int vmtruncate(struct inode * inode, loff_t offset);
>  extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
>  extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
> --- 2.6.3.clean/mm/memory.c	2004-02-17 22:57:47.000000000 -0500
> +++ 2.6.3/mm/memory.c	2004-02-19 23:48:23.000000000 -0500
> @@ -386,7 +386,7 @@
>  
>  static void
>  zap_pte_range(struct mmu_gather *tlb, pmd_t * pmd,
> -		unsigned long address, unsigned long size)
> +		unsigned long address, unsigned long size, int zap)
>  {
>  	unsigned long offset;
>  	pte_t *ptep;
> @@ -414,7 +414,7 @@
>  			tlb_remove_tlb_entry(tlb, ptep, address+offset);
>  			if (pfn_valid(pfn)) {
>  				struct page *page = pfn_to_page(pfn);
> -				if (!PageReserved(page)) {
> +				if (!PageReserved(page) && (zap || (page->mapping && !PageSwapCache(page)))) {

Longish line...

>  					if (pte_dirty(pte))
>  						set_page_dirty(page);
>  					if (page->mapping && pte_young(pte) &&
> @@ -436,7 +436,7 @@
>  
>  static void
>  zap_pmd_range(struct mmu_gather *tlb, pgd_t * dir,
> -		unsigned long address, unsigned long size)
> +		unsigned long address, unsigned long size, int zap)
>  {
>  	pmd_t * pmd;
>  	unsigned long end;
> @@ -453,14 +453,14 @@
>  	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
>  		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
>  	do {
> -		zap_pte_range(tlb, pmd, address, end - address);
> -		address = (address + PMD_SIZE) & PMD_MASK; 
> +		zap_pte_range(tlb, pmd, address, end - address, zap);
> +		address = (address + PMD_SIZE) & PMD_MASK;
>  		pmd++;
>  	} while (address < end);
>  }
>  
> -void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> -			unsigned long address, unsigned long end)
> +static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> +			unsigned long address, unsigned long end, int zap)
>  {
>  	pgd_t * dir;
>  
> @@ -474,7 +474,7 @@
>  	dir = pgd_offset(vma->vm_mm, address);
>  	tlb_start_vma(tlb, vma);
>  	do {
> -		zap_pmd_range(tlb, dir, address, end - address);
> +		zap_pmd_range(tlb, dir, address, end - address, zap);
>  		address = (address + PGDIR_SIZE) & PGDIR_MASK;
>  		dir++;
>  	} while (address && (address < end));
> @@ -524,7 +524,7 @@
>   */
>  int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
>  		struct vm_area_struct *vma, unsigned long start_addr,
> -		unsigned long end_addr, unsigned long *nr_accounted)
> +		unsigned long end_addr, unsigned long *nr_accounted, int zap)
>  {
>  	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
>  	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
> @@ -568,7 +568,7 @@
>  				tlb_start_valid = 1;
>  			}
>  
> -			unmap_page_range(*tlbp, vma, start, start + block);
> +			unmap_page_range(*tlbp, vma, start, start + block, zap);
>  			start += block;
>  			zap_bytes -= block;
>  			if ((long)zap_bytes > 0)
> @@ -594,8 +594,8 @@
>   * @address: starting address of pages to zap
>   * @size: number of bytes to zap
>   */
> -void zap_page_range(struct vm_area_struct *vma,
> -			unsigned long address, unsigned long size)
> +void invalidate_page_range(struct vm_area_struct *vma,

Would it be useful for this to be inline?  (Wouldn't seem so,
zapping mappings has enough overhead that an extra level of
function call should be deep down in the noise...)

> +			unsigned long address, unsigned long size, int zap)
>  {
>  	struct mm_struct *mm = vma->vm_mm;
>  	struct mmu_gather *tlb;
> @@ -612,11 +612,17 @@
>  	lru_add_drain();
>  	spin_lock(&mm->page_table_lock);
>  	tlb = tlb_gather_mmu(mm, 0);
> -	unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted);
> +	unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, zap);
>  	tlb_finish_mmu(tlb, address, end);
>  	spin_unlock(&mm->page_table_lock);
>  }
>  
> +void zap_page_range(struct vm_area_struct *vma,
> +			unsigned long address, unsigned long size)
> +{
> +	invalidate_page_range(vma, address, size, 1);
> +}
> +
>  /*
>   * Do a quick page-table lookup for a single page.
>   * mm->page_table_lock must be held.
> @@ -1095,9 +1101,9 @@
>  		    	continue;	/* Mapping disjoint from hole. */
>  		zba = (hba <= vba) ? vba : hba;
>  		zea = (vea <= hea) ? vea : hea;
> -		zap_page_range(vp,
> +		invalidate_page_range(vp,
>  			       ((zba - vba) << PAGE_SHIFT) + vp->vm_start,
> -			       (zea - zba + 1) << PAGE_SHIFT);
> +			       (zea - zba + 1) << PAGE_SHIFT, 1);
>  	}
>  }
>  
> @@ -1116,7 +1122,7 @@
>   * end of the file.
>   */
>  void invalidate_mmap_range(struct address_space *mapping,
> -		      loff_t const holebegin, loff_t const holelen)
> +		      loff_t const holebegin, loff_t const holelen, int zap)
>  {
>  	unsigned long hba = holebegin >> PAGE_SHIFT;
>  	unsigned long hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;

Doesn't the new argument need to be passed down through
invalidate_mmap_range_list()?

> @@ -1156,7 +1162,7 @@
>  	if (inode->i_size < offset)
>  		goto do_expand;
>  	i_size_write(inode, offset);
> -	invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0);
> +	invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
>  	truncate_inode_pages(mapping, offset);
>  	goto out_truncate;
>  
> --- 2.6.3.clean/mm/mmap.c	2004-02-17 22:58:32.000000000 -0500
> +++ 2.6.3/mm/mmap.c	2004-02-19 22:46:01.000000000 -0500
> @@ -1134,7 +1134,7 @@
>  
>  	lru_add_drain();
>  	tlb = tlb_gather_mmu(mm, 0);
> -	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted);
> +	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, 1);
>  	vm_unacct_memory(nr_accounted);
>  
>  	if (is_hugepage_only_range(start, end - start))
> @@ -1436,7 +1436,7 @@
>  	flush_cache_mm(mm);
>  	/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
>  	mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
> -					~0UL, &nr_accounted);
> +					~0UL, &nr_accounted, 1);
>  	vm_unacct_memory(nr_accounted);
>  	BUG_ON(mm->map_count);	/* This is just debugging */
>  	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
> 
> 
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:aart@kvack.org"> aart@kvack.org </a>

  reply	other threads:[~2004-02-20 12:02 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-02-16 19:09 Paul E. McKenney
2004-02-17  2:31 ` Andrew Morton
2004-02-17  7:35 ` Christoph Hellwig
2004-02-17 12:40   ` Paul E. McKenney
2004-02-18  0:19     ` Andrew Morton
2004-02-18 12:51       ` Arjan van de Ven
2004-02-18 14:00         ` Paul E. McKenney
2004-02-18 21:10           ` Christoph Hellwig
2004-02-18 15:06             ` Paul E. McKenney
2004-02-18 22:21               ` Christoph Hellwig
2004-02-18 22:51                 ` Andrew Morton
2004-02-18 23:00                   ` Christoph Hellwig
2004-02-18 16:21                     ` Paul E. McKenney
2004-02-18 23:32                     ` Andrew Morton
2004-02-19 12:32                       ` Christoph Hellwig
2004-02-19 18:56                         ` Andrew Morton
2004-02-19 19:01                           ` Christoph Hellwig
2004-02-19 13:04                             ` Paul E. McKenney
2004-02-20  3:17                             ` Anton Blanchard
2004-02-20 21:46                               ` Valdis.Kletnieks
2004-02-19  0:28                     ` Andrew Morton
2004-02-18 18:36                       ` Paul E. McKenney
2004-02-19 12:31                       ` Christoph Hellwig
2004-02-19  9:11                         ` Paul E. McKenney
2004-02-19 18:32                           ` Lars Marowsky-Bree
2004-02-19 18:38                             ` Arjan van de Ven
2004-02-19 19:16                             ` viro
2004-02-19 16:15                               ` Paul E. McKenney
2004-02-19 18:59                         ` Tim Bird
2004-02-20  1:27                       ` David Schwartz
2004-02-19  9:11                   ` David Weinehall
2004-02-19  8:58                     ` Paul E. McKenney
2004-03-04  5:51                       ` Mike Fedyk
2004-02-19 10:29                   ` Lars Marowsky-Bree
2004-02-19  9:00                     ` Paul E. McKenney
2004-02-19 11:11                     ` Arjan van de Ven
2004-02-19 11:53                       ` Lars Marowsky-Bree
2004-02-18 18:04         ` Tim Bird
2004-02-19 20:56       ` Daniel Phillips
2004-02-19 22:06         ` Stephen C. Tweedie
2004-02-19 22:31           ` Daniel Phillips
2004-02-19 16:42             ` Paul E. McKenney
2004-02-20  2:06               ` Daniel Phillips
2004-02-19 19:47                 ` Paul E. McKenney
2004-02-20  5:07                   ` Daniel Phillips
2004-02-20 12:02                     ` Paul E. McKenney [this message]
2004-02-20 20:37                       ` Daniel Phillips
2004-02-20 14:01                         ` Paul E. McKenney
2004-02-20 23:00                           ` Daniel Phillips
2004-02-20 16:17                             ` Paul E. McKenney
2004-02-21  3:19                               ` Daniel Phillips
2004-02-21 19:00                               ` Daniel Phillips
2004-02-22 23:39                                 ` Paul E. McKenney
2004-02-25 21:04                                   ` [RFC] Distributed mmap API Daniel Phillips
2004-02-25 19:12                                     ` Paul E. McKenney
2004-02-25 19:14                                     ` Paul E. McKenney
2004-02-25 22:07                                     ` Andrew Morton
2004-02-25 22:07                                       ` Daniel Phillips
2004-02-25 22:16                                         ` Andrew Morton
2004-02-25 22:46                                           ` Daniel Phillips
2004-03-03  3:00                                       ` Daniel Phillips
2004-03-03  3:15                                         ` Andrew Morton
2004-03-03 13:06                                           ` Daniel Phillips
2004-03-04 18:55                                             ` Paul E. McKenney
2004-02-20 21:17                         ` Non-GPL export of invalidate_mmap_range Christoph Hellwig
2004-02-20 22:16                           ` Daniel Phillips
2004-02-18 12:12     ` Dominik Kubla
2004-02-17 22:22 ` David Weinehall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040220120255.GA1269@us.ibm.com \
    --to=paulmck@us.ibm.com \
    --cc=akpm@osdl.org \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=phillips@arcor.de \
    --cc=sct@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox