linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [-mm PATCH]  Memory controller improve user interface
@ 2007-08-29 11:10 Balbir Singh
  2007-08-29 15:28 ` Paul Menage
  2007-08-29 18:45 ` Dave Hansen
  0 siblings, 2 replies; 15+ messages in thread
From: Balbir Singh @ 2007-08-29 11:10 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel Mailing List, Linux Containers, Paul Menage,
	Linux MM Mailing List, David Rientjes, Balbir Singh


Change the interface to use kilobytes instead of pages. Page sizes can vary
across platforms and configurations. A new strategy routine has been added
to the resource counters infrastructure to format the data as desired.

Suggested by David Rientjes, Andrew Morton and Herbert Poetzl

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---

 Documentation/controllers/memory.txt |    7 +++--
 include/linux/res_counter.h          |    6 ++--
 kernel/res_counter.c                 |   24 +++++++++++++----
 mm/memcontrol.c                      |   47 +++++++++++++++++++++++++++--------
 4 files changed, 64 insertions(+), 20 deletions(-)

diff -puN mm/memcontrol.c~mem-control-make-ui-use-kilobytes mm/memcontrol.c
--- linux-2.6.23-rc3/mm/memcontrol.c~mem-control-make-ui-use-kilobytes	2007-08-28 13:20:44.000000000 +0530
+++ linux-2.6.23-rc3-balbir/mm/memcontrol.c	2007-08-29 14:36:07.000000000 +0530
@@ -32,6 +32,7 @@
 
 struct container_subsys mem_container_subsys;
 static const int MEM_CONTAINER_RECLAIM_RETRIES = 5;
+static const int MEM_CONTAINER_CHARGE_KB = (PAGE_SIZE >> 10);
 
 /*
  * The memory controller data structure. The memory controller controls both
@@ -312,7 +313,7 @@ int mem_container_charge(struct page *pa
 	 * If we created the page_container, we should free it on exceeding
 	 * the container limit.
 	 */
-	while (res_counter_charge(&mem->res, 1)) {
+	while (res_counter_charge(&mem->res, MEM_CONTAINER_CHARGE_KB)) {
 		if (try_to_free_mem_container_pages(mem))
 			continue;
 
@@ -352,7 +353,7 @@ int mem_container_charge(struct page *pa
 		kfree(pc);
 		pc = race_pc;
 		atomic_inc(&pc->ref_cnt);
-		res_counter_uncharge(&mem->res, 1);
+		res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
 		css_put(&mem->css);
 		goto done;
 	}
@@ -417,7 +418,7 @@ void mem_container_uncharge(struct page_
 		css_put(&mem->css);
 		page_assign_page_container(page, NULL);
 		unlock_page_container(page);
-		res_counter_uncharge(&mem->res, 1);
+		res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
 
  		spin_lock_irqsave(&mem->lru_lock, flags);
  		list_del_init(&pc->lru);
@@ -426,12 +427,37 @@ void mem_container_uncharge(struct page_
 	}
 }
 
-static ssize_t mem_container_read(struct container *cont, struct cftype *cft,
-			struct file *file, char __user *userbuf, size_t nbytes,
-			loff_t *ppos)
+int mem_container_read_strategy(unsigned long val, char *buf)
+{
+	return sprintf(buf, "%lu (kB)\n", val);
+}
+
+int mem_container_write_strategy(char *buf, unsigned long *tmp)
+{
+	*tmp = memparse(buf, &buf);
+	if (*buf != '\0')
+		return -EINVAL;
+
+	*tmp = *tmp >> 10;		/* convert to kilobytes */
+	return 0;
+}
+
+static ssize_t mem_container_read_usage(struct container *cont,
+			struct cftype *cft, struct file *file,
+			char __user *userbuf, size_t nbytes, loff_t *ppos)
+{
+	return res_counter_read(&mem_container_from_cont(cont)->res,
+				cft->private, userbuf, nbytes, ppos,
+				mem_container_read_strategy);
+}
+
+static ssize_t mem_container_read(struct container *cont,
+			struct cftype *cft, struct file *file,
+			char __user *userbuf, size_t nbytes, loff_t *ppos)
 {
 	return res_counter_read(&mem_container_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos);
+				cft->private, userbuf, nbytes, ppos,
+				NULL);
 }
 
 static ssize_t mem_container_write(struct container *cont, struct cftype *cft,
@@ -439,7 +465,8 @@ static ssize_t mem_container_write(struc
 				size_t nbytes, loff_t *ppos)
 {
 	return res_counter_write(&mem_container_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos);
+				cft->private, userbuf, nbytes, ppos,
+				mem_container_write_strategy);
 }
 
 static ssize_t mem_control_type_write(struct container *cont,
@@ -500,13 +527,13 @@ static struct cftype mem_container_files
 	{
 		.name = "usage",
 		.private = RES_USAGE,
-		.read = mem_container_read,
+		.read = mem_container_read_usage,
 	},
 	{
 		.name = "limit",
 		.private = RES_LIMIT,
 		.write = mem_container_write,
-		.read = mem_container_read,
+		.read = mem_container_read_usage,
 	},
 	{
 		.name = "failcnt",
diff -puN include/linux/memcontrol.h~mem-control-make-ui-use-kilobytes include/linux/memcontrol.h
diff -puN include/linux/res_counter.h~mem-control-make-ui-use-kilobytes include/linux/res_counter.h
--- linux-2.6.23-rc3/include/linux/res_counter.h~mem-control-make-ui-use-kilobytes	2007-08-28 13:21:36.000000000 +0530
+++ linux-2.6.23-rc3-balbir/include/linux/res_counter.h	2007-08-29 00:52:45.000000000 +0530
@@ -52,9 +52,11 @@ struct res_counter {
  */
 
 ssize_t res_counter_read(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos);
+		const char __user *buf, size_t nbytes, loff_t *pos,
+		int (*read_strategy)(unsigned long val, char *s));
 ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos);
+		const char __user *buf, size_t nbytes, loff_t *pos,
+		int (*write_strategy)(char *buf, unsigned long *val));
 
 /*
  * the field descriptors. one for each member of res_counter
diff -puN kernel/res_counter.c~mem-control-make-ui-use-kilobytes kernel/res_counter.c
--- linux-2.6.23-rc3/kernel/res_counter.c~mem-control-make-ui-use-kilobytes	2007-08-28 13:21:41.000000000 +0530
+++ linux-2.6.23-rc3-balbir/kernel/res_counter.c	2007-08-29 14:54:04.000000000 +0530
@@ -76,20 +76,25 @@ static inline unsigned long *res_counter
 }
 
 ssize_t res_counter_read(struct res_counter *counter, int member,
-		const char __user *userbuf, size_t nbytes, loff_t *pos)
+		const char __user *userbuf, size_t nbytes, loff_t *pos,
+		int (*read_strategy)(unsigned long val, char *st_buf))
 {
 	unsigned long *val;
 	char buf[64], *s;
 
 	s = buf;
 	val = res_counter_member(counter, member);
-	s += sprintf(s, "%lu\n", *val);
+	if (read_strategy)
+		s += read_strategy(*val, s);
+	else
+		s += sprintf(s, "%lu\n", *val);
 	return simple_read_from_buffer((void __user *)userbuf, nbytes,
 			pos, buf, s - buf);
 }
 
 ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *userbuf, size_t nbytes, loff_t *pos)
+		const char __user *userbuf, size_t nbytes, loff_t *pos,
+		int (*write_strategy)(char *st_buf, unsigned long *val))
 {
 	int ret;
 	char *buf, *end;
@@ -106,9 +111,16 @@ ssize_t res_counter_write(struct res_cou
 		goto out_free;
 
 	ret = -EINVAL;
-	tmp = simple_strtoul(buf, &end, 10);
-	if (*end != '\0')
-		goto out_free;
+
+	if (write_strategy) {
+		if (write_strategy(buf, &tmp)) {
+			goto out_free;
+		}
+	} else {
+		tmp = simple_strtoul(buf, &end, 10);
+		if (*end != '\0')
+			goto out_free;
+	}
 
 	val = res_counter_member(counter, member);
 	*val = tmp;
diff -puN Documentation/controllers/memory.txt~mem-control-make-ui-use-kilobytes Documentation/controllers/memory.txt
--- linux-2.6.23-rc3/Documentation/controllers/memory.txt~mem-control-make-ui-use-kilobytes	2007-08-29 14:42:03.000000000 +0530
+++ linux-2.6.23-rc3-balbir/Documentation/controllers/memory.txt	2007-08-29 14:50:42.000000000 +0530
@@ -165,11 +165,14 @@ c. Enable CONFIG_CONTAINER_MEM_CONT
 
 Since now we're in the 0 container,
 We can alter the memory limit:
-# echo -n 6000 > /containers/0/memory.limit
+# echo -n 4000K > /containers/0/memory.limit
+
+NOTE: The interface has now changed to display the usage in kilobytes
+instead of pages
 
 We can check the usage:
 # cat /containers/0/memory.usage
-25
+56 (kB)
 
 The memory.failcnt field gives the number of times that the container limit was
 exceeded.
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 11:10 [-mm PATCH] Memory controller improve user interface Balbir Singh
@ 2007-08-29 15:28 ` Paul Menage
  2007-08-29 16:07   ` Balbir Singh
  2007-08-29 18:45 ` Dave Hansen
  1 sibling, 1 reply; 15+ messages in thread
From: Paul Menage @ 2007-08-29 15:28 UTC (permalink / raw)
  To: Balbir Singh
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux Containers,
	Linux MM Mailing List, David Rientjes

On 8/29/07, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
>
> Change the interface to use kilobytes instead of pages. Page sizes can vary
> across platforms and configurations. A new strategy routine has been added
> to the resource counters infrastructure to format the data as desired.
>
> Suggested by David Rientjes, Andrew Morton and Herbert Poetzl
>
> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
> ---
>
>  Documentation/controllers/memory.txt |    7 +++--
>  include/linux/res_counter.h          |    6 ++--
>  kernel/res_counter.c                 |   24 +++++++++++++----
>  mm/memcontrol.c                      |   47 +++++++++++++++++++++++++++--------
>  4 files changed, 64 insertions(+), 20 deletions(-)
>
> diff -puN mm/memcontrol.c~mem-control-make-ui-use-kilobytes mm/memcontrol.c
> --- linux-2.6.23-rc3/mm/memcontrol.c~mem-control-make-ui-use-kilobytes  2007-08-28 13:20:44.000000000 +0530
> +++ linux-2.6.23-rc3-balbir/mm/memcontrol.c     2007-08-29 14:36:07.000000000 +0530
> @@ -32,6 +32,7 @@
>
>  struct container_subsys mem_container_subsys;
>  static const int MEM_CONTAINER_RECLAIM_RETRIES = 5;
> +static const int MEM_CONTAINER_CHARGE_KB = (PAGE_SIZE >> 10);
>
>  /*
>   * The memory controller data structure. The memory controller controls both
> @@ -312,7 +313,7 @@ int mem_container_charge(struct page *pa
>          * If we created the page_container, we should free it on exceeding
>          * the container limit.
>          */
> -       while (res_counter_charge(&mem->res, 1)) {
> +       while (res_counter_charge(&mem->res, MEM_CONTAINER_CHARGE_KB)) {
>                 if (try_to_free_mem_container_pages(mem))
>                         continue;
>
> @@ -352,7 +353,7 @@ int mem_container_charge(struct page *pa
>                 kfree(pc);
>                 pc = race_pc;
>                 atomic_inc(&pc->ref_cnt);
> -               res_counter_uncharge(&mem->res, 1);
> +               res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
>                 css_put(&mem->css);
>                 goto done;
>         }
> @@ -417,7 +418,7 @@ void mem_container_uncharge(struct page_
>                 css_put(&mem->css);
>                 page_assign_page_container(page, NULL);
>                 unlock_page_container(page);
> -               res_counter_uncharge(&mem->res, 1);
> +               res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
>
>                 spin_lock_irqsave(&mem->lru_lock, flags);
>                 list_del_init(&pc->lru);
> @@ -426,12 +427,37 @@ void mem_container_uncharge(struct page_
>         }
>  }
>
> -static ssize_t mem_container_read(struct container *cont, struct cftype *cft,
> -                       struct file *file, char __user *userbuf, size_t nbytes,
> -                       loff_t *ppos)
> +int mem_container_read_strategy(unsigned long val, char *buf)
> +{
> +       return sprintf(buf, "%lu (kB)\n", val);
> +}
> +
> +int mem_container_write_strategy(char *buf, unsigned long *tmp)
> +{
> +       *tmp = memparse(buf, &buf);
> +       if (*buf != '\0')
> +               return -EINVAL;
> +
> +       *tmp = *tmp >> 10;              /* convert to kilobytes */
> +       return 0;
> +}

This seems a bit inconsistent - if you write a value to a limit file,
then the value that you read back is reduced by a factor of 1024?
Having the "(kB)" suffix isn't really a big help to automated
middleware.

I'd still be in favour of just reading/writing 64-bit values
representing bytes - simple, and unambiguous for programmatic use, and
not really any less user-friendly than kilobytes  for manual use
(since the numbers involved are going to be unwieldly for manual use
whether they're in bytes or kB).

Paul

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 15:28 ` Paul Menage
@ 2007-08-29 16:07   ` Balbir Singh
  2007-08-29 16:17     ` Paul Menage
  0 siblings, 1 reply; 15+ messages in thread
From: Balbir Singh @ 2007-08-29 16:07 UTC (permalink / raw)
  To: Paul Menage
  Cc: Linux Containers, Linux MM Mailing List, Andrew Morton,
	Linux Kernel Mailing List, David Rientjes

Paul Menage wrote:
> On 8/29/07, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
>> Change the interface to use kilobytes instead of pages. Page sizes can vary
>> across platforms and configurations. A new strategy routine has been added
>> to the resource counters infrastructure to format the data as desired.
>>
>> Suggested by David Rientjes, Andrew Morton and Herbert Poetzl
>>
>> Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
>> ---
>>
>>  Documentation/controllers/memory.txt |    7 +++--
>>  include/linux/res_counter.h          |    6 ++--
>>  kernel/res_counter.c                 |   24 +++++++++++++----
>>  mm/memcontrol.c                      |   47 +++++++++++++++++++++++++++--------
>>  4 files changed, 64 insertions(+), 20 deletions(-)
>>
>> diff -puN mm/memcontrol.c~mem-control-make-ui-use-kilobytes mm/memcontrol.c
>> --- linux-2.6.23-rc3/mm/memcontrol.c~mem-control-make-ui-use-kilobytes  2007-08-28 13:20:44.000000000 +0530
>> +++ linux-2.6.23-rc3-balbir/mm/memcontrol.c     2007-08-29 14:36:07.000000000 +0530
>> @@ -32,6 +32,7 @@
>>
>>  struct container_subsys mem_container_subsys;
>>  static const int MEM_CONTAINER_RECLAIM_RETRIES = 5;
>> +static const int MEM_CONTAINER_CHARGE_KB = (PAGE_SIZE >> 10);
>>
>>  /*
>>   * The memory controller data structure. The memory controller controls both
>> @@ -312,7 +313,7 @@ int mem_container_charge(struct page *pa
>>          * If we created the page_container, we should free it on exceeding
>>          * the container limit.
>>          */
>> -       while (res_counter_charge(&mem->res, 1)) {
>> +       while (res_counter_charge(&mem->res, MEM_CONTAINER_CHARGE_KB)) {
>>                 if (try_to_free_mem_container_pages(mem))
>>                         continue;
>>
>> @@ -352,7 +353,7 @@ int mem_container_charge(struct page *pa
>>                 kfree(pc);
>>                 pc = race_pc;
>>                 atomic_inc(&pc->ref_cnt);
>> -               res_counter_uncharge(&mem->res, 1);
>> +               res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
>>                 css_put(&mem->css);
>>                 goto done;
>>         }
>> @@ -417,7 +418,7 @@ void mem_container_uncharge(struct page_
>>                 css_put(&mem->css);
>>                 page_assign_page_container(page, NULL);
>>                 unlock_page_container(page);
>> -               res_counter_uncharge(&mem->res, 1);
>> +               res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
>>
>>                 spin_lock_irqsave(&mem->lru_lock, flags);
>>                 list_del_init(&pc->lru);
>> @@ -426,12 +427,37 @@ void mem_container_uncharge(struct page_
>>         }
>>  }
>>
>> -static ssize_t mem_container_read(struct container *cont, struct cftype *cft,
>> -                       struct file *file, char __user *userbuf, size_t nbytes,
>> -                       loff_t *ppos)
>> +int mem_container_read_strategy(unsigned long val, char *buf)
>> +{
>> +       return sprintf(buf, "%lu (kB)\n", val);
>> +}
>> +
>> +int mem_container_write_strategy(char *buf, unsigned long *tmp)
>> +{
>> +       *tmp = memparse(buf, &buf);
>> +       if (*buf != '\0')
>> +               return -EINVAL;
>> +
>> +       *tmp = *tmp >> 10;              /* convert to kilobytes */
>> +       return 0;
>> +}
> 
> This seems a bit inconsistent - if you write a value to a limit file,
> then the value that you read back is reduced by a factor of 1024?
> Having the "(kB)" suffix isn't really a big help to automated
> middleware.
> 

Why is that? Is it because you could write 4M and see it show up
as 4096 kilobytes? We'll that can be fixed with another variant
of the memparse() utility.

> I'd still be in favour of just reading/writing 64-bit values
> representing bytes - simple, and unambiguous for programmatic use, and
> not really any less user-friendly than kilobytes  for manual use
> (since the numbers involved are going to be unwieldly for manual use
> whether they're in bytes or kB).
> 

64 bit might be an overkill for 32 bit machines. 32 bit machines with
PAE cannot use 32 bit values, they need 64 bits. I think KiloBytes
is an acceptable metric these days, everybody understands them.

> Paul
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 16:07   ` Balbir Singh
@ 2007-08-29 16:17     ` Paul Menage
  0 siblings, 0 replies; 15+ messages in thread
From: Paul Menage @ 2007-08-29 16:17 UTC (permalink / raw)
  To: balbir
  Cc: Linux Containers, Linux MM Mailing List, Andrew Morton,
	Linux Kernel Mailing List, David Rientjes

On 8/29/07, Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> >
> > This seems a bit inconsistent - if you write a value to a limit file,
> > then the value that you read back is reduced by a factor of 1024?
> > Having the "(kB)" suffix isn't really a big help to automated
> > middleware.
> >
>
> Why is that? Is it because you could write 4M and see it show up
> as 4096 kilobytes? We'll that can be fixed with another variant
> of the memparse() utility.

I was thinking the other way around - you can write 1048576 (i.e. 1MB)
to the file and read back 1024. It just seems to me that it's clearer
if you write X to the file to get X back.

>
> 64 bit might be an overkill for 32 bit machines. 32 bit machines with
> PAE cannot use 32 bit values, they need 64 bits.

How is using a 64-bit value for consistency overkill?

As someone pointed out, 4TB machines probably aren't that far around
the corner (if they're not here already) so even if you use KB rather
than bytes, userspace needs to be using an int64 for this value in
case it ends up running as a 32-bit-compiled app on a 64-bit kernel
with lots of memory.

Paul

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH]  Memory controller improve user interface
  2007-08-29 11:10 [-mm PATCH] Memory controller improve user interface Balbir Singh
  2007-08-29 15:28 ` Paul Menage
@ 2007-08-29 18:45 ` Dave Hansen
  2007-08-29 22:04   ` Balbir Singh
  1 sibling, 1 reply; 15+ messages in thread
From: Dave Hansen @ 2007-08-29 18:45 UTC (permalink / raw)
  To: Balbir Singh
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage

On Wed, 2007-08-29 at 16:40 +0530, Balbir Singh wrote:
> 
> 
> @@ -352,7 +353,7 @@ int mem_container_charge(struct page *pa
>                 kfree(pc);
>                 pc = race_pc;
>                 atomic_inc(&pc->ref_cnt);
> -               res_counter_uncharge(&mem->res, 1);
> +               res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
>                 css_put(&mem->css);
>                 goto done;
>         } 

Do these changes really need to happen anywhere besides the
user<->kernel boundary?  Why can't internal tracking be in pages?

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH]  Memory controller improve user interface
  2007-08-29 18:45 ` Dave Hansen
@ 2007-08-29 22:04   ` Balbir Singh
  2007-08-29 22:18     ` Dave Hansen
  0 siblings, 1 reply; 15+ messages in thread
From: Balbir Singh @ 2007-08-29 22:04 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage

Dave Hansen wrote:
> On Wed, 2007-08-29 at 16:40 +0530, Balbir Singh wrote:
>>
>> @@ -352,7 +353,7 @@ int mem_container_charge(struct page *pa
>>                 kfree(pc);
>>                 pc = race_pc;
>>                 atomic_inc(&pc->ref_cnt);
>> -               res_counter_uncharge(&mem->res, 1);
>> +               res_counter_uncharge(&mem->res, MEM_CONTAINER_CHARGE_KB);
>>                 css_put(&mem->css);
>>                 goto done;
>>         } 
> 
> Do these changes really need to happen anywhere besides the
> user<->kernel boundary?  Why can't internal tracking be in pages?

I've thought about this before. The problem is that a user could
set his limit to 10000 bytes, but would then see the usage and
limit round to the closest page boundary. This can be confusing
to a user.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH]  Memory controller improve user interface
  2007-08-29 22:04   ` Balbir Singh
@ 2007-08-29 22:18     ` Dave Hansen
  2007-08-29 22:20       ` Paul Menage
  2007-08-29 22:27       ` Balbir Singh
  0 siblings, 2 replies; 15+ messages in thread
From: Dave Hansen @ 2007-08-29 22:18 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage

On Thu, 2007-08-30 at 03:34 +0530, Balbir Singh wrote:
> I've thought about this before. The problem is that a user could
> set his limit to 10000 bytes, but would then see the usage and
> limit round to the closest page boundary. This can be confusing
> to a user. 

True, but we're lying if we allow a user to set their limit there,
because we can't actually enforce a limit at 8,192 bytes vs 10,000.
They're the same limit as far as the kernel is concerned.

Why not just -EINVAL if the value isn't page-aligned?  There are plenty
of interfaces in the kernel that require userspace to know the page
size, so this shouldn't be too difficult.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 22:18     ` Dave Hansen
@ 2007-08-29 22:20       ` Paul Menage
  2007-08-29 22:25         ` Dave Hansen
  2007-08-29 22:27       ` Balbir Singh
  1 sibling, 1 reply; 15+ messages in thread
From: Paul Menage @ 2007-08-29 22:20 UTC (permalink / raw)
  To: Dave Hansen
  Cc: balbir, Andrew Morton, Linux Kernel Mailing List,
	Linux MM Mailing List, David Rientjes, Linux Containers

On 8/29/07, Dave Hansen <haveblue@us.ibm.com> wrote:
> On Thu, 2007-08-30 at 03:34 +0530, Balbir Singh wrote:
> > I've thought about this before. The problem is that a user could
> > set his limit to 10000 bytes, but would then see the usage and
> > limit round to the closest page boundary. This can be confusing
> > to a user.
>
> True, but we're lying if we allow a user to set their limit there,
> because we can't actually enforce a limit at 8,192 bytes vs 10,000.
> They're the same limit as far as the kernel is concerned.
>
> Why not just -EINVAL if the value isn't page-aligned?  There are plenty
> of interfaces in the kernel that require userspace to know the page
> size, so this shouldn't be too difficult.

I'd argue that having the user's specified limit be truncated to the
page size is less confusing than giving an EINVAL if it's not page
aligned.

Paul

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 22:20       ` Paul Menage
@ 2007-08-29 22:25         ` Dave Hansen
  2007-08-29 22:37           ` Balbir Singh
  0 siblings, 1 reply; 15+ messages in thread
From: Dave Hansen @ 2007-08-29 22:25 UTC (permalink / raw)
  To: Paul Menage
  Cc: balbir, Andrew Morton, Linux Kernel Mailing List,
	Linux MM Mailing List, David Rientjes, Linux Containers

On Wed, 2007-08-29 at 15:20 -0700, Paul Menage wrote:
> 
> I'd argue that having the user's specified limit be truncated to the
> page size is less confusing than giving an EINVAL if it's not page
> aligned.

Do we truncate mmap() values to the nearest page so to not confuse the
user? ;)

Imagine a careful application setting and accounting for limits on a
long-running system.  Might its internal accounting get sufficiently
misaligned from the kernel's after a while to cause a problem?
Truncating values like that would appear reserve significantly less
memory than desired over a long period of time.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH]  Memory controller improve user interface
  2007-08-29 22:18     ` Dave Hansen
  2007-08-29 22:20       ` Paul Menage
@ 2007-08-29 22:27       ` Balbir Singh
  2007-08-29 22:36         ` Dave Hansen
  1 sibling, 1 reply; 15+ messages in thread
From: Balbir Singh @ 2007-08-29 22:27 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage

Dave Hansen wrote:
> On Thu, 2007-08-30 at 03:34 +0530, Balbir Singh wrote:
>> I've thought about this before. The problem is that a user could
>> set his limit to 10000 bytes, but would then see the usage and
>> limit round to the closest page boundary. This can be confusing
>> to a user. 
> 
> True, but we're lying if we allow a user to set their limit there,
> because we can't actually enforce a limit at 8,192 bytes vs 10,000.
> They're the same limit as far as the kernel is concerned.
> 
> Why not just -EINVAL if the value isn't page-aligned?  There are plenty
> of interfaces in the kernel that require userspace to know the page
> size, so this shouldn't be too difficult.

True, mmap() is a good example of such an interface for developers, I
am not sure about system admins though.

To quote Andrew
<quote>
Reporting tools could run getpagesize() and do the arithmetic, but we
generally try to avoid exposing PAGE_SIZE, HZ, etc to userspace in this
manner.
</quote>

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH]  Memory controller improve user interface
  2007-08-29 22:27       ` Balbir Singh
@ 2007-08-29 22:36         ` Dave Hansen
  2007-08-29 22:44           ` Balbir Singh
  0 siblings, 1 reply; 15+ messages in thread
From: Dave Hansen @ 2007-08-29 22:36 UTC (permalink / raw)
  To: balbir
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage

On Thu, 2007-08-30 at 03:57 +0530, Balbir Singh wrote:
> True, mmap() is a good example of such an interface for developers, I
> am not sure about system admins though.
> 
> To quote Andrew
> <quote>
> Reporting tools could run getpagesize() and do the arithmetic, but we
> generally try to avoid exposing PAGE_SIZE, HZ, etc to userspace in this
> manner.
> </quote>

Well, rounding to PAGE_SIZE exposes PAGE_SIZE as well, just in a
non-intuitive fashion. :)

If we're going to modify what the user specifies, we should probably at
least mandate that writes are only a "suggestion" and users must read
back the value to ensure what actually got committed.

If we're going to round in any direction, shouldn't we round up?  If a
user specifies 4097 bytes and uses two pages, we don't want to complain
when they hit that second page.  

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 22:25         ` Dave Hansen
@ 2007-08-29 22:37           ` Balbir Singh
  2007-08-30  5:38             ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 15+ messages in thread
From: Balbir Singh @ 2007-08-29 22:37 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Paul Menage, Andrew Morton, Linux Kernel Mailing List,
	Linux MM Mailing List, David Rientjes, Linux Containers

Dave Hansen wrote:
> On Wed, 2007-08-29 at 15:20 -0700, Paul Menage wrote:
>> I'd argue that having the user's specified limit be truncated to the
>> page size is less confusing than giving an EINVAL if it's not page
>> aligned.
> 
> Do we truncate mmap() values to the nearest page so to not confuse the
> user? ;)
> 

I think rounding to the closest page size is a better option, but
again it can be a bit confusing. I am all for using memparse() to
parse the user input as a specification of the memory limit.

The second question of how to store it internally without truncation/
rounding is something we need to agree upon. We also need to see
how to display the data back to the user.

I chose kilobytes for two reasons

1. Several people recommended it
2. Herbert mentioned that they've moved to that interface and it
   was working fine for them.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

PS: I am going off to the web to search for some CUI/CLI guidelines.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH]  Memory controller improve user interface
  2007-08-29 22:36         ` Dave Hansen
@ 2007-08-29 22:44           ` Balbir Singh
  0 siblings, 0 replies; 15+ messages in thread
From: Balbir Singh @ 2007-08-29 22:44 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage

Dave Hansen wrote:
> On Thu, 2007-08-30 at 03:57 +0530, Balbir Singh wrote:
>> True, mmap() is a good example of such an interface for developers, I
>> am not sure about system admins though.
>>
>> To quote Andrew
>> <quote>
>> Reporting tools could run getpagesize() and do the arithmetic, but we
>> generally try to avoid exposing PAGE_SIZE, HZ, etc to userspace in this
>> manner.
>> </quote>
> 
> Well, rounding to PAGE_SIZE exposes PAGE_SIZE as well, just in a
> non-intuitive fashion. :)
> 

Agreed, but the user might choose to ignore it altogether.

> If we're going to modify what the user specifies, we should probably at
> least mandate that writes are only a "suggestion" and users must read
> back the value to ensure what actually got committed.
> 

Agreed, excellent suggestion!

> If we're going to round in any direction, shouldn't we round up?  If a
> user specifies 4097 bytes and uses two pages, we don't want to complain
> when they hit that second page.  
> 

Absolutely, I used rounding to mean round up, truncation for rounding down.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-29 22:37           ` Balbir Singh
@ 2007-08-30  5:38             ` KAMEZAWA Hiroyuki
  2007-08-30  9:13               ` Balbir Singh
  0 siblings, 1 reply; 15+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-08-30  5:38 UTC (permalink / raw)
  To: balbir
  Cc: Dave Hansen, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage, Andrew Morton

On Thu, 30 Aug 2007 04:07:11 +0530
Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> 1. Several people recommended it
> 2. Herbert mentioned that they've moved to that interface and it
>    was working fine for them.
> 

I have no strong opinion. But how about Mega bytes ? (too big ?)
There will be no rounding up/down problem.

-Kame.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [-mm PATCH] Memory controller improve user interface
  2007-08-30  5:38             ` KAMEZAWA Hiroyuki
@ 2007-08-30  9:13               ` Balbir Singh
  0 siblings, 0 replies; 15+ messages in thread
From: Balbir Singh @ 2007-08-30  9:13 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Dave Hansen, Linux Kernel Mailing List, Linux MM Mailing List,
	David Rientjes, Linux Containers, Paul Menage, Andrew Morton

KAMEZAWA Hiroyuki wrote:
> On Thu, 30 Aug 2007 04:07:11 +0530
> Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
>> 1. Several people recommended it
>> 2. Herbert mentioned that they've moved to that interface and it
>>    was working fine for them.
>>
> 
> I have no strong opinion. But how about Mega bytes ? (too big ?)
> There will be no rounding up/down problem.
> 

Here is what I am thinking, allow the user to input bytes/kilobytes/
megabytes or gigabytes. Store the data internally in kilobytes or
PFN. I prefer kilobytes (no rounding issues), but while implementing
limits we round up to the closest PFN.


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2007-08-30  9:13 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-29 11:10 [-mm PATCH] Memory controller improve user interface Balbir Singh
2007-08-29 15:28 ` Paul Menage
2007-08-29 16:07   ` Balbir Singh
2007-08-29 16:17     ` Paul Menage
2007-08-29 18:45 ` Dave Hansen
2007-08-29 22:04   ` Balbir Singh
2007-08-29 22:18     ` Dave Hansen
2007-08-29 22:20       ` Paul Menage
2007-08-29 22:25         ` Dave Hansen
2007-08-29 22:37           ` Balbir Singh
2007-08-30  5:38             ` KAMEZAWA Hiroyuki
2007-08-30  9:13               ` Balbir Singh
2007-08-29 22:27       ` Balbir Singh
2007-08-29 22:36         ` Dave Hansen
2007-08-29 22:44           ` Balbir Singh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox