* [PATCH 1/2] hugetlb: region_chg provides only cache entry
2019-09-19 20:04 [PATCH 0/2] Cleanups to hugetlb code Mina Almasry
@ 2019-09-19 20:04 ` Mina Almasry
2019-09-19 20:04 ` [PATCH 2/2] hugetlb: remove duplicated code Mina Almasry
1 sibling, 0 replies; 3+ messages in thread
From: Mina Almasry @ 2019-09-19 20:04 UTC (permalink / raw)
To: mike.kravetz
Cc: almasrymina, rientjes, shakeelb, gthelen, akpm, linux-kernel, linux-mm
Current behavior is that region_chg provides both a cache entry in
resv->region_cache, AND a placeholder entry in resv->regions. region_add
first tries to use the placeholder, and if it finds that the placeholder
has been deleted by a racing region_del call, it uses the cache entry.
This behavior is completely unnecessary and is removed in this patch for
a couple of reasons:
1. region_add needs to either find a cached file_region entry in
resv->region_cache, or find an entry in resv->regions to expand. It
does not need both.
2. region_chg adding a placeholder entry in resv->regions opens up
a possible race with region_del, where region_chg adds a placeholder
region in resv->regions, and this region is deleted by a racing call
to region_del during region_chg execution or before region_add is
called. Removing the race makes the code easier to reason about and
maintain.
In addition, a follow up patch in another series that disables region
coalescing, which would be further complicated if the race with
region_del exists.
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
---
mm/hugetlb.c | 63 +++++++++-------------------------------------------
1 file changed, 11 insertions(+), 52 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6d7296dd11b8..a14f6047fc7e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -246,14 +246,10 @@ struct file_region {
/*
* Add the huge page range represented by [f, t) to the reserve
- * map. In the normal case, existing regions will be expanded
- * to accommodate the specified range. Sufficient regions should
- * exist for expansion due to the previous call to region_chg
- * with the same range. However, it is possible that region_del
- * could have been called after region_chg and modifed the map
- * in such a way that no region exists to be expanded. In this
- * case, pull a region descriptor from the cache associated with
- * the map and use that for the new range.
+ * map. Existing regions will be expanded to accommodate the specified
+ * range, or a region will be taken from the cache. Sufficient regions
+ * must exist in the cache due to the previous call to region_chg with
+ * the same range.
*
* Return the number of new huge pages added to the map. This
* number is greater than or equal to zero.
@@ -272,9 +268,8 @@ static long region_add(struct resv_map *resv, long f, long t)
/*
* If no region exists which can be expanded to include the
- * specified range, the list must have been modified by an
- * interleving call to region_del(). Pull a region descriptor
- * from the cache and use it for this range.
+ * specified range, pull a region descriptor from the cache
+ * and use it for this range.
*/
if (&rg->link == head || t < rg->from) {
VM_BUG_ON(resv->region_cache_count <= 0);
@@ -339,15 +334,9 @@ static long region_add(struct resv_map *resv, long f, long t)
* call to region_add that will actually modify the reserve
* map to add the specified range [f, t). region_chg does
* not change the number of huge pages represented by the
- * map. However, if the existing regions in the map can not
- * be expanded to represent the new range, a new file_region
- * structure is added to the map as a placeholder. This is
- * so that the subsequent region_add call will have all the
- * regions it needs and will not fail.
- *
- * Upon entry, region_chg will also examine the cache of region descriptors
- * associated with the map. If there are not enough descriptors cached, one
- * will be allocated for the in progress add operation.
+ * map. A new file_region structure is added to the cache
+ * as a placeholder, so that the subsequent region_add
+ * call will have all the regions it needs and will not fail.
*
* Returns the number of huge pages that need to be added to the existing
* reservation map for the range [f, t). This number is greater or equal to
@@ -357,10 +346,9 @@ static long region_add(struct resv_map *resv, long f, long t)
static long region_chg(struct resv_map *resv, long f, long t)
{
struct list_head *head = &resv->regions;
- struct file_region *rg, *nrg = NULL;
+ struct file_region *rg;
long chg = 0;
-retry:
spin_lock(&resv->lock);
retry_locked:
resv->adds_in_progress++;
@@ -378,10 +366,8 @@ static long region_chg(struct resv_map *resv, long f, long t)
spin_unlock(&resv->lock);
trg = kmalloc(sizeof(*trg), GFP_KERNEL);
- if (!trg) {
- kfree(nrg);
+ if (!trg)
return -ENOMEM;
- }
spin_lock(&resv->lock);
list_add(&trg->link, &resv->region_cache);
@@ -394,28 +380,6 @@ static long region_chg(struct resv_map *resv, long f, long t)
if (f <= rg->to)
break;
- /* If we are below the current region then a new region is required.
- * Subtle, allocate a new region at the position but make it zero
- * size such that we can guarantee to record the reservation. */
- if (&rg->link == head || t < rg->from) {
- if (!nrg) {
- resv->adds_in_progress--;
- spin_unlock(&resv->lock);
- nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
- if (!nrg)
- return -ENOMEM;
-
- nrg->from = f;
- nrg->to = f;
- INIT_LIST_HEAD(&nrg->link);
- goto retry;
- }
-
- list_add(&nrg->link, rg->link.prev);
- chg = t - f;
- goto out_nrg;
- }
-
/* Round our left edge to the current segment if it encloses us. */
if (f > rg->from)
f = rg->from;
@@ -439,11 +403,6 @@ static long region_chg(struct resv_map *resv, long f, long t)
}
out:
- spin_unlock(&resv->lock);
- /* We already know we raced and no longer need the new region */
- kfree(nrg);
- return chg;
-out_nrg:
spin_unlock(&resv->lock);
return chg;
}
--
2.23.0.351.gc4317032e6-goog
^ permalink raw reply [flat|nested] 3+ messages in thread* [PATCH 2/2] hugetlb: remove duplicated code
2019-09-19 20:04 [PATCH 0/2] Cleanups to hugetlb code Mina Almasry
2019-09-19 20:04 ` [PATCH 1/2] hugetlb: region_chg provides only cache entry Mina Almasry
@ 2019-09-19 20:04 ` Mina Almasry
1 sibling, 0 replies; 3+ messages in thread
From: Mina Almasry @ 2019-09-19 20:04 UTC (permalink / raw)
To: mike.kravetz
Cc: almasrymina, rientjes, shakeelb, gthelen, akpm, linux-kernel, linux-mm
Remove duplicated code between region_chg and region_add, and refactor it into
a common function, add_reservation_in_range. This is mostly done because
there is a follow up change in another series that disables region
coalescing in region_add, and I want to make that change in one place
only. It should improve maintainability anyway on its own.
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
---
mm/hugetlb.c | 119 ++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 62 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a14f6047fc7e..052a2532528a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -244,6 +244,60 @@ struct file_region {
long to;
};
+/* Must be called with resv->lock held. Calling this with count_only == true
+ * will count the number of pages to be added but will not modify the linked
+ * list.
+ */
+static long add_reservation_in_range(struct resv_map *resv, long f, long t,
+ bool count_only)
+{
+ long chg = 0;
+ struct list_head *head = &resv->regions;
+ struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+
+ /* Locate the region we are before or in. */
+ list_for_each_entry (rg, head, link)
+ if (f <= rg->to)
+ break;
+
+ /* Round our left edge to the current segment if it encloses us. */
+ if (f > rg->from)
+ f = rg->from;
+
+ chg = t - f;
+
+ /* Check for and consume any regions we now overlap with. */
+ nrg = rg;
+ list_for_each_entry_safe (rg, trg, rg->link.prev, link) {
+ if (&rg->link == head)
+ break;
+ if (rg->from > t)
+ break;
+
+ /* We overlap with this area, if it extends further than
+ * us then we must extend ourselves. Account for its
+ * existing reservation.
+ */
+ if (rg->to > t) {
+ chg += rg->to - t;
+ t = rg->to;
+ }
+ chg -= rg->to - rg->from;
+
+ if (!count_only && rg != nrg) {
+ list_del(&rg->link);
+ kfree(rg);
+ }
+ }
+
+ if (!count_only) {
+ nrg->from = f;
+ nrg->to = t;
+ }
+
+ return chg;
+}
+
/*
* Add the huge page range represented by [f, t) to the reserve
* map. Existing regions will be expanded to accommodate the specified
@@ -257,7 +311,7 @@ struct file_region {
static long region_add(struct resv_map *resv, long f, long t)
{
struct list_head *head = &resv->regions;
- struct file_region *rg, *nrg, *trg;
+ struct file_region *rg, *nrg;
long add = 0;
spin_lock(&resv->lock);
@@ -287,38 +341,7 @@ static long region_add(struct resv_map *resv, long f, long t)
goto out_locked;
}
- /* Round our left edge to the current segment if it encloses us. */
- if (f > rg->from)
- f = rg->from;
-
- /* Check for and consume any regions we now overlap with. */
- nrg = rg;
- list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
- if (&rg->link == head)
- break;
- if (rg->from > t)
- break;
-
- /* If this area reaches higher then extend our area to
- * include it completely. If this is not the first area
- * which we intend to reuse, free it. */
- if (rg->to > t)
- t = rg->to;
- if (rg != nrg) {
- /* Decrement return value by the deleted range.
- * Another range will span this area so that by
- * end of routine add will be >= zero
- */
- add -= (rg->to - rg->from);
- list_del(&rg->link);
- kfree(rg);
- }
- }
-
- add += (nrg->from - f); /* Added to beginning of region */
- nrg->from = f;
- add += t - nrg->to; /* Added to end of region */
- nrg->to = t;
+ add = add_reservation_in_range(resv, f, t, false);
out_locked:
resv->adds_in_progress--;
@@ -345,8 +368,6 @@ static long region_add(struct resv_map *resv, long f, long t)
*/
static long region_chg(struct resv_map *resv, long f, long t)
{
- struct list_head *head = &resv->regions;
- struct file_region *rg;
long chg = 0;
spin_lock(&resv->lock);
@@ -375,34 +396,8 @@ static long region_chg(struct resv_map *resv, long f, long t)
goto retry_locked;
}
- /* Locate the region we are before or in. */
- list_for_each_entry(rg, head, link)
- if (f <= rg->to)
- break;
-
- /* Round our left edge to the current segment if it encloses us. */
- if (f > rg->from)
- f = rg->from;
- chg = t - f;
-
- /* Check for and consume any regions we now overlap with. */
- list_for_each_entry(rg, rg->link.prev, link) {
- if (&rg->link == head)
- break;
- if (rg->from > t)
- goto out;
-
- /* We overlap with this area, if it extends further than
- * us then we must extend ourselves. Account for its
- * existing reservation. */
- if (rg->to > t) {
- chg += rg->to - t;
- t = rg->to;
- }
- chg -= rg->to - rg->from;
- }
+ chg = add_reservation_in_range(resv, f, t, true);
-out:
spin_unlock(&resv->lock);
return chg;
}
--
2.23.0.351.gc4317032e6-goog
^ permalink raw reply [flat|nested] 3+ messages in thread