[PATCH xen.git] Add hugepage support to balloon driver

View: New views
1 Messages — Rating Filter:   Alert me  

[PATCH xen.git] Add hugepage support to balloon driver

by Dave McCracken-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

This patch adds hugepage support to the balloon driver.  It is activated
by specifying "balloon_hugepages" on the kernel command line.  Once activated,
the balloon driver works entirely in hugepage-sized (order-9) chunks.

If, when returning pages to the hypervisor, the driver finds a hugepage
that is not contiguous at the machine level, it returns each underlying
page separately.  When that hugepage is later repopulated, it will be
contiguous at the machine level.

Signed-off-by: Dave McCracken <dave.mccracken@...>

--------

 balloon.c |  171 +++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 125 insertions(+), 46 deletions(-)

--- 2.6-xen/drivers/xen/balloon.c 2009-10-29 17:48:30.000000000 -0500
+++ 2.6-xen-balloon/drivers/xen/balloon.c 2009-10-29 19:14:33.000000000 -0500
@@ -59,7 +59,7 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -85,6 +85,14 @@ static int register_balloon(struct sys_d
 
 static struct balloon_stats balloon_stats;
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+static int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -113,10 +121,41 @@ static struct timer_list balloon_timer;
 static void scrub_page(struct page *page)
 {
 #ifdef CONFIG_XEN_SCRUB_PAGES
- clear_highpage(page);
+ int i;
+
+ for (i = 0; i < balloon_npages; i++)
+ clear_highpage(page++);
 #endif
 }
 
+static void free_discontig_frame(void)
+{
+ int rc;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .domid        = DOMID_SELF,
+ .nr_extents   = balloon_npages, /* one extent per entry in discontig_frame_list */
+ .extent_order = 0 /* always order 0 here, regardless of balloon_order */
+ };
+ /* Release the balloon_npages frames in discontig_frame_list with one hypercall. */
+ set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+ rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ BUG_ON(rc != balloon_npages); /* any result other than balloon_npages is fatal */
+}
+
+/* Compact frame_list[0..nr_pages) in place, dropping zeroed slots; returns the count kept. */
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+ unsigned long i, j;
+
+ /* i scans every slot; j is the next slot to fill, so j <= i and no read passes nr_pages. */
+ for (i = 0, j = 0; i < nr_pages; i++) {
+ if (frame_list[i] != 0)
+ frame_list[j++] = frame_list[i];
+ }
+ return j;
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void balloon_append(struct page *page)
 {
@@ -190,12 +229,11 @@ static unsigned long current_target(void
 
 static int increase_reservation(unsigned long nr_pages)
 {
- unsigned long  pfn, i, flags;
+ unsigned long  pfn, mfn, i, j, flags;
  struct page   *page;
  long           rc;
  struct xen_memory_reservation reservation = {
  .address_bits = 0,
- .extent_order = 0,
  .domid        = DOMID_SELF
  };
 
@@ -207,12 +245,14 @@ static int increase_reservation(unsigned
  page = balloon_first_page();
  for (i = 0; i < nr_pages; i++) {
  BUG_ON(page == NULL);
- frame_list[i] = page_to_pfn(page);;
+ frame_list[i] = page_to_pfn(page);
  page = balloon_next_page(page);
  }
 
  set_xen_guest_handle(reservation.extent_start, frame_list);
  reservation.nr_extents = nr_pages;
+ reservation.extent_order = balloon_order;
+
  rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
  if (rc < 0)
  goto out;
@@ -222,19 +262,22 @@ static int increase_reservation(unsigned
  BUG_ON(page == NULL);
 
  pfn = page_to_pfn(page);
+ mfn = frame_list[i];
  BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
        phys_to_machine_mapping_valid(pfn));
 
- set_phys_to_machine(pfn, frame_list[i]);
+ for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+ set_phys_to_machine(pfn, mfn);
 
- /* Link back into the page tables if not highmem. */
- if (pfn < max_low_pfn) {
- int ret;
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- mfn_pte(frame_list[i], PAGE_KERNEL),
- 0);
- BUG_ON(ret);
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(mfn, PAGE_KERNEL),
+ 0);
+ BUG_ON(ret);
+ }
  }
 
  /* Relinquish the page back to the allocator. */
@@ -253,13 +296,13 @@ static int increase_reservation(unsigned
 
 static int decrease_reservation(unsigned long nr_pages)
 {
- unsigned long  pfn, i, flags;
+ unsigned long  pfn, lpfn, mfn, i, j, flags;
  struct page   *page;
  int            need_sleep = 0;
- int ret;
+ int discontig, discontig_free;
+ int ret;
  struct xen_memory_reservation reservation = {
  .address_bits = 0,
- .extent_order = 0,
  .domid        = DOMID_SELF
  };
 
@@ -267,7 +310,7 @@ static int decrease_reservation(unsigned
  nr_pages = ARRAY_SIZE(frame_list);
 
  for (i = 0; i < nr_pages; i++) {
- if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+ if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
  nr_pages = i;
  need_sleep = 1;
  break;
@@ -277,14 +320,6 @@ static int decrease_reservation(unsigned
  frame_list[i] = pfn_to_mfn(pfn);
 
  scrub_page(page);
-
- if (!PageHighMem(page)) {
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
- BUG_ON(ret);
-                }
-
  }
 
  /* Ensure that ballooned highmem pages don't have kmaps. */
@@ -295,18 +330,39 @@ static int decrease_reservation(unsigned
 
  /* No more mappings: invalidate P2M and add to balloon. */
  for (i = 0; i < nr_pages; i++) {
- pfn = mfn_to_pfn(frame_list[i]);
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ mfn = frame_list[i];
+ lpfn = pfn = mfn_to_pfn(mfn);
  balloon_append(pfn_to_page(pfn));
+ discontig_free = 0;
+ for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+ if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn)
+ discontig_free = 1;
+
+ set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+ if (!PageHighMem(page)) {
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(lpfn << PAGE_SHIFT),
+ __pte_ma(0), 0);
+ BUG_ON(ret);
+ }
+ }
+ if (discontig_free) {
+ free_discontig_frame();
+ frame_list[i] = 0;
+ discontig = 1;
+ }
  }
+ balloon_stats.current_pages -= nr_pages;
+
+ if (discontig)
+ nr_pages = shrink_frame(nr_pages);
 
  set_xen_guest_handle(reservation.extent_start, frame_list);
  reservation.nr_extents   = nr_pages;
+ reservation.extent_order = balloon_order;
  ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
  BUG_ON(ret != nr_pages);
 
- balloon_stats.current_pages -= nr_pages;
-
  spin_unlock_irqrestore(&xen_reservation_lock, flags);
 
  return need_sleep;
@@ -374,7 +430,7 @@ static void watch_target(struct xenbus_w
  /* The given memory/target value is in KiB, so it needs converting to
  * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
  */
- balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+ balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order));
 }
 
 static int balloon_init_watcher(struct notifier_block *notifier,
@@ -399,9 +455,12 @@ static int __init balloon_init(void)
  if (!xen_pv_domain())
  return -ENODEV;
 
- pr_info("xen_balloon: Initialising balloon driver.\n");
+ pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+ balloon_order);
+
+ balloon_npages = 1 << balloon_order;
 
- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
  balloon_stats.target_pages  = balloon_stats.current_pages;
  balloon_stats.balloon_low   = 0;
  balloon_stats.balloon_high  = 0;
@@ -414,7 +473,7 @@ static int __init balloon_init(void)
  register_balloon(&balloon_sysdev);
 
  /* Initialise the balloon with excess memory space. */
- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) {
  if (page_is_ram(pfn)) {
  struct page *page = pfn_to_page(pfn);
  if (!PageReserved(page))
@@ -464,16 +523,20 @@ static int dealloc_pte_fn(pte_t *pte, st
 struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 {
  struct page *page, **pagevec;
- int i, ret;
+ int npages;
+ int i, j, ret;
+
+ /* Round up to next number of balloon_order pages */
+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
 
- pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+ pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL);
  if (pagevec == NULL)
  return NULL;
 
  for (i = 0; i < nr_pages; i++) {
  void *v;
 
- page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
+ page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order);
  if (page == NULL)
  goto err;
 
@@ -484,8 +547,8 @@ struct page **alloc_empty_pages_and_page
  v = page_address(page);
 
  ret = apply_to_page_range(&init_mm, (unsigned long)v,
-  PAGE_SIZE, dealloc_pte_fn,
-  NULL);
+  PAGE_SIZE << balloon_order,
+  dealloc_pte_fn, NULL);
 
  if (ret != 0) {
  mutex_unlock(&balloon_mutex);
@@ -493,8 +556,10 @@ struct page **alloc_empty_pages_and_page
  __free_page(page);
  goto err;
  }
+ for (j = 0; j < balloon_npages; j++)
+ pagevec[(i<<balloon_order)+j] = page++;
 
- totalram_pages = --balloon_stats.current_pages;
+ totalram_pages = balloon_stats.current_pages -= balloon_npages;
 
  mutex_unlock(&balloon_mutex);
  }
@@ -507,7 +572,7 @@ struct page **alloc_empty_pages_and_page
  err:
  mutex_lock(&balloon_mutex);
  while (--i >= 0)
- balloon_append(pagevec[i]);
+ balloon_append(pagevec[i << balloon_order]);
  mutex_unlock(&balloon_mutex);
  kfree(pagevec);
  pagevec = NULL;
@@ -517,15 +582,21 @@ EXPORT_SYMBOL_GPL(alloc_empty_pages_and_
 
 void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
 {
+ struct page *page;
  int i;
+ int npages;
 
  if (pagevec == NULL)
  return;
 
+ /* Round up to next number of balloon_order pages */
+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
+
  mutex_lock(&balloon_mutex);
  for (i = 0; i < nr_pages; i++) {
- BUG_ON(page_count(pagevec[i]) != 1);
- balloon_append(pagevec[i]);
+ page = pagevec[i << balloon_order];
+ BUG_ON(page_count(page) != 1);
+ balloon_append(page);
  }
  mutex_unlock(&balloon_mutex);
 
@@ -535,6 +606,14 @@ void free_empty_pages_and_pagevec(struct
 }
 EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
 
+static int __init balloon_parse_huge(char *s)
+{
+ balloon_order = 9; /* switch the driver to order-9 (hugepage) units; s is not examined */
+ return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 #define BALLOON_SHOW(name, format, args...) \
  static ssize_t show_##name(struct sys_device *dev, \
    struct sysdev_attribute *attr, \
@@ -568,7 +647,7 @@ static ssize_t store_target_kb(struct sy
 
  target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
- balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
  return count;
 }
@@ -582,7 +661,7 @@ static ssize_t show_target(struct sys_de
 {
  return sprintf(buf, "%llu\n",
        (unsigned long long)balloon_stats.target_pages
-       << PAGE_SHIFT);
+       << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -598,7 +677,7 @@ static ssize_t store_target(struct sys_d
 
  target_bytes = memparse(buf, &endchar);
 
- balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
  return count;
 }

_______________________________________________
Xen-devel mailing list
Xen-devel@...
http://lists.xensource.com/xen-devel