bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH gnumach v3] Implement per task virtual memory limit


From: dnietoc
Subject: [PATCH gnumach v3] Implement per task virtual memory limit
Date: Mon, 30 Dec 2024 23:03:20 +0000

From: Diego Nieto Cid <dnietoc@gmail.com>

  * include/mach/gnumach.defs: (vm_set_size_limit) new routine
    (vm_get_size_limit) likewise
  * kern/task.c: (task_create_kernel) if parent_task is not null copy virtual 
memory limit
  * tests/test-vm.c: (test_vm_limit) add test for the new routines
  * vm/vm_kern.c: (projected_buffer_allocate) increase size_none when 
protection is VM_PROT_NONE
    (projected_buffer_map) likewise
  * vm/vm_map.h: (struct vm_map) new fields size_none, size_cur_limit and 
size_max_limit
  * vm/vm_map.c: (vm_map_setup) initialize new fields
    (vm_map_enforce_limit) new function
    (vm_map_copy_limits) new function
    (vm_map_find_entry) call limit enforcer function
    (vm_map_enter) likewise
    (vm_map_copyout) likewise
    (vm_map_copyout_page_list) likewise
    (vm_map_fork) copy parent limit to the new map and compute and set 
size_none of the new map
  * vm/vm_user.c: (vm_set_size_limit) new function
    (vm_get_size_limit) likewise
---
 include/mach/gnumach.defs |  33 +++++++++++
 kern/task.c               |   5 ++
 tests/test-vm.c           |  85 ++++++++++++++++++++++++++++
 vm/vm_kern.c              |  26 +++++++++
 vm/vm_map.c               | 113 +++++++++++++++++++++++++++++++++++++-
 vm/vm_map.h               |  13 +++++
 vm/vm_user.c              |  69 +++++++++++++++++++++++
 7 files changed, 342 insertions(+), 2 deletions(-)

diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs
index f13e866b..a07a1011 100644
--- a/include/mach/gnumach.defs
+++ b/include/mach/gnumach.defs
@@ -223,3 +223,36 @@ simpleroutine thread_set_name(
 routine thread_get_name(
         thread : thread_t;
         out name : kernel_debug_name_t);
+
+/*
+ *     Set a task's virtual memory limit parameters
+ *
+ *     HOST_PORT must be the privileged host control port
+ *     if the caller desires to increase the current max limit.
+ *
+ *     On the other hand, if the max limit is being decreased, the
+ *     unprivileged host control port (as returned by mach_host_self())
+ *     can be provided.
+ *
+ *     Returns:
+ *       - KERN_SUCCESS
+ *       - KERN_INVALID_TASK
+ *       - KERN_INVALID_HOST
+ *       - KERN_INVALID_ARGUMENT
+ *           * when current_limit > max_limit
+ *           * attempt to increase max limit without providing
+ *             the privileged host control port.
+ */
+routine vm_set_size_limit(
+       host_port     : mach_port_t;
+       map           : vm_task_t;
+       current_limit : vm_size_t;
+       max_limit     : vm_size_t);
+
+/*
+ *     Get a task's virtual memory limit parameters
+ */
+routine vm_get_size_limit(
+               map           : vm_task_t;
+       out     current_limit : vm_size_t;
+       out     max_limit     : vm_size_t);
diff --git a/kern/task.c b/kern/task.c
index bd57ca2a..e78e856f 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -126,6 +126,11 @@ task_create_kernel(
                                        trunc_page(VM_MAX_USER_ADDRESS));
                        if (new_task->map == VM_MAP_NULL)
                                pmap_destroy(new_pmap);
+                       else if (parent_task != TASK_NULL) {
+                               vm_map_lock_read(parent_task->map);
+                               vm_map_copy_limits(new_task->map, 
parent_task->map);
+                               vm_map_unlock_read(parent_task->map);
+                       }
                }
        }
        if (new_task->map == VM_MAP_NULL) {
diff --git a/tests/test-vm.c b/tests/test-vm.c
index 4ece792e..8e4ad884 100644
--- a/tests/test-vm.c
+++ b/tests/test-vm.c
@@ -75,11 +75,96 @@ static void test_wire()
   // TODO check that all memory is actually wired or unwired
 }
 
+void test_vm_limit()
+{
+  kern_return_t err;
+  vm_address_t mem, mem2, mem3;
+  const size_t M_128K = 128l * 1024l;
+  const size_t M_128M = 128l * 1024l * 1024l;
+  const size_t M_512M = 512l * 1024l * 1024l;
+  vm_size_t cur;
+  vm_size_t max;
+
+  /* set VM memory limitations */
+  err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M);
+  ASSERT_RET(err, "cannot set VM limits");
+
+  /* check limits are actually saved */
+  err = vm_get_size_limit(mach_task_self(), &cur, &max);
+  ASSERT_RET(err, "getting the VM limit failed");
+  ASSERT(cur == M_128M, "cur limit was not expected");
+  ASSERT(max == M_512M, "max limit was not expected");
+
+  /* check we can no longer increase the max limit */
+  err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M * 
2);
+  ASSERT(err == KERN_INVALID_ARGUMENT, "raising VM max limit shall fail with 
KERN_INVALID_ARGUMENT");
+
+  /* alloc some memory below the limit */
+  err = vm_allocate(mach_task_self(), &mem, M_128K, TRUE);
+  ASSERT_RET(err, "allocating memory below the limit must succeed");
+  err = vm_deallocate(mach_task_self(), mem, M_128K);
+  ASSERT_RET(err, "deallocation failed");
+
+  /* alloc a bigger chunk to make it hit the limit */
+  err = vm_allocate(mach_task_self(), &mem, (M_128M * 2), TRUE);
+  ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE");
+
+  /* check that privileged tasks can increase the hard limit */
+  err = vm_set_size_limit(host_priv(), mach_task_self(), (M_512M + M_128M), 
M_512M * 2);
+  ASSERT_RET(err, "privileged tasks shall be allowed to increase the max 
limit");
+
+  /* check limits are actually saved */
+  err = vm_get_size_limit(mach_task_self(), &cur, &max);
+  ASSERT_RET(err, "getting the VM limit failed");
+  ASSERT(cur == (M_512M + M_128M), "cur limit was not expected");
+  ASSERT(max == (M_512M * 2), "max limit was not expected");
+
+  /* allocating the bigger chunk with the new limit shall succeed */
+  err = vm_allocate(mach_task_self(), &mem, (M_128M * 2), TRUE);
+  ASSERT_RET(err, "allocation should now succeed");
+  err = vm_deallocate(mach_task_self(), mem, (M_128M * 2));
+  ASSERT_RET(err, "deallocation failed");
+
+  /* check that the limit does not apply to VM_PROT_NONE mappings */
+  err = vm_map(mach_task_self(),
+    &mem, (M_512M * 3), 0, 0, MACH_PORT_NULL, 0, 1,
+    VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_COPY);
+  ASSERT_RET(err, "allocation of VM_PROT_NONE areas should not be subject to 
the limit");
+
+  /* check that the VM_PROT_NONE allocation does not reduce the limit */
+  err = vm_allocate(mach_task_self(), &mem2, M_512M, TRUE);
+  ASSERT_RET(err, "allocation should succeed in spite of the VM_PROT_NONE 
map");
+  err = vm_deallocate(mach_task_self(), mem2, M_512M);
+  ASSERT_RET(err, "deallocation failed");
+  err = vm_deallocate(mach_task_self(), mem, (M_512M * 3));
+  ASSERT_RET(err, "deallocation failed");
+
+  /* check that allocations demoted to VM_PROT_NONE no longer count towards 
the VM limit */
+  err = vm_allocate(mach_task_self(), &mem, M_512M, TRUE);
+  ASSERT_RET(err, "allocating memory below the limit must succeed");
+  err = vm_allocate(mach_task_self(), &mem2, M_128M, TRUE);
+  /* the current limit is M_512M + M_128M, this allocation should hit the 
limit */
+  ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE");
+  err = vm_protect(mach_task_self(), mem, M_512M, TRUE, VM_PROT_NONE);
+  ASSERT_RET(err, "could not drop protection to VM_PROT_NONE");
+  /* after dropping the protection there should be enough space again */
+  err = vm_allocate(mach_task_self(), &mem2, M_128M, TRUE);
+  ASSERT_RET(err, "allocating memory below the limit must succeed");
+  /* the purpose of this allocation is to show the failure message to check size_none 
value */
+  err = vm_allocate(mach_task_self(), &mem3, M_512M, TRUE);
+  ASSERT(err == KERN_NO_SPACE, "going above the limit should still fail");
+  err = vm_deallocate(mach_task_self(), mem2, M_128M);
+  ASSERT_RET(err, "deallocation failed");
+  err = vm_deallocate(mach_task_self(), mem, M_512M);
+  ASSERT_RET(err, "deallocation failed");
+}
+
 int main(int argc, char *argv[], int envc, char *envp[])
 {
   printf("VM_MIN_ADDRESS=0x%p\n", VM_MIN_ADDRESS);
   printf("VM_MAX_ADDRESS=0x%p\n", VM_MAX_ADDRESS);
   test_wire();
   test_memobj();
+  test_vm_limit();
   return 0;
 }
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index 51223d98..6b03d014 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -144,6 +144,19 @@ projected_buffer_allocate(
        u_entry->protection = protection;
        u_entry->max_protection = protection;
        u_entry->inheritance = inheritance;
+
+       /*
+        *      vm_map_find_entry allocated an entry of size `size`
+        *      without knowing its protection.
+        *
+        *      For this to work, the VM limit must not be reached
+        *      by the allocation, even though the chunk is then
+        *      exempted from the map's limit accounting by
+        *      increasing map->size_none here.
+        */
+       if (protection == VM_PROT_NONE)
+               map->size_none += size;
+
        vm_map_unlock(map);
                *user_p = addr;
 
@@ -226,6 +239,19 @@ projected_buffer_map(
        u_entry->max_protection = protection;
        u_entry->inheritance = inheritance;
        u_entry->wired_count = k_entry->wired_count;
+
+       /*
+        *      vm_map_find_entry allocated an entry of size `size`
+        *      without knowing its protection.
+        *
+        *      For this to work, the VM limit must not be reached
+        *      by the allocation, even though the chunk is then
+        *      exempted from the map's limit accounting by
+        *      increasing map->size_none here.
+        */
+       if (protection == VM_PROT_NONE)
+               map->size_none += size;
+
        vm_map_unlock(map);
                *user_p = user_addr;
 
diff --git a/vm/vm_map.c b/vm/vm_map.c
index 03d22ea1..9cf2932d 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -189,6 +189,7 @@ void vm_map_setup(
 
        map->size = 0;
        map->size_wired = 0;
+       map->size_none = 0;
        map->ref_count = 1;
        map->pmap = pmap;
        map->min_offset = min;
@@ -198,6 +199,14 @@ void vm_map_setup(
        map->first_free = vm_map_to_entry(map);
        map->hint = vm_map_to_entry(map);
        map->name = NULL;
+       /* TODO add to default limit the swap size */
+       if (pmap != kernel_pmap) {
+               map->size_cur_limit = vm_page_mem_size() / 2;
+               map->size_max_limit = vm_page_mem_size() / 2;
+       } else {
+               map->size_cur_limit = (~0UL);
+               map->size_max_limit = (~0UL);
+       }
        vm_map_lock_init(map);
        simple_lock_init(&map->ref_lock);
        simple_lock_init(&map->hint_lock);
@@ -268,6 +277,49 @@ void vm_map_unlock(struct vm_map *map)
        lock_write_done(&map->lock);
 }
 
+/*
+ *     Enforces the VM limit of a target map.
+ */
+static kern_return_t
+vm_map_enforce_limit(
+       vm_map_t map,
+       vm_size_t size,
+       const char *fn_name)
+{
+       /* Limit is ignored for the kernel map */
+       if (vm_map_pmap(map) == kernel_pmap) {
+               return KERN_SUCCESS;
+       }
+
+       /* Avoid taking into account the total VM_PROT_NONE virtual memory */
+       vm_size_t usable_size = map->size - map->size_none;
+       vm_size_t new_size = size + usable_size;
+       /* Check for integer overflow */
+       if (new_size < size) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if (new_size > map->size_cur_limit) {
+               task_t task = current_task();
+               printf("[%s] [task %s] map size: %lu, none: %lu, requested: 
%lu, limit: %lu\n",
+                       fn_name, task->name, map->size, map->size_none, size, 
map->size_cur_limit);
+               return KERN_NO_SPACE;
+       }
+
+       return KERN_SUCCESS;
+}
+
+/*
+ *    Copies the limits from source to destination map.
+ *    Called by task_create_kernel with the src_map locked.
+ */
+void
+vm_map_copy_limits(vm_map_t dst_map, vm_map_t src_map)
+{
+       dst_map->size_cur_limit = src_map->size_cur_limit;
+       dst_map->size_max_limit = src_map->size_max_limit;
+}
+
 /*
  *     vm_map_entry_create:    [ internal use only ]
  *
@@ -789,6 +841,7 @@ kern_return_t vm_map_find_entry(
        vm_map_entry_t  entry, new_entry;
        vm_offset_t     start;
        vm_offset_t     end;
+       kern_return_t   err;
 
        entry = vm_map_find_entry_anywhere(map, size, mask, TRUE, &start);
 
@@ -796,6 +849,9 @@ kern_return_t vm_map_find_entry(
                return KERN_NO_SPACE;
        }
 
+       if ((err = vm_map_enforce_limit(map, size, "vm_map_find_entry")) != 
KERN_SUCCESS)
+               return err;
+
        end = start + size;
 
        /*
@@ -1037,6 +1093,16 @@ kern_return_t vm_map_enter(
                        RETURN(KERN_NO_SPACE);
        }
 
+       /*
+        *      If the allocation has protection equal to VM_PROT_NONE,
+        *      don't check for limits as the map's size_none field is
+        *      not yet incremented.
+        */
+       if (max_protection != VM_PROT_NONE) {
+               if ((result = vm_map_enforce_limit(map, size, "vm_map_enter")) 
!= KERN_SUCCESS)
+                       RETURN(result);
+       }
+
        /*
         *      At this point,
         *              "start" and "end" should define the endpoints of the
@@ -1077,6 +1143,8 @@ kern_return_t vm_map_enter(
                         *      new range.
                         */
                        map->size += size;
+                       if (max_protection == VM_PROT_NONE)
+                               map->size_none += size;
                        entry->vme_end = end;
                        vm_map_gap_update(&map->hdr, entry);
                        /*
@@ -1113,6 +1181,8 @@ kern_return_t vm_map_enter(
                         *      new range.
                         */
                        map->size += size;
+                       if (max_protection == VM_PROT_NONE)
+                               map->size_none += size;
                        next_entry->vme_start = start;
                        vm_map_gap_update(&map->hdr, entry);
                        /*
@@ -1160,6 +1230,8 @@ kern_return_t vm_map_enter(
 
        vm_map_entry_link(map, entry, new_entry);
        map->size += size;
+       if (max_protection == VM_PROT_NONE)
+               map->size_none += size;
 
        /*
         *      Update the free space hint and the lookup hint
@@ -1679,11 +1751,17 @@ kern_return_t vm_map_protect(
                vm_map_clip_end(map, current, end);
 
                old_prot = current->protection;
-               if (set_max)
+               if (set_max) {
+                       if (current->max_protection != new_prot) {
+                               if (new_prot == VM_PROT_NONE)
+                                       map->size_none += current->vme_end - 
current->vme_start;
+                               if (current->max_protection == VM_PROT_NONE)
+                                       map->size_none -= current->vme_end - 
current->vme_start;
+                       }
                        current->protection =
                                (current->max_protection = new_prot) &
                                        old_prot;
-               else
+               } else
                        current->protection = new_prot;
 
                /*
@@ -2042,6 +2120,8 @@ void vm_map_entry_delete(
 
        vm_map_entry_unlink(map, entry);
        map->size -= size;
+       if (entry->max_protection == VM_PROT_NONE)
+               map->size_none -= size;
 
        vm_map_entry_dispose(map, entry);
 }
@@ -2882,6 +2962,11 @@ kern_return_t vm_map_copyout(
                return KERN_NO_SPACE;
        }
 
+       if ((kr = vm_map_enforce_limit(dst_map, size, "vm_map_copyout")) != 
KERN_SUCCESS) {
+               vm_map_unlock(dst_map);
+               return kr;
+       }
+
        /*
         *      Adjust the addresses in the copy chain, and
         *      reset the region attributes.
@@ -2985,6 +3070,10 @@ kern_return_t vm_map_copyout(
        SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
 
        dst_map->size += size;
+       /*
+        *      dst_map->size_none needs no updating because the protection
+        *      of all entries is VM_PROT_DEFAULT / VM_PROT_ALL
+        */
 
        /*
         *      Link in the copy
@@ -3062,6 +3151,11 @@ kern_return_t vm_map_copyout_page_list(
                return KERN_NO_SPACE;
        }
 
+       if ((result = vm_map_enforce_limit(dst_map, size, 
"vm_map_copyout_page_list")) != KERN_SUCCESS) {
+               vm_map_unlock(dst_map);
+               return result;
+       }
+
        end = start + size;
 
        must_wire = dst_map->wiring_required;
@@ -3206,6 +3300,10 @@ create_object:
        }
        SAVE_HINT(dst_map, entry);
        dst_map->size += size;
+       /*
+        *      dst_map->size_none needs no updating because the protection
+        *      of all entries is VM_PROT_DEFAULT / VM_PROT_ALL
+        */
 
        /*
         *      Link in the entry
@@ -4390,6 +4488,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
        vm_map_entry_t  new_entry;
        pmap_t          new_pmap = pmap_create((vm_size_t) 0);
        vm_size_t       new_size = 0;
+       vm_size_t       new_size_none = 0;
        vm_size_t       entry_size;
        vm_object_t     object;
 
@@ -4524,6 +4623,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
                                old_entry->vme_start);
 
                        new_size += entry_size;
+                       if (old_entry->max_protection == VM_PROT_NONE)
+                               new_size_none += entry_size;
                        break;
 
                case VM_INHERIT_COPY:
@@ -4572,6 +4673,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
 
 
                                        new_size += entry_size;
+                                       if (old_entry->max_protection == 
VM_PROT_NONE)
+                                               new_size_none += entry_size;
                                        break;
                                }
 
@@ -4609,6 +4712,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
 
                        vm_map_copy_insert(new_map, last, copy);
                        new_size += entry_size;
+                       if (old_entry->max_protection == VM_PROT_NONE)
+                               new_size_none += entry_size;
 
                        /*
                         *      Pick up the traversal at the end of
@@ -4630,6 +4735,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
        }
 
        new_map->size = new_size;
+       new_map->size_none = new_size_none;
+       vm_map_copy_limits(new_map, old_map);
        vm_map_unlock(old_map);
 
        return(new_map);
@@ -5163,6 +5270,8 @@ void vm_map_print(db_expr_t addr, boolean_t have_addr, 
db_expr_t count, const ch
         printf("ref=%d,nentries=%d\n", map->ref_count, map->hdr.nentries);
         printf("size=%lu,resident:%lu,wired=%lu\n", map->size,
                pmap_resident_count(map->pmap) * PAGE_SIZE, map->size_wired);
+        printf("max_limit=%lu,cur_limit=%lu,size_none=%lu\n",
+               map->size_max_limit, map->size_cur_limit, map->size_none);
         printf("version=%d\n", map->timestamp);
        indent += 1;
        for (entry = vm_map_first_entry(map);
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 900f1218..96f7124f 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -184,6 +184,7 @@ struct vm_map {
        pmap_t                  pmap;           /* Physical map */
        vm_size_t               size;           /* virtual size */
        vm_size_t               size_wired;     /* wired size */
+       vm_size_t               size_none;      /* none protection size */
        int                     ref_count;      /* Reference count */
        decl_simple_lock_data(, ref_lock)       /* Lock for ref_count field */
        vm_map_entry_t          hint;           /* hint for quick lookups */
@@ -198,6 +199,10 @@ struct vm_map {
        unsigned int            timestamp;      /* Version number */
 
        const char              *name;          /* Associated name */
+
+       vm_size_t               size_cur_limit; /* current limit on virtual 
memory size */
+       vm_size_t               size_max_limit; /* maximum size an unprivileged 
user can
+                                                  change current limit to */
 };
 
 #define vm_map_to_entry(map)   ((struct vm_map_entry *) &(map)->hdr.links)
@@ -582,4 +587,12 @@ void _vm_map_clip_end(
        vm_offset_t             end,
        boolean_t               link_gap);
 
+/*
+ *      This function is called to inherit the virtual memory limits
+ *      from one vm_map_t to another.
+ */
+void vm_map_copy_limits(
+       vm_map_t dst,
+       vm_map_t src);
+
 #endif /* _VM_VM_MAP_H_ */
diff --git a/vm/vm_user.c b/vm/vm_user.c
index 62aedad3..8089f3cf 100644
--- a/vm/vm_user.c
+++ b/vm/vm_user.c
@@ -804,3 +804,72 @@ kern_return_t vm_pages_phys(
 
        return KERN_SUCCESS;
 }
+
+/*
+ *     vm_set_size_limit
+ *
+ *     Sets the current/maximum virtual address space limits
+ *     of the `target_task`.
+ *
+ *     The host privileged port must be provided to increase
+ *     the max limit.
+ */
+kern_return_t
+vm_set_size_limit(
+       const ipc_port_t host_port,
+       vm_map_t         map,
+       vm_size_t        current_limit,
+       vm_size_t        max_limit)
+{
+       ipc_kobject_type_t ikot_host = IKOT_NONE;
+
+       if (current_limit > max_limit)
+               return KERN_INVALID_ARGUMENT;
+       if (map == VM_MAP_NULL)
+               return KERN_INVALID_TASK;
+
+       if (!IP_VALID(host_port))
+               return KERN_INVALID_HOST;
+       ip_lock(host_port);
+       if (ip_active(host_port))
+               ikot_host = ip_kotype(host_port);
+       ip_unlock(host_port);
+
+       if (ikot_host != IKOT_HOST && ikot_host != IKOT_HOST_PRIV)
+               return KERN_INVALID_HOST;
+
+       vm_map_lock(map);
+       if (max_limit > map->size_max_limit && ikot_host != IKOT_HOST_PRIV) {
+               vm_map_unlock(map);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       map->size_cur_limit = current_limit;
+       map->size_max_limit = max_limit;
+       vm_map_unlock(map);
+
+       return KERN_SUCCESS;
+}
+
+/*
+ *     vm_get_size_limit
+ *
+ *     Gets the current/maximum virtual address space limits
+ *     of the provided `map`.
+ */
+kern_return_t
+vm_get_size_limit(
+       vm_map_t        map,
+       vm_size_t       *current_limit,
+       vm_size_t       *max_limit)
+{
+       if (map == VM_MAP_NULL)
+               return KERN_INVALID_TASK;
+
+       vm_map_lock_read(map);
+       *current_limit = map->size_cur_limit;
+       *max_limit = map->size_max_limit;
+       vm_map_unlock_read(map);
+
+       return KERN_SUCCESS;
+}
-- 
2.45.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]