[PATCH gnumach v3] Implement per task virtual memory limit
From: dnietoc
Subject: [PATCH gnumach v3] Implement per task virtual memory limit
Date: Mon, 30 Dec 2024 23:03:20 +0000
From: Diego Nieto Cid <dnietoc@gmail.com>
* include/mach/gnumach.defs (vm_set_size_limit): New routine.
(vm_get_size_limit): Likewise.
* kern/task.c (task_create_kernel): If parent_task is not null, copy its
virtual memory limit.
* tests/test-vm.c (test_vm_limit): Add test for the new routines.
* vm/vm_kern.c (projected_buffer_allocate): Increase size_none when
protection is VM_PROT_NONE.
(projected_buffer_map): Likewise.
* vm/vm_map.h (struct vm_map): New fields size_none, size_cur_limit and
size_max_limit.
* vm/vm_map.c (vm_map_setup): Initialize new fields.
(vm_map_enforce_limit): New function.
(vm_map_copy_limits): New function.
(vm_map_find_entry): Call limit enforcer function.
(vm_map_enter): Likewise.
(vm_map_copyout): Likewise.
(vm_map_copyout_page_list): Likewise.
(vm_map_fork): Copy parent limit to the new map; compute and set
size_none of the new map.
* vm/vm_user.c (vm_set_size_limit): New function.
(vm_get_size_limit): Likewise.
---
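Note for reviewers (not part of the commit message): the sketch below shows
how the new pair of RPCs is intended to be driven from user space. It
assumes the MIG stubs generated from gnumach.defs are exposed through
<mach/gnumach.h>; error handling is abbreviated.

    #include <stdio.h>
    #include <mach.h>
    #include <mach/gnumach.h>

    static void report_and_lower_vm_limit(vm_size_t cur, vm_size_t max)
    {
        kern_return_t kr;
        vm_size_t old_cur, old_max;

        /* Reading the limits requires no special privilege.  */
        kr = vm_get_size_limit(mach_task_self(), &old_cur, &old_max);
        if (kr != KERN_SUCCESS)
            return;
        printf("cur=%lu max=%lu\n",
               (unsigned long) old_cur, (unsigned long) old_max);

        /* Lowering the limits only needs the unprivileged host port;
           raising max above old_max would instead require the privileged
           host control port and otherwise fails with
           KERN_INVALID_ARGUMENT.  */
        kr = vm_set_size_limit(mach_host_self(), mach_task_self(),
                               cur, max);
        if (kr != KERN_SUCCESS)
            printf("vm_set_size_limit: %d\n", kr);
    }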
include/mach/gnumach.defs | 33 +++++++++++
kern/task.c | 5 ++
tests/test-vm.c | 85 ++++++++++++++++++++++++++++
vm/vm_kern.c | 26 +++++++++
vm/vm_map.c | 113 +++++++++++++++++++++++++++++++++++++-
vm/vm_map.h | 13 +++++
vm/vm_user.c | 69 +++++++++++++++++++++++
7 files changed, 342 insertions(+), 2 deletions(-)
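
The enforcement rule itself is small; here is a condensed restatement of
vm_map_enforce_limit as added below (a reading aid, not an additional
change; the real function also exempts the kernel map and is called with
the map locked by its callers):

    /* VM_PROT_NONE mappings are tracked in map->size_none and are
       exempt from the limit.  */
    static kern_return_t enforce_limit_sketch(vm_map_t map, vm_size_t size)
    {
        vm_size_t usable = map->size - map->size_none; /* counted so far */
        vm_size_t wanted = usable + size;              /* after request  */

        if (wanted < size)                   /* addition overflowed */
            return KERN_INVALID_ARGUMENT;
        if (wanted > map->size_cur_limit)    /* over the current limit */
            return KERN_NO_SPACE;
        return KERN_SUCCESS;
    }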
diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs
index f13e866b..a07a1011 100644
--- a/include/mach/gnumach.defs
+++ b/include/mach/gnumach.defs
@@ -223,3 +223,36 @@ simpleroutine thread_set_name(
routine thread_get_name(
thread : thread_t;
out name : kernel_debug_name_t);
+
+/*
+ * Set a task's virtual memory limit parameters.
+ *
+ * HOST_PORT must be the privileged host control port
+ * if the caller desires to increase the current max limit.
+ *
+ * On the other hand, if the max limit is being decreased, the
+ * unprivileged host port (as returned by mach_host_self())
+ * can be provided.
+ *
+ * Returns:
+ * - KERN_SUCCESS
+ * - KERN_INVALID_TASK
+ * - KERN_INVALID_HOST
+ * - KERN_INVALID_ARGUMENT
+ * * when current_limit > max_limit
+ * * attempt to increase max limit without providing
+ * the privileged host control port.
+ */
+routine vm_set_size_limit(
+ host_port : mach_port_t;
+ map : vm_task_t;
+ current_limit : vm_size_t;
+ max_limit : vm_size_t);
+
+/*
+ * Get a task's virtual memory limit parameters.
+ */
+routine vm_get_size_limit(
+ map : vm_task_t;
+ out current_limit : vm_size_t;
+ out max_limit : vm_size_t);
diff --git a/kern/task.c b/kern/task.c
index bd57ca2a..e78e856f 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -126,6 +126,11 @@ task_create_kernel(
trunc_page(VM_MAX_USER_ADDRESS));
if (new_task->map == VM_MAP_NULL)
pmap_destroy(new_pmap);
+ else if (parent_task != TASK_NULL) {
+ vm_map_lock_read(parent_task->map);
+ vm_map_copy_limits(new_task->map, parent_task->map);
+ vm_map_unlock_read(parent_task->map);
+ }
}
}
if (new_task->map == VM_MAP_NULL) {
diff --git a/tests/test-vm.c b/tests/test-vm.c
index 4ece792e..8e4ad884 100644
--- a/tests/test-vm.c
+++ b/tests/test-vm.c
@@ -75,11 +75,96 @@ static void test_wire()
// TODO check that all memory is actually wired or unwired
}
+void test_vm_limit()
+{
+ kern_return_t err;
+ vm_address_t mem, mem2, mem3;
+ const size_t M_128K = 128l * 1024l;
+ const size_t M_128M = 128l * 1024l * 1024l;
+ const size_t M_512M = 512l * 1024l * 1024l;
+ vm_size_t cur;
+ vm_size_t max;
+
+ /* set VM memory limitations */
+ err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M);
+ ASSERT_RET(err, "cannot set VM limits");
+
+ /* check limits are actually saved */
+ err = vm_get_size_limit(mach_task_self(), &cur, &max);
+ ASSERT_RET(err, "getting the VM limit failed");
+ ASSERT(cur == M_128M, "cur limit was not expected");
+ ASSERT(max == M_512M, "max limit was not expected");
+
+ /* check we can no longer increase the max limit */
+ err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M * 2);
+ ASSERT(err == KERN_INVALID_ARGUMENT, "raising VM max limit shall fail with KERN_INVALID_ARGUMENT");
+
+ /* alloc some memory below the limit */
+ err = vm_allocate(mach_task_self(), &mem, M_128K, TRUE);
+ ASSERT_RET(err, "allocating memory below the limit must succeed");
+ err = vm_deallocate(mach_task_self(), mem, M_128K);
+ ASSERT_RET(err, "deallocation failed");
+
+ /* alloc a bigger chunk to make it hit the limit */
+ err = vm_allocate(mach_task_self(), &mem, (M_128M * 2), TRUE);
+ ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE");
+
+ /* check that privileged tasks can increase the hard limit */
+ err = vm_set_size_limit(host_priv(), mach_task_self(), (M_512M + M_128M), M_512M * 2);
+ ASSERT_RET(err, "privileged tasks shall be allowed to increase the max limit");
+
+ /* check limits are actually saved */
+ err = vm_get_size_limit(mach_task_self(), &cur, &max);
+ ASSERT_RET(err, "getting the VM limit failed");
+ ASSERT(cur == (M_512M + M_128M), "cur limit was not expected");
+ ASSERT(max == (M_512M * 2), "max limit was not expected");
+
+ /* allocating the bigger chunk with the new limit shall succeed */
+ err = vm_allocate(mach_task_self(), &mem, (M_128M * 2), TRUE);
+ ASSERT_RET(err, "allocation should now succedd");
+ err = vm_deallocate(mach_task_self(), mem, (M_128M * 2));
+ ASSERT_RET(err, "deallocation failed");
+
+ /* check that the limit does not apply to VM_PROT_NONE mappings */
+ err = vm_map(mach_task_self(),
+ &mem, (M_512M * 3), 0, 0, MACH_PORT_NULL, 0, 1,
+ VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_COPY);
+ ASSERT_RET(err, "allocation of VM_PROT_NONE areas should not be subject to
the limit");
+
+ /* check that the VM_PROT_NONE allocation does not reduce the limit */
+ err = vm_allocate(mach_task_self(), &mem2, M_512M, TRUE);
+ ASSERT_RET(err, "allocation should succedd in spite of the VM_PROT_NONE
map");
+ err = vm_deallocate(mach_task_self(), mem2, M_512M);
+ ASSERT_RET(err, "deallocation failed");
+ err = vm_deallocate(mach_task_self(), mem, (M_512M * 3));
+ ASSERT_RET(err, "deallocation failed");
+
+ /* check that allocations demoted to VM_PROT_NONE no longer count towards the VM limit */
+ err = vm_allocate(mach_task_self(), &mem, M_512M, TRUE);
+ ASSERT_RET(err, "allocating memory below the limit must succeed");
+ err = vm_allocate(mach_task_self(), &mem2, M_128M, TRUE);
+ /* the current limit is M_512M + M_128M, so this allocation should hit the limit */
+ ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE");
+ err = vm_protect(mach_task_self(), mem, M_512M, TRUE, VM_PROT_NONE);
+ ASSERT_RET(err, "could not drop protection to VM_PROT_NONE");
+ /* after dropping the protection there should be enough space again */
+ err = vm_allocate(mach_task_self(), &mem2, M_128M, TRUE);
+ ASSERT_RET(err, "allocating memory below the limit must succeed");
+ /* this allocation's purpose is to show the failure message, to check the size_none value */
+ err = vm_allocate(mach_task_self(), &mem3, M_512M, TRUE);
+ ASSERT(err == KERN_NO_SPACE, "going above the limit should still fail");
+ err = vm_deallocate(mach_task_self(), mem2, M_128M);
+ ASSERT_RET(err, "deallocation failed");
+ err = vm_deallocate(mach_task_self(), mem, M_512M);
+ ASSERT_RET(err, "deallocation failed");
+}
+
int main(int argc, char *argv[], int envc, char *envp[])
{
printf("VM_MIN_ADDRESS=0x%p\n", VM_MIN_ADDRESS);
printf("VM_MAX_ADDRESS=0x%p\n", VM_MAX_ADDRESS);
test_wire();
test_memobj();
+ test_vm_limit();
return 0;
}
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index 51223d98..6b03d014 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -144,6 +144,19 @@ projected_buffer_allocate(
u_entry->protection = protection;
u_entry->max_protection = protection;
u_entry->inheritance = inheritance;
+
+ /*
+ * vm_map_find_entry allocated an entry of size `size`
+ * without knowing its protection.
+ *
+ * For this to work, the allocation must not hit the VM
+ * limit, even though the chunk is subsequently excluded
+ * from the limit accounting by increasing map->size_none
+ * here.
+ */
+ if (protection == VM_PROT_NONE)
+ map->size_none += size;
+
vm_map_unlock(map);
*user_p = addr;
@@ -226,6 +239,19 @@ projected_buffer_map(
u_entry->max_protection = protection;
u_entry->inheritance = inheritance;
u_entry->wired_count = k_entry->wired_count;
+
+ /*
+ * vm_map_find_entry allocated an entry of size `size`
+ * without knowing its protection.
+ *
+ * For this to work, the allocation must not hit the VM
+ * limit, even though the chunk is subsequently excluded
+ * from the limit accounting by increasing map->size_none
+ * here.
+ */
+ if (protection == VM_PROT_NONE)
+ map->size_none += size;
+
vm_map_unlock(map);
*user_p = user_addr;
diff --git a/vm/vm_map.c b/vm/vm_map.c
index 03d22ea1..9cf2932d 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -189,6 +189,7 @@ void vm_map_setup(
map->size = 0;
map->size_wired = 0;
+ map->size_none = 0;
map->ref_count = 1;
map->pmap = pmap;
map->min_offset = min;
@@ -198,6 +199,14 @@ void vm_map_setup(
map->first_free = vm_map_to_entry(map);
map->hint = vm_map_to_entry(map);
map->name = NULL;
+ /* TODO: add the swap size to the default limit */
+ if (pmap != kernel_pmap) {
+ map->size_cur_limit = vm_page_mem_size() / 2;
+ map->size_max_limit = vm_page_mem_size() / 2;
+ } else {
+ map->size_cur_limit = (~0UL);
+ map->size_max_limit = (~0UL);
+ }
vm_map_lock_init(map);
simple_lock_init(&map->ref_lock);
simple_lock_init(&map->hint_lock);
@@ -268,6 +277,49 @@ void vm_map_unlock(struct vm_map *map)
lock_write_done(&map->lock);
}
+/*
+ * Enforces the VM limit of a target map.
+ */
+static kern_return_t
+vm_map_enforce_limit(
+ vm_map_t map,
+ vm_size_t size,
+ const char *fn_name)
+{
+ /* Limit is ignored for the kernel map */
+ if (vm_map_pmap(map) == kernel_pmap) {
+ return KERN_SUCCESS;
+ }
+
+ /* Do not count VM_PROT_NONE virtual memory towards the limit */
+ vm_size_t usable_size = map->size - map->size_none;
+ vm_size_t new_size = size + usable_size;
+ /* Check for integer overflow */
+ if (new_size < size) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ if (new_size > map->size_cur_limit) {
+ task_t task = current_task();
+ printf("[%s] [task %s] map size: %lu, none: %lu, requested:
%lu, limit: %lu\n",
+ fn_name, task->name, map->size, map->size_none, size,
map->size_cur_limit);
+ return KERN_NO_SPACE;
+ }
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Copies the limits from source to destination map.
+ * Called by task_create_kernel with the src_map locked.
+ */
+void
+vm_map_copy_limits(vm_map_t dst_map, vm_map_t src_map)
+{
+ dst_map->size_cur_limit = src_map->size_cur_limit;
+ dst_map->size_max_limit = src_map->size_max_limit;
+}
+
/*
* vm_map_entry_create: [ internal use only ]
*
@@ -789,6 +841,7 @@ kern_return_t vm_map_find_entry(
vm_map_entry_t entry, new_entry;
vm_offset_t start;
vm_offset_t end;
+ kern_return_t err;
entry = vm_map_find_entry_anywhere(map, size, mask, TRUE, &start);
@@ -796,6 +849,9 @@ kern_return_t vm_map_find_entry(
return KERN_NO_SPACE;
}
+ if ((err = vm_map_enforce_limit(map, size, "vm_map_find_entry")) != KERN_SUCCESS)
+ return err;
+
end = start + size;
/*
@@ -1037,6 +1093,16 @@ kern_return_t vm_map_enter(
RETURN(KERN_NO_SPACE);
}
+ /*
+ * If the allocation's max protection is VM_PROT_NONE,
+ * don't check the limit, as the map's size_none field is
+ * not yet incremented.
+ */
+ if (max_protection != VM_PROT_NONE) {
+ if ((result = vm_map_enforce_limit(map, size, "vm_map_enter"))
!= KERN_SUCCESS)
+ RETURN(result);
+ }
+
/*
* At this point,
* "start" and "end" should define the endpoints of the
@@ -1077,6 +1143,8 @@ kern_return_t vm_map_enter(
* new range.
*/
map->size += size;
+ if (max_protection == VM_PROT_NONE)
+ map->size_none += size;
entry->vme_end = end;
vm_map_gap_update(&map->hdr, entry);
/*
@@ -1113,6 +1181,8 @@ kern_return_t vm_map_enter(
* new range.
*/
map->size += size;
+ if (max_protection == VM_PROT_NONE)
+ map->size_none += size;
next_entry->vme_start = start;
vm_map_gap_update(&map->hdr, entry);
/*
@@ -1160,6 +1230,8 @@ kern_return_t vm_map_enter(
vm_map_entry_link(map, entry, new_entry);
map->size += size;
+ if (max_protection == VM_PROT_NONE)
+ map->size_none += size;
/*
* Update the free space hint and the lookup hint
@@ -1679,11 +1751,17 @@ kern_return_t vm_map_protect(
vm_map_clip_end(map, current, end);
old_prot = current->protection;
- if (set_max)
+ if (set_max) {
+ if (current->max_protection != new_prot) {
+ if (new_prot == VM_PROT_NONE)
+ map->size_none += current->vme_end - current->vme_start;
+ if (current->max_protection == VM_PROT_NONE)
+ map->size_none -= current->vme_end - current->vme_start;
+ }
current->protection =
(current->max_protection = new_prot) &
old_prot;
- else
+ } else
current->protection = new_prot;
/*
@@ -2042,6 +2120,8 @@ void vm_map_entry_delete(
vm_map_entry_unlink(map, entry);
map->size -= size;
+ if (entry->max_protection == VM_PROT_NONE)
+ map->size_none -= size;
vm_map_entry_dispose(map, entry);
}
@@ -2882,6 +2962,11 @@ kern_return_t vm_map_copyout(
return KERN_NO_SPACE;
}
+ if ((kr = vm_map_enforce_limit(dst_map, size, "vm_map_copyout")) != KERN_SUCCESS) {
+ vm_map_unlock(dst_map);
+ return kr;
+ }
+
/*
* Adjust the addresses in the copy chain, and
* reset the region attributes.
@@ -2985,6 +3070,10 @@ kern_return_t vm_map_copyout(
SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
dst_map->size += size;
+ /*
+ * dst_map->size_none needs no updating because the protection
+ * of all entries is VM_PROT_DEFAULT / VM_PROT_ALL.
+ */
/*
* Link in the copy
@@ -3062,6 +3151,11 @@ kern_return_t vm_map_copyout_page_list(
return KERN_NO_SPACE;
}
+ if ((result = vm_map_enforce_limit(dst_map, size, "vm_map_copyout_page_list")) != KERN_SUCCESS) {
+ vm_map_unlock(dst_map);
+ return result;
+ }
+
end = start + size;
must_wire = dst_map->wiring_required;
@@ -3206,6 +3300,10 @@ create_object:
}
SAVE_HINT(dst_map, entry);
dst_map->size += size;
+ /*
+ * dst_map->size_none needs no updating because the protection
+ * of all entries is VM_PROT_DEFAULT / VM_PROT_ALL.
+ */
/*
* Link in the entry
@@ -4390,6 +4488,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
vm_map_entry_t new_entry;
pmap_t new_pmap = pmap_create((vm_size_t) 0);
vm_size_t new_size = 0;
+ vm_size_t new_size_none = 0;
vm_size_t entry_size;
vm_object_t object;
@@ -4524,6 +4623,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
old_entry->vme_start);
new_size += entry_size;
+ if (old_entry->max_protection == VM_PROT_NONE)
+ new_size_none += entry_size;
break;
case VM_INHERIT_COPY:
@@ -4572,6 +4673,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
new_size += entry_size;
+ if (old_entry->max_protection == VM_PROT_NONE)
+ new_size_none += entry_size;
break;
}
@@ -4609,6 +4712,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
vm_map_copy_insert(new_map, last, copy);
new_size += entry_size;
+ if (old_entry->max_protection == VM_PROT_NONE)
+ new_size_none += entry_size;
/*
* Pick up the traversal at the end of
@@ -4630,6 +4735,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
}
new_map->size = new_size;
+ new_map->size_none = new_size_none;
+ vm_map_copy_limits(new_map, old_map);
vm_map_unlock(old_map);
return(new_map);
@@ -5163,6 +5270,8 @@ void vm_map_print(db_expr_t addr, boolean_t have_addr, db_expr_t count, const ch
printf("ref=%d,nentries=%d\n", map->ref_count, map->hdr.nentries);
printf("size=%lu,resident:%lu,wired=%lu\n", map->size,
pmap_resident_count(map->pmap) * PAGE_SIZE, map->size_wired);
+ printf("max_limit=%lu,cur_limit=%lu,size_none=%lu\n",
+ map->size_max_limit, map->size_cur_limit, map->size_none);
printf("version=%d\n", map->timestamp);
indent += 1;
for (entry = vm_map_first_entry(map);
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 900f1218..96f7124f 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -184,6 +184,7 @@ struct vm_map {
pmap_t pmap; /* Physical map */
vm_size_t size; /* virtual size */
vm_size_t size_wired; /* wired size */
+ vm_size_t size_none; /* none protection size */
int ref_count; /* Reference count */
decl_simple_lock_data(, ref_lock) /* Lock for ref_count field */
vm_map_entry_t hint; /* hint for quick lookups */
@@ -198,6 +199,10 @@ struct vm_map {
unsigned int timestamp; /* Version number */
const char *name; /* Associated name */
+
+ vm_size_t size_cur_limit; /* current limit on virtual memory size */
+ vm_size_t size_max_limit; /* maximum size an unprivileged user can
+ change current limit to */
};
#define vm_map_to_entry(map) ((struct vm_map_entry *) &(map)->hdr.links)
@@ -582,4 +587,12 @@ void _vm_map_clip_end(
vm_offset_t end,
boolean_t link_gap);
+/*
+ * This function is called to inherit the virtual memory limits
+ * from one vm_map_t to another.
+ */
+void vm_map_copy_limits(
+ vm_map_t dst,
+ vm_map_t src);
+
#endif /* _VM_VM_MAP_H_ */
diff --git a/vm/vm_user.c b/vm/vm_user.c
index 62aedad3..8089f3cf 100644
--- a/vm/vm_user.c
+++ b/vm/vm_user.c
@@ -804,3 +804,72 @@ kern_return_t vm_pages_phys(
return KERN_SUCCESS;
}
+
+/*
+ * vm_set_size_limit
+ *
+ * Sets the current/maximum virtual address space limits
+ * of the target `map`.
+ *
+ * The host privileged port must be provided to increase
+ * the max limit.
+ */
+kern_return_t
+vm_set_size_limit(
+ const ipc_port_t host_port,
+ vm_map_t map,
+ vm_size_t current_limit,
+ vm_size_t max_limit)
+{
+ ipc_kobject_type_t ikot_host = IKOT_NONE;
+
+ if (current_limit > max_limit)
+ return KERN_INVALID_ARGUMENT;
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ if (!IP_VALID(host_port))
+ return KERN_INVALID_HOST;
+ ip_lock(host_port);
+ if (ip_active(host_port))
+ ikot_host = ip_kotype(host_port);
+ ip_unlock(host_port);
+
+ if (ikot_host != IKOT_HOST && ikot_host != IKOT_HOST_PRIV)
+ return KERN_INVALID_HOST;
+
+ vm_map_lock(map);
+ if (max_limit > map->size_max_limit && ikot_host != IKOT_HOST_PRIV) {
+ vm_map_unlock(map);
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ map->size_cur_limit = current_limit;
+ map->size_max_limit = max_limit;
+ vm_map_unlock(map);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * vm_get_size_limit
+ *
+ * Gets the current/maximum virtual address space limits
+ * of the provided `map`.
+ */
+kern_return_t
+vm_get_size_limit(
+ vm_map_t map,
+ vm_size_t *current_limit,
+ vm_size_t *max_limit)
+{
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ vm_map_lock_read(map);
+ *current_limit = map->size_cur_limit;
+ *max_limit = map->size_max_limit;
+ vm_map_unlock_read(map);
+
+ return KERN_SUCCESS;
+}
--
2.45.2