[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC 2 PATCH 01/16] numa: Split the numa functionality
From: |
Eduardo Habkost |
Subject: |
Re: [Qemu-devel] [RFC 2 PATCH 01/16] numa: Split the numa functionality |
Date: |
Thu, 10 Oct 2019 00:25:38 -0300 |
Hi,
First of all, sorry for taking more than a month to start
reviewing this.
On Fri, Sep 06, 2019 at 07:11:43PM +0000, Moger, Babu wrote:
> To support new epyc mode, we need to know the number of numa nodes
> in advance to generate apic id correctly. [...]
This explains that we need to initialize numa_info earlier than
something...
> [...] So, split the numa
> initialization into two. The function parse_numa initializes numa_info
> and updates nb_numa_nodes. And then parse_numa_node does the numa node
> initialization.
...but I'm missing what that "something" is.
The sequence of events here will be:
* parse_numa_opts()
  * for each -numa option:
    * parse_numa()
      * set_numa_options()
        * parse_numa_info()
          * here ms->numa_state->num_nodes is incremented [1]
* parse_numa_node_opts()
  * for each -numa option:
    * parse_numa_node()
      * set_numa_node_options()
        * here are the operations that are being delayed by this
          patch [2]
What exactly makes it necessary for [2] to happen after [1] is
done for all NUMA nodes?
This needs to be clear in the code, otherwise somebody will try to refactor
this in the future and merge set_numa_node_options() and parse_numa_info()
again, not knowing why ordering between [1] and [2] is so important.
In addition to documenting it better, I suggest saving the CPU
index list in NodeInfo, instead of calling qemu_opts_foreach()
twice. (Probably a good idea to document the new field as
internal, though. We don't want machine-specific code to be
looking at the CPU index list.)
Also, would it work if the delayed initialization were done in
numa_complete_configuration() instead of in a new
parse_numa_node_opts() function? We already have 2 separate
steps in NUMA initialization (parse_numa_node() and
numa_complete_configuration()), so it would be nice to avoid
adding a 3rd one.
Putting all the suggestions together, the code would look like this:
static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
Error **errp)
{
/* ... */
numa_info[nodenr].cpu_indexes = QAPI_CLONE(node->cpus, uint16List);
/* ... */
}
void numa_complete_configuration(MachineState *ms)
{
/* ... */
for (i = 0; i < ms->numa_state->num_nodes; i++) {
/*
* numa_node_complete_configuration() needs to be called after all
* nodes were already parsed, because <insert reason here>,
*/
numa_node_complete_configuration(numa_info[i]);
}
}
void numa_node_complete_configuration(MachineState *ms, NodeInfo *node)
{
for (cpu_index = node->cpu_indexes; cpu_index; cpu_index = cpu_index->next)
{
CpuInstanceProperties props;
props = mc->cpu_index_to_instance_props(ms, cpu_index->value);
props.node_id = nodenr;
props.has_node_id = true;
machine_set_cpu_numa_node(ms, &props, &err);
}
}
>
> Signed-off-by: Babu Moger <address@hidden>
> ---
> hw/core/numa.c | 106
> +++++++++++++++++++++++++++++++++++--------------
> include/sysemu/numa.h | 2 +
> vl.c | 2 +
> 3 files changed, 80 insertions(+), 30 deletions(-)
>
> diff --git a/hw/core/numa.c b/hw/core/numa.c
> index a11431483c..27fa6b5e1d 100644
> --- a/hw/core/numa.c
> +++ b/hw/core/numa.c
> @@ -55,14 +55,10 @@ bool have_numa_distance;
> NodeInfo numa_info[MAX_NODES];
>
>
> -static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
> +static void parse_numa_info(MachineState *ms, NumaNodeOptions *node,
> Error **errp)
> {
> - Error *err = NULL;
> uint16_t nodenr;
> - uint16List *cpus = NULL;
> - MachineClass *mc = MACHINE_GET_CLASS(ms);
> - unsigned int max_cpus = ms->smp.max_cpus;
>
> if (node->has_nodeid) {
> nodenr = node->nodeid;
> @@ -81,29 +77,6 @@ static void parse_numa_node(MachineState *ms,
> NumaNodeOptions *node,
> return;
> }
>
> - if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) {
> - error_setg(errp, "NUMA is not supported by this machine-type");
> - return;
> - }
> - for (cpus = node->cpus; cpus; cpus = cpus->next) {
> - CpuInstanceProperties props;
> - if (cpus->value >= max_cpus) {
> - error_setg(errp,
> - "CPU index (%" PRIu16 ")"
> - " should be smaller than maxcpus (%d)",
> - cpus->value, max_cpus);
> - return;
> - }
> - props = mc->cpu_index_to_instance_props(ms, cpus->value);
> - props.node_id = nodenr;
> - props.has_node_id = true;
> - machine_set_cpu_numa_node(ms, &props, &err);
> - if (err) {
> - error_propagate(errp, err);
> - return;
> - }
> - }
> -
> have_memdevs = have_memdevs ? : node->has_memdev;
> have_mem = have_mem ? : node->has_mem;
> if ((node->has_mem && have_memdevs) || (node->has_memdev && have_mem)) {
> @@ -177,7 +150,7 @@ void set_numa_options(MachineState *ms, NumaOptions
> *object, Error **errp)
>
> switch (object->type) {
> case NUMA_OPTIONS_TYPE_NODE:
> - parse_numa_node(ms, &object->u.node, &err);
> + parse_numa_info(ms, &object->u.node, &err);
> if (err) {
> goto end;
> }
> @@ -242,6 +215,73 @@ end:
> return 0;
> }
>
> +void set_numa_node_options(MachineState *ms, NumaOptions *object, Error
> **errp)
> +{
> + MachineClass *mc = MACHINE_GET_CLASS(ms);
> + NumaNodeOptions *node = &object->u.node;
> + unsigned int max_cpus = ms->smp.max_cpus;
> + uint16List *cpus = NULL;
> + Error *err = NULL;
> + uint16_t nodenr;
> +
> + if (node->has_nodeid) {
> + nodenr = node->nodeid;
> + } else {
> + error_setg(errp, "NUMA node information is not available");
> + }
> +
> + if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) {
> + error_setg(errp, "NUMA is not supported by this machine-type");
> + return;
> + }
> +
> + for (cpus = node->cpus; cpus; cpus = cpus->next) {
> + CpuInstanceProperties props;
> + if (cpus->value >= max_cpus) {
> + error_setg(errp,
> + "CPU index (%" PRIu16 ")"
> + " should be smaller than maxcpus (%d)",
> + cpus->value, max_cpus);
> + return;
> + }
> + props = mc->cpu_index_to_instance_props(ms, cpus->value);
> + props.node_id = nodenr;
> + props.has_node_id = true;
> + machine_set_cpu_numa_node(ms, &props, &err);
> + if (err) {
> + error_propagate(errp, err);
> + return;
> + }
> + }
> +}
> +
> +static int parse_numa_node(void *opaque, QemuOpts *opts, Error **errp)
> +{
> + NumaOptions *object = NULL;
> + MachineState *ms = MACHINE(opaque);
> + Error *err = NULL;
> + Visitor *v = opts_visitor_new(opts);
> +
> + visit_type_NumaOptions(v, NULL, &object, &err);
> + visit_free(v);
> + if (err) {
> + goto end;
> + }
> +
> + if (object->type == NUMA_OPTIONS_TYPE_NODE) {
> + set_numa_node_options(ms, object, &err);
> + }
> +
> +end:
> + qapi_free_NumaOptions(object);
> + if (err) {
> + error_propagate(errp, err);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> /* If all node pair distances are symmetric, then only distances
> * in one direction are enough. If there is even one asymmetric
> * pair, though, then all distances must be provided. The
> @@ -368,7 +408,7 @@ void numa_complete_configuration(MachineState *ms)
> if (ms->ram_slots > 0 && nb_numa_nodes == 0 &&
> mc->auto_enable_numa_with_memhp) {
> NumaNodeOptions node = { };
> - parse_numa_node(ms, &node, &error_abort);
> + parse_numa_info(ms, &node, &error_abort);
> }
>
> assert(max_numa_nodeid <= MAX_NODES);
> @@ -448,6 +488,12 @@ void parse_numa_opts(MachineState *ms)
> qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, &error_fatal);
> }
>
> +void parse_numa_node_opts(MachineState *ms)
> +{
> + qemu_opts_foreach(qemu_find_opts("numa"), parse_numa_node,
> + ms, &error_fatal);
> +}
> +
> void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp)
> {
> int node_id = object_property_get_int(OBJECT(dev), "node-id",
> &error_abort);
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 01a263eba2..ca109adaa6 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -24,7 +24,9 @@ struct NumaNodeMem {
> extern NodeInfo numa_info[MAX_NODES];
>
> void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
> +void set_numa_node_options(MachineState *ms, NumaOptions *object, Error
> **errp);
> void parse_numa_opts(MachineState *ms);
> +void parse_numa_node_opts(MachineState *ms);
> void numa_complete_configuration(MachineState *ms);
> void query_numa_node_mem(NumaNodeMem node_mem[]);
> extern QemuOptsList qemu_numa_opts;
> diff --git a/vl.c b/vl.c
> index b426b32134..711d2ae5da 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -4339,6 +4339,8 @@ int main(int argc, char **argv, char **envp)
> }
> parse_numa_opts(current_machine);
>
> + parse_numa_node_opts(current_machine);
> +
> /* do monitor/qmp handling at preconfig state if requested */
> main_loop();
>
>
--
Eduardo
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [Qemu-devel] [RFC 2 PATCH 01/16] numa: Split the numa functionality,
Eduardo Habkost <=