[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC PATCH v1] x86/cpu: initialize the CPU concurrently
From: |
Eduardo Habkost |
Subject: |
Re: [RFC PATCH v1] x86/cpu: initialize the CPU concurrently |
Date: |
Fri, 18 Dec 2020 13:47:54 -0500 |
Hi,
Thanks for the patch, and sorry for taking so long to look at it.
On Wed, Nov 25, 2020 at 07:54:17PM +0800, Zhenyu Ye wrote:
> From 0b4318c9dbf6fa152ec14eab29837ea06e2d78e5 Mon Sep 17 00:00:00 2001
> From: eillon <yezhenyu2@huawei.com>
> Date: Wed, 25 Nov 2020 19:17:03 +0800
> Subject: [PATCH] x86/cpu: initialize the CPU concurrently
>
> Currently we initialize cpu one by one in qemu_init_vcpu(), every cpu
> has to wait until cpu->created = true. When
> cpus_accel->create_vcpu_thread
> takes too long or the number of CPUs is too large, this will prolong
> the boot time.
>
How long was boot time before and after the patch?
> This patch initializes the CPU concurrently.
>
> Signed-off-by: eillon <yezhenyu2@huawei.com>
> ---
> hw/i386/x86.c | 7 +++++--
> include/hw/core/cpu.h | 3 +++
> include/hw/i386/x86.h | 3 ++-
> softmmu/cpus.c | 9 +++++++--
> 4 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 5944fc44ed..a98f68b220 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -101,9 +101,11 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState
> *x86ms,
> }
>
>
> -void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp)
> +void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id,
> + bool last_cpu, Error **errp)
> {
> Object *cpu = object_new(MACHINE(x86ms)->cpu_type);
> + ((CPUState *)cpu)->last_cpu = last_cpu;
Please use the CPU() macro instead of direct casts. Preferably
with a separate variable. e.g.:
Object *obj = object_new(MACHINE(x86ms)->cpu_type);
CPUState *cpu = CPU(obj);
>
> if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) {
> goto out;
> @@ -135,7 +137,8 @@ void x86_cpus_init(X86MachineState *x86ms, int
> default_cpu_version)
> ms->smp.max_cpus - 1)
> + 1;
> possible_cpus = mc->possible_cpu_arch_ids(ms);
> for (i = 0; i < ms->smp.cpus; i++) {
> - x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
> + x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id,
> + i == ms->smp.cpus - 1, &error_fatal);
> }
> }
>
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 3d92c967ff..b7e05e2d58 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -467,6 +467,9 @@ struct CPUState {
>
> /* track IOMMUs whose translations we've cached in the TCG TLB */
> GArray *iommu_notifiers;
> +
> + /* The last cpu to init. */
> + bool last_cpu;
> };
>
> typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ;
> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
> index 739fac5087..0f7a6e5d16 100644
> --- a/include/hw/i386/x86.h
> +++ b/include/hw/i386/x86.h
> @@ -84,7 +84,8 @@ void init_topo_info(X86CPUTopoInfo *topo_info, const
> X86MachineState *x86ms);
> uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
> unsigned int cpu_index);
>
> -void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
> +void x86_cpu_new(X86MachineState *pcms, int64_t apic_id,
> + bool last_cpu, Error **errp);
> void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
> CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms,
> unsigned cpu_index);
> diff --git a/softmmu/cpus.c b/softmmu/cpus.c
> index e46ac68ad0..5a8eae28ab 100644
> --- a/softmmu/cpus.c
> +++ b/softmmu/cpus.c
> @@ -621,8 +621,13 @@ void qemu_init_vcpu(CPUState *cpu)
> g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
> cpus_accel->create_vcpu_thread(cpu);
>
> - while (!cpu->created) {
> - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
> + if (cpu->last_cpu) {
What about all the other architectures that don't set
last_cpu=true? They will never wait for the CPU to be created.
> + CPUState *cs = first_cpu;
> + CPU_FOREACH(cs) {
> + while (!cs->created) {
> + qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
> + }
> + }
I suggest doing this "wait for all CPUs" step outside qemu_init_vcpu().
What about not making the last CPU special, and just providing an
optional mechanism to wait for all VCPU threads after the CPU
objects have been created? e.g.:
struct CPUState {
...
/*
* If true, qemu_init_vcpu() will not wait for the VCPU thread to be
created
* before returning.
*/
bool async_init;
};
void qemu_wait_vcpu_thread_init(CPUState *cpu)
{
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
void qemu_wait_all_vcpu_threads_init(void)
{
CPU_FOREACH(cpu) {
qemu_wait_vcpu_thread_init(cpu);
}
}
void qemu_init_vcpu(CPUState *cpu)
{
...
if (!cpu->async_init) {
qemu_wait_vcpu_thread_init(cpu);
}
}
void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, bool async, Error
**errp)
{
...
cpu->async_init = async;
qdev_realize(DEVICE(cpu), NULL, errp);
...
}
void x86_cpus_init(...)
{
...
for (i = 0; i < ms->smp.cpus; i++) {
x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, true,
&error_fatal);
}
qemu_wait_all_vcpu_threads_init();
}
> }
> }
>
> --
> 2.22.0.windows.1
>
--
Eduardo