qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling


From: Peter Collingbourne
Subject: Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
Date: Tue, 1 Dec 2020 17:57:30 -0800

On Tue, Dec 1, 2020 at 5:54 PM Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 02.12.20 02:52, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 5:39 PM Alexander Graf <agraf@csgraf.de> wrote:
> >>
> >> On 02.12.20 02:32, Peter Collingbourne wrote:
> >>> On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
> >>>> On 01.12.20 22:00, Peter Collingbourne wrote:
> >>>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>>>> up on IPI.
> >>>>>
> >>>>> Signed-off-by: Peter Collingbourne <pcc@google.com>
> >>>>> ---
> >>>>> v2:
> >>>>> - simplify locking further
> >>>>> - wait indefinitely on disabled or masked timers
> >>>>>
> >>>>>     accel/hvf/hvf-cpus.c     |   5 +-
> >>>>>     include/sysemu/hvf_int.h |   3 +-
> >>>>>     target/arm/hvf/hvf.c     | 116 
> >>>>> ++++++++++++++-------------------------
> >>>>>     3 files changed, 43 insertions(+), 81 deletions(-)
> >>>>>
> >>>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>>>> index 4360f64671..b2c8fb57f6 100644
> >>>>> --- a/accel/hvf/hvf-cpus.c
> >>>>> +++ b/accel/hvf/hvf-cpus.c
> >>>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>>>         sigact.sa_handler = dummy_signal;
> >>>>>         sigaction(SIG_IPI, &sigact, NULL);
> >>>>>
> >>>>> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> >>>>> -    sigdelset(&set, SIG_IPI);
> >>>>> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
> >>>>> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> >>>>> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >>>>>
> >>>>>     #ifdef __aarch64__
> >>>>>         r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t 
> >>>>> **)&cpu->hvf->exit, NULL);
> >>>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>>>> index c56baa3ae8..13adf6ea77 100644
> >>>>> --- a/include/sysemu/hvf_int.h
> >>>>> +++ b/include/sysemu/hvf_int.h
> >>>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>>>     struct hvf_vcpu_state {
> >>>>>         uint64_t fd;
> >>>>>         void *exit;
> >>>>> -    struct timespec ts;
> >>>>> -    bool sleeping;
> >>>>> +    sigset_t unblock_ipi_mask;
> >>>>>     };
> >>>>>
> >>>>>     void assert_hvf_ok(hv_return_t ret);
> >>>>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >>>>> index 8fe10966d2..3321d48aa2 100644
> >>>>> --- a/target/arm/hvf/hvf.c
> >>>>> +++ b/target/arm/hvf/hvf.c
> >>>>> @@ -2,6 +2,7 @@
> >>>>>      * QEMU Hypervisor.framework support for Apple Silicon
> >>>>>
> >>>>>      * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> >>>>> + * Copyright 2020 Google LLC
> >>>>>      *
> >>>>>      * This work is licensed under the terms of the GNU GPL, version 2 
> >>>>> or later.
> >>>>>      * See the COPYING file in the top-level directory.
> >>>>> @@ -18,6 +19,7 @@
> >>>>>     #include "sysemu/hw_accel.h"
> >>>>>
> >>>>>     #include <Hypervisor/Hypervisor.h>
> >>>>> +#include <mach/mach_time.h>
> >>>>>
> >>>>>     #include "exec/address-spaces.h"
> >>>>>     #include "hw/irq.h"
> >>>>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >>>>>
> >>>>>     void hvf_kick_vcpu_thread(CPUState *cpu)
> >>>>>     {
> >>>>> -    if (cpu->hvf->sleeping) {
> >>>>> -        /*
> >>>>> -         * When sleeping, make sure we always send signals. Also, 
> >>>>> clear the
> >>>>> -         * timespec, so that an IPI that arrives between setting 
> >>>>> hvf->sleeping
> >>>>> -         * and the nanosleep syscall still aborts the sleep.
> >>>>> -         */
> >>>>> -        cpu->thread_kicked = false;
> >>>>> -        cpu->hvf->ts = (struct timespec){ };
> >>>>> -        cpus_kick_thread(cpu);
> >>>>> -    } else {
> >>>>> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
> >>>>> -    }
> >>>>> +    cpus_kick_thread(cpu);
> >>>>> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
> >>>>>     }
> >>>>>
> >>>>>     static int hvf_inject_interrupts(CPUState *cpu)
> >>>>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >>>>>         return 0;
> >>>>>     }
> >>>>>
> >>>>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> >>>>> +{
> >>>>> +    /*
> >>>>> +     * Use pselect to sleep so that other threads can IPI us while 
> >>>>> we're
> >>>>> +     * sleeping.
> >>>>> +     */
> >>>>> +    qatomic_mb_set(&cpu->thread_kicked, false);
> >>>>> +    qemu_mutex_unlock_iothread();
> >>>>> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> >>>>> +    qemu_mutex_lock_iothread();
> >>>>> +}
> >>>>> +
> >>>>>     int hvf_vcpu_exec(CPUState *cpu)
> >>>>>     {
> >>>>>         ARMCPU *arm_cpu = ARM_CPU(cpu);
> >>>>> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>         hv_return_t r;
> >>>>>         int ret = 0;
> >>>>>
> >>>>> -    qemu_mutex_unlock_iothread();
> >>>>> -
> >>>>>         do {
> >>>>>             bool advance_pc = false;
> >>>>>
> >>>>> -        qemu_mutex_lock_iothread();
> >>>>>             current_cpu = cpu;
> >>>>>             qemu_wait_io_event_common(cpu);
> >>>>> -        qemu_mutex_unlock_iothread();
> >>>>>
> >>>>>             flush_cpu_state(cpu);
> >>>>>
> >>>>> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>             }
> >>>>>
> >>>>>             if (cpu->halted) {
> >>>>> -            qemu_mutex_lock_iothread();
> >>>>>                 return EXCP_HLT;
> >>>>>             }
> >>>>>
> >>>>> +        qemu_mutex_unlock_iothread();
> >>>>>             assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
> >>>>>
> >>>>>             /* handle VMEXIT */
> >>>>> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>             uint64_t syndrome = hvf_exit->exception.syndrome;
> >>>>>             uint32_t ec = syn_get_ec(syndrome);
> >>>>>
> >>>>> +        qemu_mutex_lock_iothread();
> >>>>>             switch (exit_reason) {
> >>>>>             case HV_EXIT_REASON_EXCEPTION:
> >>>>>                 /* This is the main one, handle below. */
> >>>>>                 break;
> >>>>>             case HV_EXIT_REASON_VTIMER_ACTIVATED:
> >>>>> -            qemu_mutex_lock_iothread();
> >>>>>                 current_cpu = cpu;
> >>>>>                 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> >>>>> -            qemu_mutex_unlock_iothread();
> >>>>>                 continue;
> >>>>>             case HV_EXIT_REASON_CANCELED:
> >>>>>                 /* we got kicked, no exit to process */
> >>>>> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>                 uint32_t srt = (syndrome >> 16) & 0x1f;
> >>>>>                 uint64_t val = 0;
> >>>>>
> >>>>> -            qemu_mutex_lock_iothread();
> >>>>>                 current_cpu = cpu;
> >>>>>
> >>>>>                 DPRINTF("data abort: [pc=0x%llx va=0x%016llx 
> >>>>> pa=0x%016llx isv=%x "
> >>>>> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>                     hvf_set_reg(cpu, srt, val);
> >>>>>                 }
> >>>>>
> >>>>> -            qemu_mutex_unlock_iothread();
> >>>>> -
> >>>>>                 advance_pc = true;
> >>>>>                 break;
> >>>>>             }
> >>>>> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>             case EC_WFX_TRAP:
> >>>>>                 if (!(syndrome & WFX_IS_WFE) && 
> >>>>> !(cpu->interrupt_request &
> >>>>>                     (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>>>> -                uint64_t cval, ctl, val, diff, now;
> >>>>> +                advance_pc = true;
> >>>>>
> >>>>> -                /* Set up a local timer for vtimer if necessary ... */
> >>>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
> >>>>> HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> >>>>> -                assert_hvf_ok(r);
> >>>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
> >>>>> HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> >>>>> +                uint64_t ctl;
> >>>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
> >>>>> HV_SYS_REG_CNTV_CTL_EL0,
> >>>>> +                                        &ctl);
> >>>>>                     assert_hvf_ok(r);
> >>>>>
> >>>>> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
> >>>>> -                diff = cval - val;
> >>>>> -
> >>>>> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
> >>>>> -                      gt_cntfrq_period_ns(arm_cpu);
> >>>>> -
> >>>>> -                /* Timer disabled or masked, just wait for long */
> >>>>>                     if (!(ctl & 1) || (ctl & 2)) {
> >>>>> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
> >>>>> -                           gt_cntfrq_period_ns(arm_cpu);
> >>>>> +                    /* Timer disabled or masked, just wait for an IPI. 
> >>>>> */
> >>>>> +                    hvf_wait_for_ipi(cpu, NULL);
> >>>>> +                    break;
> >>>>>                     }
> >>>>>
> >>>>> -                if (diff < INT64_MAX) {
> >>>>> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
> >>>>> -                    struct timespec *ts = &cpu->hvf->ts;
> >>>>> -
> >>>>> -                    *ts = (struct timespec){
> >>>>> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
> >>>>> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
> >>>>> -                    };
> >>>>> -
> >>>>> -                    /*
> >>>>> -                     * Waking up easily takes 1ms, don't go to sleep 
> >>>>> for smaller
> >>>>> -                     * time periods than 2ms.
> >>>>> -                     */
> >>>>> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) 
> >>>>> {
> >>>>> -                        advance_pc = true;
> >>>>> -                        break;
> >>>>> -                    }
> >>>>> -
> >>>>> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI 
> >>>>> signal. */
> >>>>> -                    cpu->hvf->sleeping = true;
> >>>>> -                    smp_mb();
> >>>>> -
> >>>>> -                    /* Bail out if we received an IRQ meanwhile */
> >>>>> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
> >>>>> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>>>> -                        cpu->hvf->sleeping = false;
> >>>>> -                        break;
> >>>>> -                    }
> >>>>> -
> >>>>> -                    /* nanosleep returns on signal, so we wake up on 
> >>>>> kick. */
> >>>>> -                    nanosleep(ts, NULL);
> >>>>> -
> >>>>> -                    /* Out of sleep - either naturally or because of a 
> >>>>> kick */
> >>>>> -                    cpu->hvf->sleeping = false;
> >>>>> +                uint64_t cval;
> >>>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
> >>>>> HV_SYS_REG_CNTV_CVAL_EL0,
> >>>>> +                                        &cval);
> >>>>> +                assert_hvf_ok(r);
> >>>>> +
> >>>>> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> >>>> I think you touched based on it in a previous thread, but would you mind
> >>>> to explain again why mach_absolute_time() is the right thing to check
> >>>> cval against? If I read the headers correctly, the cnvt_off register
> >>>> should be 0, so cntvct should be the reference time, no?
> >>> In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
> >>> same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
> >>> at the guest we see that CNTPCT_EL0 corresponds to
> >>> mach_absolute_time() on the host and not host CNTPCT_EL0 (if you look
> >>> at XNU kernel sources you will see that mach_absolute_time() reads
> >>> CNTPCT_EL0 and adds a constant corresponding to the amount of time
> >>> that the machine spends asleep) so I think that what's going on at the
> >>> hypervisor level is that guest CNTPOFF_EL2 is being set to the same
> >>> constant to make it correspond to mach_absolute_time().
> >>
> >> Yes, I can absolutely see how it's different from CNTPCT, but it should
> >> be identical to CNTVCT_EL0 inside QEMU, no?
> > Not necessarily. It's possible for the guest to observe a different
> > CNTVCT_EL0 either because CNTPOFF_EL2 is set to a non-zero value or
> > because the hypervisor is secretly adding the "time spent asleep"
> > constant to CNTVOFF_EL2 on top of the value that we set it to via the
> > API.
>
>
> Yes, it could be doing the same thing with mach_absolute_time(). What
> I'm asking is whether calling that over just pulling CNTVCT directly
> gives you any benefit.

If you run this program you will see that mach_absolute_time() may
give you a different result to CNTVCT.

pcc@pac-mini ~> cat time.c
#include <stdio.h>
#include <mach/mach_time.h>

int main() {
  uint64_t cntpct, cntvct;
  __asm__ __volatile__("mrs %0, cntpct_el0" : "=r"(cntpct));
  __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(cntvct));

  printf("cntpct = %lx cntvct = %lx mat = %lx\n", cntpct, cntvct,
mach_absolute_time());
}
pcc@pac-mini ~> ./time
cntpct = d714e2217d0 cntvct = d714e2217d0 mat = 9259a829290

Peter



reply via email to

[Prev in Thread] Current Thread [Next in Thread]