[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC v1 09/11] tcg: drop global lock during TCG code ex
From: |
Jan Kiszka |
Subject: |
Re: [Qemu-devel] [RFC v1 09/11] tcg: drop global lock during TCG code execution |
Date: |
Wed, 23 Mar 2016 21:36:41 +0100 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686 (x86_64); de; rv:1.8.1.12) Gecko/20080226 SUSE/2.0.0.12-1.1 Thunderbird/2.0.0.12 Mnenhy/0.7.5.666 |
On 2016-03-23 17:27, Alex Bennée wrote:
>
> KONRAD Frederic <address@hidden> writes:
>
>> Hi Alex,
>>
>> Thanks for pulling all that together!
>> About this patch the original author was Jan Kiszka
>> (https://lists.gnu.org/archive/html/qemu-devel/2011-09/msg03323.html)
>>
>> This has probably been dropped during a rebase.
>
> I'll amend the author on v2.
>
> Jan,
>
> The original didn't have a s-o-b tag from you. Are you happy to give one
> for its current iteration?
You can consider it
Signed-off-by: Jan Kiszka <address@hidden>
At that point, I didn't consider it ready for upstream (see the log), so
I left out the signature.
Jan
>
>>
>> Thanks,
>> Fred
>>
>> Le 18/03/2016 17:18, Alex Bennée a écrit :
>>> From: KONRAD Frederic <address@hidden>
>>>
>>> This finally allows TCG to benefit from the iothread introduction: Drop
>>> the global mutex while running pure TCG CPU code. Reacquire the lock
>>> when entering MMIO or PIO emulation, or when leaving the TCG loop.
>>>
>>> We have to revert a few optimizations for the current TCG threading
>>> model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
>>> kicking it in qemu_cpu_kick. We also need to disable RAM block
>>> reordering until we have a more efficient locking mechanism at hand.
>>>
>>> Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here.
>>> These numbers demonstrate where we gain something:
>>>
>>> 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm
>>> 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm
>>>
>>> The guest CPU was fully loaded, but the iothread could still run mostly
>>> independent on a second core. Without the patch we don't get beyond
>>>
>>> 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm
>>> 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm
>>>
>>> We don't benefit significantly, though, when the guest is not fully
>>> loading a host CPU.
>>>
>>> Message-Id: <address@hidden>
>>> Signed-off-by: KONRAD Frederic <address@hidden>
>>> [AJB: -smp single-threaded fix, rm old info from commit msg]
>>> Signed-off-by: Alex Bennée <address@hidden>
>>>
>>> ---
>>> v1 (ajb):
>>> - SMP failure now fixed by previous commit
>>> Changes from Fred Konrad (mttcg-v7 via paolo):
>>> * Rebase on the current HEAD.
>>> * Fixes a deadlock in qemu_devices_reset().
>>> * Remove the mutex in address_space_*
>>> ---
>>> cpu-exec.c | 10 ++++++++++
>>> cpus.c | 26 +++-----------------------
>>> cputlb.c | 4 ++++
>>> exec.c | 12 ++++++++++++
>>> hw/i386/kvmvapic.c | 3 +++
>>> softmmu_template.h | 17 +++++++++++++++++
>>> translate-all.c | 11 +++++++++--
>>> 7 files changed, 58 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/cpu-exec.c b/cpu-exec.c
>>> index 3572256..76891fd 100644
>>> --- a/cpu-exec.c
>>> +++ b/cpu-exec.c
>>> @@ -28,6 +28,7 @@
>>> #include "qemu/rcu.h"
>>> #include "exec/tb-hash.h"
>>> #include "exec/log.h"
>>> +#include "qemu/main-loop.h"
>>> #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
>>> #include "hw/i386/apic.h"
>>> #endif
>>> @@ -449,6 +450,13 @@ int cpu_exec(CPUState *cpu)
>>> for(;;) {
>>> interrupt_request = cpu->interrupt_request;
>>> if (unlikely(interrupt_request)) {
>>> + /* FIXME: this needs to take the iothread lock.
>>> + * For this we need to find all places in
>>> + * cc->cpu_exec_interrupt that can call cpu_loop_exit,
>>> + * and call qemu_unlock_iothread_mutex() there. Else,
>>> + * add a flag telling cpu_loop_exit() to unlock it.
>>> + */
>>> +
>>> if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
>>> /* Mask out external interrupts for this step. */
>>> interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
>>> @@ -501,6 +509,7 @@ int cpu_exec(CPUState *cpu)
>>> the program flow was changed */
>>> next_tb = 0;
>>> }
>>> +
>>> }
>>> if (unlikely(cpu->exit_request
>>> || replay_has_interrupt())) {
>>> @@ -618,6 +627,7 @@ int cpu_exec(CPUState *cpu)
>>> g_assert(env == &x86_cpu->env);
>>> #endif
>>> #endif /* buggy compiler */
>>> +
>>> cpu->can_do_io = 1;
>>> tb_lock_reset();
>>> }
>>> diff --git a/cpus.c b/cpus.c
>>> index a87fbf9..0e3d5f9 100644
>>> --- a/cpus.c
>>> +++ b/cpus.c
>>> @@ -911,8 +911,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
>>> #endif /* _WIN32 */
>>>
>>> static QemuMutex qemu_global_mutex;
>>> -static QemuCond qemu_io_proceeded_cond;
>>> -static unsigned iothread_requesting_mutex;
>>>
>>> static QemuThread io_thread;
>>>
>>> @@ -928,7 +926,6 @@ void qemu_init_cpu_loop(void)
>>> qemu_cond_init(&qemu_cpu_cond);
>>> qemu_cond_init(&qemu_pause_cond);
>>> qemu_cond_init(&qemu_work_cond);
>>> - qemu_cond_init(&qemu_io_proceeded_cond);
>>> qemu_mutex_init(&qemu_global_mutex);
>>>
>>> qemu_thread_get_self(&io_thread);
>>> @@ -1043,10 +1040,6 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
>>> qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
>>> }
>>>
>>> - while (iothread_requesting_mutex) {
>>> - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
>>> - }
>>> -
>>> CPU_FOREACH(cpu) {
>>> qemu_wait_io_event_common(cpu);
>>> }
>>> @@ -1314,22 +1307,7 @@ bool qemu_mutex_iothread_locked(void)
>>>
>>> void qemu_mutex_lock_iothread(void)
>>> {
>>> - atomic_inc(&iothread_requesting_mutex);
>>> - /* In the simple case there is no need to bump the VCPU thread out of
>>> - * TCG code execution.
>>> - */
>>> - if (!tcg_enabled() || qemu_in_vcpu_thread() ||
>>> - !first_cpu || !first_cpu->created) {
>>> - qemu_mutex_lock(&qemu_global_mutex);
>>> - atomic_dec(&iothread_requesting_mutex);
>>> - } else {
>>> - if (qemu_mutex_trylock(&qemu_global_mutex)) {
>>> - qemu_cpu_kick_no_halt();
>>> - qemu_mutex_lock(&qemu_global_mutex);
>>> - }
>>> - atomic_dec(&iothread_requesting_mutex);
>>> - qemu_cond_broadcast(&qemu_io_proceeded_cond);
>>> - }
>>> + qemu_mutex_lock(&qemu_global_mutex);
>>> iothread_locked = true;
>>> }
>>>
>>> @@ -1575,7 +1553,9 @@ static int tcg_cpu_exec(CPUState *cpu)
>>> cpu->icount_decr.u16.low = decr;
>>> cpu->icount_extra = count;
>>> }
>>> + qemu_mutex_unlock_iothread();
>>> ret = cpu_exec(cpu);
>>> + qemu_mutex_lock_iothread();
>>> #ifdef CONFIG_PROFILER
>>> tcg_time += profile_getclock() - ti;
>>> #endif
>>> diff --git a/cputlb.c b/cputlb.c
>>> index 2f7a166..d607471 100644
>>> --- a/cputlb.c
>>> +++ b/cputlb.c
>>> @@ -30,6 +30,10 @@
>>> #include "exec/ram_addr.h"
>>> #include "tcg/tcg.h"
>>>
>>> +#ifdef CONFIG_SOFTMMU
>>> +#include "qemu/main-loop.h"
>>> +#endif
>>> +
>>> //#define DEBUG_TLB
>>> //#define DEBUG_TLB_CHECK
>>>
>>> diff --git a/exec.c b/exec.c
>>> index 402b751..9986d59 100644
>>> --- a/exec.c
>>> +++ b/exec.c
>>> @@ -2113,6 +2113,12 @@ static void check_watchpoint(int offset, int len,
>>> MemTxAttrs attrs, int flags)
>>> }
>>> cpu->watchpoint_hit = wp;
>>>
>>> + /*
>>> + * Unlock iothread mutex before calling cpu_loop_exit
>>> + * or cpu_resume_from_signal.
>>> + */
>>> + qemu_mutex_unlock_iothread();
>>> +
>>> /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.
>>> */
>>> tb_lock();
>>> tb_check_watchpoint(cpu);
>>> @@ -2348,8 +2354,14 @@ static void io_mem_init(void)
>>> memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL,
>>> NULL, UINT64_MAX);
>>> memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops,
>>> NULL,
>>> NULL, UINT64_MAX);
>>> +
>>> + /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
>>> + * which must be called without the iothread mutex.
>>> + */
>>> memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
>>> NULL, UINT64_MAX);
>>> + memory_region_clear_global_locking(&io_mem_notdirty);
>>> +
>>> memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
>>> NULL, UINT64_MAX);
>>> }
>>> diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
>>> index 7c0d542..a0439a8 100644
>>> --- a/hw/i386/kvmvapic.c
>>> +++ b/hw/i386/kvmvapic.c
>>> @@ -447,6 +447,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU
>>> *cpu, target_ulong ip)
>>> resume_all_vcpus();
>>>
>>> if (!kvm_enabled()) {
>>> + /* Unlock iothread mutex before calling cpu_resume_from_signal. */
>>> + qemu_mutex_unlock_iothread();
>>> +
>>> /* Unlocked by cpu_resume_from_signal. */
>>> tb_lock();
>>> cs->current_tb = NULL;
>>> diff --git a/softmmu_template.h b/softmmu_template.h
>>> index 208f808..0b6c609 100644
>>> --- a/softmmu_template.h
>>> +++ b/softmmu_template.h
>>> @@ -151,6 +151,7 @@ static inline DATA_TYPE glue(io_read,
>>> SUFFIX)(CPUArchState *env,
>>> CPUState *cpu = ENV_GET_CPU(env);
>>> hwaddr physaddr = iotlbentry->addr;
>>> MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
>>> + bool locked = false;
>>>
>>> physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
>>> cpu->mem_io_pc = retaddr;
>>> @@ -159,8 +160,15 @@ static inline DATA_TYPE glue(io_read,
>>> SUFFIX)(CPUArchState *env,
>>> }
>>>
>>> cpu->mem_io_vaddr = addr;
>>> + if (mr->global_locking) {
>>> + qemu_mutex_lock_iothread();
>>> + locked = true;
>>> + }
>>> memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT,
>>> iotlbentry->attrs);
>>> + if (locked) {
>>> + qemu_mutex_unlock_iothread();
>>> + }
>>> return val;
>>> }
>>> #endif
>>> @@ -358,6 +366,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState
>>> *env,
>>> CPUState *cpu = ENV_GET_CPU(env);
>>> hwaddr physaddr = iotlbentry->addr;
>>> MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
>>> + bool locked = false;
>>>
>>> physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
>>> if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
>>> @@ -366,8 +375,16 @@ static inline void glue(io_write, SUFFIX)(CPUArchState
>>> *env,
>>>
>>> cpu->mem_io_vaddr = addr;
>>> cpu->mem_io_pc = retaddr;
>>> +
>>> + if (mr->global_locking) {
>>> + qemu_mutex_lock_iothread();
>>> + locked = true;
>>> + }
>>> memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT,
>>> iotlbentry->attrs);
>>> + if (locked) {
>>> + qemu_mutex_unlock_iothread();
>>> + }
>>> }
>>>
>>> void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE
>>> val,
>>> diff --git a/translate-all.c b/translate-all.c
>>> index 1aab243..fe1392a 100644
>>> --- a/translate-all.c
>>> +++ b/translate-all.c
>>> @@ -1221,6 +1221,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start,
>>> tb_page_addr_t end)
>>> * this TB.
>>> *
>>> * Called with mmap_lock held for user-mode emulation
>>> + * If called from generated code, iothread mutex must not be held.
>>> */
>>> void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t
>>> end,
>>> int is_cpu_write_access)
>>> @@ -1333,7 +1334,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t
>>> start, tb_page_addr_t end,
>>> }
>>>
>>> #ifdef CONFIG_SOFTMMU
>>> -/* len must be <= 8 and start must be a multiple of len */
>>> +/* len must be <= 8 and start must be a multiple of len.
>>> + * Called via softmmu_template.h, with iothread mutex not held.
>>> + */
>>> void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
>>> {
>>> PageDesc *p;
>>> @@ -1591,6 +1594,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
>>> }
>>>
>>> #if !defined(CONFIG_USER_ONLY)
>>> +/* If called from generated code, iothread mutex must not be held. */
>>> void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
>>> {
>>> ram_addr_t ram_addr;
>>> @@ -1637,7 +1641,10 @@ void tb_check_watchpoint(CPUState *cpu)
>>>
>>> #ifndef CONFIG_USER_ONLY
>>> /* in deterministic execution mode, instructions doing device I/Os
>>> - must be at the end of the TB */
>>> + * must be at the end of the TB.
>>> + *
>>> + * Called by softmmu_template.h, with iothread mutex not held.
>>> + */
>>> void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>>> {
>>> #if defined(TARGET_MIPS) || defined(TARGET_SH4)
>
>
> --
> Alex Bennée
>
--
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux
- [Qemu-devel] [RFC v1 08/11] tcg: add kick timer for single-threaded vCPU emulation, (continued)
- [Qemu-devel] [RFC v1 08/11] tcg: add kick timer for single-threaded vCPU emulation, Alex Bennée, 2016/03/18
- [Qemu-devel] [RFC v1 07/11] tcg: add options for enabling MTTCG, Alex Bennée, 2016/03/18
- [Qemu-devel] [RFC v1 10/11] tcg: grab iothread lock in cpu-exec interrupt handling, Alex Bennée, 2016/03/18
- [Qemu-devel] [RFC v1 11/11] tcg: enable thread-per-vCPU, Alex Bennée, 2016/03/18
- [Qemu-devel] [RFC v1 09/11] tcg: drop global lock during TCG code execution, Alex Bennée, 2016/03/18