[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC v1 01/11] tcg: move tb_find_fast outside the tb_lock critical section
From: Alex Bennée
Subject: [Qemu-devel] [RFC v1 01/11] tcg: move tb_find_fast outside the tb_lock critical section
Date: Fri, 18 Mar 2016 16:18:42 +0000
From: KONRAD Frederic <address@hidden>
Signed-off-by: KONRAD Frederic <address@hidden>
Signed-off-by: Paolo Bonzini <address@hidden>
[AJB: minor checkpatch fixes]
Signed-off-by: Alex Bennée <address@hidden>
---
v1(ajb)
- checkpatch fixes
---
cpu-exec.c | 74 +++++++++++++++++++++++++++++++++----------------------
include/qom/cpu.h | 2 ++
tcg/tcg.h | 1 +
translate-all.c | 23 ++++++++++++++++-
4 files changed, 70 insertions(+), 30 deletions(-)
diff --git a/cpu-exec.c b/cpu-exec.c
index 07545aa..52f25de 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -225,8 +225,9 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_phys_hash_func(phys_pc);
for (ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
- (tb = *ptb1) != NULL;
+ (tb = atomic_read(ptb1)) != NULL;
ptb1 = &tb->phys_hash_next) {
+ smp_read_barrier_depends();
if (tb->pc != pc ||
tb->page_addr[0] != phys_page1 ||
tb->cs_base != cs_base ||
@@ -254,7 +255,18 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
*ptb1 = tb->phys_hash_next;
tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+ } else {
+ return NULL;
}
+
+ /* If tb_flush was called since the last time we released the lock,
+ * forget about this TB.
+ */
+ smp_rmb();
+ if (atomic_read(&cpu->tb_invalidated_flag)) {
+ return NULL;
+ }
+
return tb;
}
@@ -265,36 +277,31 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
{
TranslationBlock *tb;
- tb = tb_find_physical(cpu, pc, cs_base, flags);
- if (tb) {
- goto found;
- }
-
-#ifdef CONFIG_USER_ONLY
- /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
- * taken outside tb_lock. Since we're momentarily dropping
- * tb_lock, there's a chance that our desired tb has been
- * translated.
+ /* First try to get the tb. If we don't find it we need to lock and
+ * compile it.
*/
- tb_unlock();
- mmap_lock();
- tb_lock();
tb = tb_find_physical(cpu, pc, cs_base, flags);
- if (tb) {
- mmap_unlock();
- goto found;
- }
-#endif
-
- /* if no translated code available, then translate it now */
- cpu->tb_invalidated_flag = 0;
- tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
-
+ if (!tb) {
#ifdef CONFIG_USER_ONLY
- mmap_unlock();
+ /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
+ * taken outside tb_lock. tb_lock is released later in
+ * cpu_exec.
+ */
+ mmap_lock();
+ tb_lock();
+
+ /* Retry to get the TB in case a CPU just translate it to avoid having
+ * duplicated TB in the pool.
+ */
+ tb = tb_find_physical(cpu, pc, cs_base, flags);
#endif
+ if (!tb) {
+ /* if no translated code available, then translate it now */
+ tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
+ }
+ mmap_unlock();
+ }
-found:
/* we add the TB in the virtual pc hash table */
cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
return tb;
@@ -312,6 +319,8 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
is executed. */
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+ /* Read tb_jmp_cache before tb->pc. */
+ smp_read_barrier_depends();
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags)) {
tb = tb_find_slow(cpu, pc, cs_base, flags);
@@ -489,15 +498,18 @@ int cpu_exec(CPUState *cpu)
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
}
- tb_lock();
tb = tb_find_fast(cpu);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
doing it in tb_find_slow */
- if (cpu->tb_invalidated_flag) {
+ if (atomic_read(&cpu->tb_invalidated_flag)) {
/* as some TB could have been invalidated because
of a tb_flush while generating the code, we
must recompute the hash index here */
next_tb = 0;
+
+ /* Clear the flag, we've now observed the flush. */
+ tb_lock_recursive();
+ cpu->tb_invalidated_flag = 0;
}
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
@@ -508,10 +520,14 @@ int cpu_exec(CPUState *cpu)
jump. */
if (next_tb != 0 && tb->page_addr[1] == -1
&& !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+ tb_lock_recursive();
tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK, tb);
}
- tb_unlock();
+ /* The lock may not be taken if we went through the
+ * fast lookup path and did not have to do any patching.
+ */
+ tb_lock_reset();
if (likely(!cpu->exit_request)) {
trace_exec_tb(tb, tb->pc);
tc_ptr = tb->tc_ptr;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9538f9c..4132108 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -241,6 +241,8 @@ struct kvm_run;
* @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
* CPU and return to its top level loop.
* @tb_invalidated_flag: Set to tell TCG that tb_flush has been called.
+ * It is only cleared while holding the tb_lock, so that no tb_flush can
+ * happen concurrently.
* @singlestep_enabled: Flags for single-stepping.
* @icount_extra: Instructions until next timer event.
* @icount_decr: Number of cycles left, with interrupt flag in high bit.
diff --git a/tcg/tcg.h b/tcg/tcg.h
index b83f763..aa4e123 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -615,6 +615,7 @@ void tcg_pool_delete(TCGContext *s);
void tb_lock(void);
void tb_unlock(void);
+bool tb_lock_recursive(void);
void tb_lock_reset(void);
static inline void *tcg_malloc(int size)
diff --git a/translate-all.c b/translate-all.c
index 8e1edd6..f68dcbc 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -143,6 +143,17 @@ void tb_unlock(void)
#endif
}
+bool tb_lock_recursive(void)
+{
+#ifdef CONFIG_USER_ONLY
+ if (have_tb_lock) {
+ return false;
+ }
+ tb_lock();
+#endif
+ return true;
+}
+
void tb_lock_reset(void)
{
#ifdef CONFIG_USER_ONLY
@@ -843,7 +854,8 @@ void tb_flush(CPUState *cpu)
tcg_ctx.tb_ctx.nb_tbs = 0;
CPU_FOREACH(cpu) {
- cpu->tb_invalidated_flag = 1;
+ atomic_set(&cpu->tb_invalidated_flag, 1);
+ smp_wmb();
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
}
@@ -979,6 +991,9 @@ void tb_phys_invalidate(TranslationBlock *tb,
tb_page_addr_t page_addr)
pc = tb->pc;
tb->pc = -1;
+ /* Pairs with smp_read_barrier_depends() in tb_find_fast. */
+ smp_wmb();
+
/* Then suppress this TB from the two jump lists. CPUs will not jump
* anymore into this translation block.
*/
@@ -1478,7 +1493,13 @@ static void tb_link_page(TranslationBlock *tb,
tb_page_addr_t phys_pc,
/* add in the physical hash table */
h = tb_phys_hash_func(phys_pc);
ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+
+ /* Both write barriers pair with tb_find_physical's
+ * smp_read_barrier_depends.
+ */
+ smp_wmb();
tb->phys_hash_next = *ptb;
+ smp_wmb();
*ptb = tb;
/* add in the page list */
--
2.7.3
[Qemu-devel] [RFC v1 04/11] tcg: protect TBContext with tb_lock., Alex Bennée, 2016/03/18
[Qemu-devel] [RFC v1 08/11] tcg: add kick timer for single-threaded vCPU emulation, Alex Bennée, 2016/03/18
[Qemu-devel] [RFC v1 07/11] tcg: add options for enabling MTTCG, Alex Bennée, 2016/03/18