[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [Qemu-Devel][PATCH 2/3] Saving counters between tb_flus
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [Qemu-Devel][PATCH 2/3] Saving counters between tb_flush events. |
Date: |
Mon, 17 Jun 2019 11:00:07 +0100 |
User-agent: |
mu4e 1.3.2; emacs 26.1 |
vandersonmr <address@hidden> writes:
> A new hash map was added to store the accumulated execution
> frequency of the TBs even after tb_flush events. A dump
> function was also added as a way to visualize these frequencies.
>
> Signed-off-by: vandersonmr <address@hidden>
I forgot to mention the formatting looks a little off here. It should be
your full name (accents and all) before the email address, e.g:
Signed-off-by: Alex Bennée <address@hidden>
> ---
> accel/tcg/translate-all.c | 59 +++++++++++++++++++++++++++++++++++++++
> accel/tcg/translate-all.h | 2 ++
> exec.c | 7 +++++
> include/exec/exec-all.h | 3 ++
> include/exec/tb-context.h | 9 ++++++
> include/qom/cpu.h | 2 ++
> 6 files changed, 82 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..0bc670ffad 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1118,6 +1118,12 @@ static inline void code_gen_alloc(size_t tb_size)
> }
> }
>
> +static bool statistics_cmp(const void* ap, const void *bp) {
> + const TBStatistics *a = ap;
> + const TBStatistics *b = bp;
> + return a->pc == b->pc && a->cs_base == b->cs_base && a->flags ==
> b->flags;
Looking at tb_cmp() bellow this I wonder if we should also take into
account the page_address values. Some TB's will be translated over a
page boundary and in theory that can change with new mappings so we need
to ensure they are really equivalent.
> +}
> +
> static bool tb_cmp(const void *ap, const void *bp)
> {
> const TranslationBlock *a = ap;
> @@ -1137,6 +1143,7 @@ static void tb_htable_init(void)
> unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
> qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> + qht_init(&tb_ctx.tb_statistics, statistics_cmp, CODE_GEN_HTABLE_SIZE,
> QHT_MODE_AUTO_RESIZE);
> }
>
> /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1228,6 +1235,53 @@ static gboolean tb_host_size_iter(gpointer key,
> gpointer value, gpointer data)
> return false;
> }
>
> +static void do_tb_dump_exec_freq(void *p, uint32_t hash, void *userp)
> +{
> +#if TARGET_LONG_SIZE == 8
> + TBStatistics *tbs = p;
> + printf("%016lx\t%lu\n", tbs->pc, tbs->total_exec_freq);
> +#elif TARGET_LONG_SIZE == 4
> + TBStatistics *tbs = p;
> + printf("%016x\t%lu\n", tbs->pc, tbs->total_exec_freq);
> +#endif
> +}
This will need some fixing up so deal with output to the HMP monitor. We
don't want to be directly spamming stdout with results.
> +
> +void tb_dump_all_exec_freq(void)
> +{
> + tb_read_exec_freq();
> + qht_iter(&tb_ctx.tb_statistics, do_tb_dump_exec_freq, NULL);
> +}
I would be tempted to insert these into a sorted GList first so we can
dump the first N hotblocks.
> +
> +static void do_tb_read_exec_freq(void *p, uint32_t hash, void *userp)
> +{
> + TranslationBlock *tb = p;
> + TBStatistics tbscmp;
> + tbscmp.pc = tb->pc;
> + tbscmp.cs_base = tb->cs_base;
> + tbscmp.flags = tb->flags;
> +
> + TBStatistics *tbs = qht_lookup(userp, &tbscmp, hash);
> +
> + uint64_t exec_freq = tb_get_and_reset_exec_freq((TranslationBlock*) p);
> +
> + if (tbs) {
> + tbs->total_exec_freq += exec_freq;
> + } else {
> + void *existing;
> + tbs = malloc(sizeof(TBStatistics));
use g_new0(TBStatistics, 1)
> + tbs->total_exec_freq = exec_freq;
> + tbs->pc = tb->pc;
> + tbs->cs_base = tb->cs_base;
> + tbs->flags = tb->flags;
> + qht_insert(userp, tbs, hash, &existing);
> + }
> +}
> +
Comment here about what we are doing? Maybe tb_copy_old_counts()?
> +void tb_read_exec_freq(void)
> +{
> + qht_iter(&tb_ctx.htable, do_tb_read_exec_freq, &tb_ctx.tb_statistics);
> +}
> +
> /* flush all the translation blocks */
> static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
> {
> @@ -1252,6 +1306,10 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data
> tb_flush_count)
> cpu_tb_jmp_cache_clear(cpu);
> }
>
> + if (enable_freq_count) {
> + tb_read_exec_freq();
> + }
> +
> qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
> page_flush_tb();
>
> @@ -1723,6 +1781,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> tb->flags = flags;
> tb->cflags = cflags;
> tb->trace_vcpu_dstate = *cpu->trace_dstate;
> + tb->exec_freq = 0;
> tcg_ctx->tb_cflags = cflags;
> tb_overflow:
>
> diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h
> index 64f5fd9a05..e13088c36d 100644
> --- a/accel/tcg/translate-all.h
> +++ b/accel/tcg/translate-all.h
> @@ -32,6 +32,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start,
> tb_page_addr_t end,
> int is_cpu_write_access);
> void tb_check_watchpoint(CPUState *cpu);
>
> +extern bool enable_freq_count;
> +
> #ifdef CONFIG_USER_ONLY
> int page_unprotect(target_ulong address, uintptr_t pc);
> #endif
> diff --git a/exec.c b/exec.c
> index e7622d1956..9b64a012ee 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1013,6 +1013,13 @@ const char *parse_cpu_option(const char *cpu_option)
> return cpu_type;
> }
>
> +uint64_t tb_get_and_reset_exec_freq(TranslationBlock *tb)
> +{
> + uint64_t exec_freq = atomic_load_acquire(&tb->exec_freq);
> + atomic_store_release(&tb->exec_freq, 0);
I'm not sure you need acquire/release semantics here as you are only
reading this in an exclusive period when all in-flight updates should
be synced (locks are implicit barriers). Folding this up into
do_tb_read_exec_freq might make this clearer.
> + return exec_freq;
> +}
> +
> #if defined(CONFIG_USER_ONLY)
> void tb_invalidate_phys_addr(target_ulong addr)
> {
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index a80407622e..5d3d829d18 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -513,4 +513,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> /* vl.c */
> extern int singlestep;
>
> +void tb_read_exec_freq(void);
> +void tb_dump_all_exec_freq(void);
> +
> #endif
> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> index feb585e0a7..ba518d47a0 100644
> --- a/include/exec/tb-context.h
> +++ b/include/exec/tb-context.h
> @@ -28,6 +28,14 @@
>
> typedef struct TranslationBlock TranslationBlock;
> typedef struct TBContext TBContext;
> +typedef struct TBStatistics TBStatistics;
> +
> +struct TBStatistics {
> + target_ulong pc;
> + target_ulong cs_base;
> + uint32_t flags;
> + uint64_t total_exec_freq;
> +};
>
> struct TBContext {
>
> @@ -35,6 +43,7 @@ struct TBContext {
>
> /* statistics */
> unsigned tb_flush_count;
> + struct qht tb_statistics;
> };
>
> extern TBContext tb_ctx;
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 5ee0046b62..593c1f1137 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -474,6 +474,8 @@ static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
> }
> }
>
> +uint64_t tb_get_and_reset_exec_freq(struct TranslationBlock*);
> +
> /**
> * qemu_tcg_mttcg_enabled:
> * Check whether we are running MultiThread TCG or not.
--
Alex Bennée
- [Qemu-devel] [PATCH 0/3] Collecting TB Execution Frequency, vandersonmr, 2019/06/14
- [Qemu-devel] [Qemu-Devel][PATCH 1/3] Adding an optional tb execution counter., vandersonmr, 2019/06/14
- [Qemu-devel] [Qemu-Devel][PATCH 2/3] Saving counters between tb_flush events., vandersonmr, 2019/06/14
- [Qemu-devel] [Qemu-Devel][PATCH 3/3] Adding command line option to linux-user., vandersonmr, 2019/06/14
- Re: [Qemu-devel] [PATCH 0/3] Collecting TB Execution Frequency, no-reply, 2019/06/14
- Re: [Qemu-devel] [PATCH 0/3] Collecting TB Execution Frequency, Alex Bennée, 2019/06/17
- Re: [Qemu-devel] [PATCH 0/3] Collecting TB Execution Frequency, Alex Bennée, 2019/06/17