qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v5 04/10] accel: replacing part of CONFIG_PROFIL


From: Alex Bennée
Subject: Re: [Qemu-devel] [PATCH v5 04/10] accel: replacing part of CONFIG_PROFILER with TBStats
Date: Thu, 15 Aug 2019 15:54:05 +0100
User-agent: mu4e 1.3.4; emacs 27.0.50

vandersonmr <address@hidden> writes:

> We add some of the statistics collected in the TCGProfiler
> into the TBStats, having the statistics not only for the whole
> emulation but for each TB. Then, we removed these stats
> from TCGProfiler and reconstruct the information for the
> "info jit" using the sum of all TBStats statistics.
>
> The goal is to have one unique and better way of collecting
> emulation statistics. Moreover, checking dynamiclly if the
> profiling is enabled showed to have an insignificant impact
> on the performance:
> https://wiki.qemu.org/Internships/ProjectIdeas/TCGCodeQuality#Overheads.
>
> Signed-off-by: Vanderson M. do Rosario <address@hidden>
> ---
>  accel/tcg/tb-stats.c      | 95 +++++++++++++++++++++++++++++++++++++++
>  accel/tcg/translate-all.c |  8 +---
>  include/exec/tb-stats.h   | 11 +++++
>  tcg/tcg.c                 | 93 +++++---------------------------------
>  tcg/tcg.h                 | 10 -----
>  5 files changed, 118 insertions(+), 99 deletions(-)
>
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> index 3489133e9e..9b720d9b86 100644
> --- a/accel/tcg/tb-stats.c
> +++ b/accel/tcg/tb-stats.c
> @@ -1,9 +1,104 @@
>  #include "qemu/osdep.h"
>
>  #include "disas/disas.h"
> +#include "exec/exec-all.h"
> +#include "tcg.h"
> +
> +#include "qemu/qemu-print.h"
>
>  #include "exec/tb-stats.h"
>
> +struct jit_profile_info {
> +    uint64_t translations;
> +    uint64_t aborted;
> +    uint64_t ops;
> +    unsigned ops_max;
> +    uint64_t del_ops;
> +    uint64_t temps;
> +    unsigned temps_max;
> +    uint64_t host;
> +    uint64_t guest;
> +    uint64_t search_data;
> +};
> +
> +/* accumulate the statistics from all TBs */
> +static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
> +{
> +    struct jit_profile_info *jpi = userp;
> +    TBStatistics *tbs = p;
> +
> +    jpi->translations += tbs->translations.total;
> +    jpi->ops += tbs->code.num_tcg_ops;
> +    if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
> +        jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
> +    }
> +    jpi->del_ops += tbs->code.deleted_ops;
> +    jpi->temps += tbs->code.temps;
> +    if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
> +        jpi->temps_max = stat_per_translation(tbs, code.temps);
> +    }
> +    jpi->host += tbs->code.out_len;
> +    jpi->guest += tbs->code.in_len;
> +    jpi->search_data += tbs->code.search_out_len;
> +}
> +
> +/* dump JIT statisticis using TCGProfile and TBStats */
> +void dump_jit_profile_info(TCGProfile *s)
> +{
> +    if (!tb_stats_collection_enabled()) {
> +        return;
> +    }
> +
> +    struct jit_profile_info *jpi = g_new0(struct jit_profile_info, 1);
> +
> +    qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, jpi);
> +
> +    if (jpi->translations) {
> +        qemu_printf("translated TBs      %" PRId64 "\n", jpi->translations);
> +        qemu_printf("avg ops/TB          %0.1f max=%d\n",
> +                jpi->ops / (double) jpi->translations, jpi->ops_max);
> +        qemu_printf("deleted ops/TB      %0.2f\n",
> +                jpi->del_ops / (double) jpi->translations);
> +        qemu_printf("avg temps/TB        %0.2f max=%d\n",
> +                jpi->temps / (double) jpi->translations, jpi->temps_max);
> +        qemu_printf("avg host code/TB    %0.1f\n",
> +                jpi->host / (double) jpi->translations);
> +        qemu_printf("avg search data/TB  %0.1f\n",
> +                jpi->search_data / (double) jpi->translations);
> +
> +        if (s) {
> +            int64_t tot = s->interm_time + s->code_time;
> +            qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 
> GHz)\n",
> +                            tot, tot / 2.4e9);
> +            qemu_printf("cycles/op           %0.1f\n",
> +                        jpi->ops ? (double)tot / jpi->ops : 0);
> +            qemu_printf("cycles/in byte      %0.1f\n",
> +                        jpi->guest ? (double)tot / jpi->guest : 0);
> +            qemu_printf("cycles/out byte     %0.1f\n",
> +                        jpi->host ? (double)tot / jpi->host : 0);
> +            qemu_printf("cycles/search byte     %0.1f\n",
> +                        jpi->search_data ? (double)tot / jpi->search_data : 
> 0);
> +            if (tot == 0) {
> +                tot = 1;
> +            }
> +            qemu_printf("  gen_interm time   %0.1f%%\n",
> +                        (double)s->interm_time / tot * 100.0);
> +            qemu_printf("  gen_code time     %0.1f%%\n",
> +                        (double)s->code_time / tot * 100.0);
> +            qemu_printf("optim./code time    %0.1f%%\n",
> +                        (double)s->opt_time / (s->code_time ? s->code_time : 
> 1)
> +                        * 100.0);
> +            qemu_printf("liveness/code time  %0.1f%%\n",
> +                    (double)s->la_time / (s->code_time ? s->code_time : 1) * 
> 100.0);
> +            qemu_printf("cpu_restore count   %" PRId64 "\n",
> +                    s->restore_count);
> +            qemu_printf("  avg cycles        %0.1f\n",
> +                    s->restore_count ? (double)s->restore_time / 
> s->restore_count : 0);
> +        }
> +    }

I think the g_free(jpi) should be moved from the later patches to here.
Otherwise:

Reviewed-by: Alex Bennée <address@hidden>

--
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]