qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB


From: Alex Bennée
Subject: Re: [Qemu-devel] [PATCH v3 1/6] accel/tcg: adding structure to store TB statistics
Date: Thu, 04 Jul 2019 15:05:15 +0100
User-agent: mu4e 1.3.2; emacs 26.1

vandersonmr <address@hidden> writes:

> We want to store statistics for each TB even after flushes.
> We do not want to modify or grow the TB struct.
> So we create a new struct to contain this statistics and
> we link one of it to each TB as they are generated.
>
> Signed-off-by: Vanderson M. do Rosario <address@hidden>
> ---
>  accel/tcg/translate-all.c | 60 +++++++++++++++++++++++++++++++++++++++
>  include/exec/exec-all.h   | 46 ++++++++++++++++++++++++++++++
>  include/exec/tb-context.h |  1 +
>  include/exec/tb-hash.h    |  7 +++++
>  include/qemu/log.h        |  1 +
>  5 files changed, 115 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..d05803a142 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1132,11 +1132,31 @@ static bool tb_cmp(const void *ap, const void *bp)
>          a->page_addr[1] == b->page_addr[1];
>  }
>
> +/*
> + * This is the more or less the same compare, but the data persists
> + * over tb_flush. We also aggregate the various variations of cflags
> + * under one record and ignore the details of page overlap (although
> + * we can count it).
> + */
> +bool tb_stats_cmp(const void *ap, const void *bp)
> +{
> +    const TBStatistics *a = ap;
> +    const TBStatistics *b = bp;
> +
> +    return a->phys_pc == b->phys_pc &&
> +        a->pc == b->pc &&
> +        a->cs_base == b->cs_base &&
> +        a->flags == b->flags;
> +}
> +
>  static void tb_htable_init(void)
>  {
>      unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
>      qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {

So this should be a separate flag, not using qemu_loglevel_mask to
check. Something like tcg_collect_tbstats? Then the various things that
might need stats can just enable it.

> +        qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +    }
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1586,6 +1606,31 @@ static inline void tb_page_add(PageDesc *p, 
> TranslationBlock *tb,
>  #endif
>  }
>
> +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
> +                                  target_ulong cs_base, uint32_t flags)
> +{
> +    TBStatistics *new_stats = g_new0(TBStatistics, 1);
> +    uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags);
> +    void *existing_stats = NULL;
> +    new_stats->phys_pc = phys_pc;
> +    new_stats->pc = pc;
> +    new_stats->cs_base = cs_base;
> +    new_stats->flags = flags;
> +
> +    qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
> +
> +    if (unlikely(existing_stats)) {
> +        /*
> +         * If there is already a TBStatistic for this TB from a previous 
> flush
> +         * then just make the new TB point to the older TBStatistic
> +         */
> +        g_free(new_stats);
> +        return existing_stats;
> +    } else {
> +        return new_stats;
> +    }
> +}
> +
>  /* add a new TB and link it to the physical page tables. phys_page2 is
>   * (-1) to indicate that only one page contains the TB.
>   *
> @@ -1732,6 +1777,21 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      ti = profile_getclock();
>  #endif
>
> +    /*
> +     * We want to fetch the stats structure before we start code
> +     * generation so we can count interesting things about this
> +     * generation.
> +     *
> +     * XXX: using loglevel is fugly - we should have a general flag
> +     */
> +    if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && 
> qemu_log_in_addr_range(tb->pc)) {

As the XXX says use a common flag. So something like:


  if (tcg_collect_tb_stats) {
     tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
     ...
  } else {
     tb->tb_stats = NULL;
  }

And then later inside that:

   if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc))
   {
        tb->tb_stats.stats_enabled |= TB_EXEC_STATS;
   }


> +        tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
> +        /* XXX: should we lock and update in bulk? */
> +        atomic_inc(&tb->tb_stats->translations.total);

XXX is a todo note for later... make a decision and remove the comment.

> +    } else {
> +        tb->tb_stats = NULL;
> +    }
> +
>      tcg_func_start(tcg_ctx);
>
>      tcg_ctx->cpu = env_cpu(env);
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 16034ee651..a4bcd9b6df 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -324,6 +324,49 @@ static inline void 
> tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
>  #define CODE_GEN_AVG_BLOCK_SIZE 150
>  #endif
>
> +typedef struct TBStatistics TBStatistics;
> +
> +/*
> + * This struct stores statistics such as execution count of the
> + * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags
> + * has its own TBStatistics which will persist over tb_flush.
> + *
> + * We include additional counters to track number of translations as
> + * well as variants for compile flags.
> + */
> +struct TBStatistics {
> +    tb_page_addr_t phys_pc;
> +    target_ulong pc;
> +    uint32_t     flags;
> +    /* cs_base isn't included in the hash but we do check for matches */
> +    target_ulong cs_base;
> +
> +    /* Translation stats */
> +    struct {
> +        unsigned long total;
> +        unsigned long uncached;
> +        unsigned long spanning;
> +        /* XXX: count invalidation? */
> +    } translations;
> +
> +    /* Execution stats */
> +    struct {
> +        unsigned long total;
> +        unsigned long atomic;
> +    } executions;
> +
> +    struct {
> +        unsigned num_guest_inst;
> +        unsigned num_host_inst;
> +        unsigned num_tcg_inst;
> +    } code;
> +
> +    /* HMP information - used for referring to previous search */
> +    int display_id;

Usually we introduce the fields into the structure in the patches that
need them. Otherwise you run the risk of adding a bunch of fields that
you planned to add but never quite got around to the initial code. Of
course usually you write code and then start splitting the commits up
when you are cleaning up your branch.


> +};
> +
> +bool tb_stats_cmp(const void *ap, const void *bp);
> +
>  /*
>   * Translation Cache-related fields of a TB.
>   * This struct exists just for convenience; we keep track of TB's in a binary
> @@ -403,6 +446,9 @@ struct TranslationBlock {
>      uintptr_t jmp_list_head;
>      uintptr_t jmp_list_next[2];
>      uintptr_t jmp_dest[2];
> +
> +    /* Pointer to a struct where statistics from the TB is stored */
> +    TBStatistics *tb_stats;
>  };
>
>  extern bool parallel_cpus;
> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> index feb585e0a7..a4decb5d1f 100644
> --- a/include/exec/tb-context.h
> +++ b/include/exec/tb-context.h
> @@ -35,6 +35,7 @@ struct TBContext {
>
>      /* statistics */
>      unsigned tb_flush_count;
> +    struct qht tb_stats;
>  };
>
>  extern TBContext tb_ctx;
> diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
> index 4f3a37d927..54c477fe79 100644
> --- a/include/exec/tb-hash.h
> +++ b/include/exec/tb-hash.h
> @@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong 
> pc, uint32_t flags,
>      return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
>  }
>
> +static inline
> +uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
> +                            uint32_t flags)
> +{
> +    return qemu_xxhash5(phys_pc, pc, flags);
> +}
> +
>  #endif
> diff --git a/include/qemu/log.h b/include/qemu/log.h
> index b097a6cae1..2fca65dd01 100644
> --- a/include/qemu/log.h
> +++ b/include/qemu/log.h
> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
>  /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
>  #define CPU_LOG_TB_OP_IND  (1 << 16)
>  #define CPU_LOG_TB_FPU     (1 << 17)
> +#define CPU_LOG_HOT_TBS    (1 << 18)

this flag can be punted to when we introduce the actual tracking because
we are going to have a specific (internal) flag for initialising the
tracking machinery.

>
>  /* Lock output for a series of related logs.  Since this is not needed
>   * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we


--
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]