[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 2/3] scripts/performance: Add topN_callgrind.py script
From: |
Aleksandar Markovic |
Subject: |
Re: [PATCH v3 2/3] scripts/performance: Add topN_callgrind.py script |
Date: |
Thu, 25 Jun 2020 11:54:08 +0200 |
сре, 24. јун 2020. у 17:32 Ahmed Karaman
<ahmedkhaledkaraman@gmail.com> је написао/ла:
>
> Python script that prints the top N most executed functions in QEMU
> using callgrind.
>
> Syntax:
> topN_callgrind.py [-h] [-n] <number of displayed top functions> -- \
> <qemu executable> [<qemu executable options>] \
> <target executable> [<target executable options>]
>
> [-h] - Print the script arguments help message.
> [-n] - Specify the number of top functions to print.
> - If this flag is not specified, the tool defaults to 25.
>
> Example of usage:
> topN_callgrind.py -n 20 -- qemu-arm coulomb_double-arm
>
> Example Output:
> No. Percentage Name Source File
> ---- --------- ------------------ ------------------------------
> 1 24.577% 0x00000000082db000 ???
> 2 20.467% float64_mul <qemu>/fpu/softfloat.c
> 3 14.720% float64_sub <qemu>/fpu/softfloat.c
> 4 13.864% float64_add <qemu>/fpu/softfloat.c
> 5 4.876% helper_mulsd <qemu>/target/i386/ops_sse.h
> 6 3.767% helper_subsd <qemu>/target/i386/ops_sse.h
> 7 3.549% helper_addsd <qemu>/target/i386/ops_sse.h
> 8 2.185% helper_ucomisd <qemu>/target/i386/ops_sse.h
> 9 1.667% helper_lookup_tb_ptr <qemu>/include/exec/tb-lookup.h
> 10 1.662% f64_compare <qemu>/fpu/softfloat.c
> 11 1.509% helper_lookup_tb_ptr <qemu>/accel/tcg/tcg-runtime.c
> 12 0.635% helper_lookup_tb_ptr <qemu>/include/exec/exec-all.h
> 13 0.616% float64_div <qemu>/fpu/softfloat.c
> 14 0.502% helper_pand_xmm <qemu>/target/i386/ops_sse.h
> 15 0.502% float64_mul <qemu>/include/fpu/softfloat.h
> 16 0.476% helper_lookup_tb_ptr <qemu>/target/i386/cpu.h
> 17 0.437% float64_compare_quiet <qemu>/fpu/softfloat.c
> 18 0.414% helper_pxor_xmm <qemu>/target/i386/ops_sse.h
> 19 0.353% round_to_int <qemu>/fpu/softfloat.c
> 20 0.347% helper_cc_compute_all <qemu>/target/i386/cc_helper.c
>
> Signed-off-by: Ahmed Karaman <ahmedkhaledkaraman@gmail.com>
> ---
> scripts/performance/topN_callgrind.py | 139 ++++++++++++++++++++++++++
> 1 file changed, 139 insertions(+)
> create mode 100755 scripts/performance/topN_callgrind.py
>
> diff --git a/scripts/performance/topN_callgrind.py
> b/scripts/performance/topN_callgrind.py
> new file mode 100755
> index 0000000000..6136f72a74
> --- /dev/null
> +++ b/scripts/performance/topN_callgrind.py
> @@ -0,0 +1,139 @@
> +#!/usr/bin/env python3
> +
> +# Print the top N most executed functions in QEMU using callgrind.
> +# Syntax:
> +# topN_callgrind.py [-h] [-n] <number of displayed top functions> -- \
> +# <qemu executable> [<qemu executable options>] \
> +# <target executable> [<target executable options>]
> +#
> +# [-h] - Print the script arguments help message.
> +# [-n] - Specify the number of top functions to print.
> +# - If this flag is not specified, the tool defaults to 25.
> +#
> +# Example of usage:
> +# topN_callgrind.py -n 20 -- qemu-arm coulomb_double-arm
> +#
> +# This file is a part of the project "TCG Continuous Benchmarking".
> +#
> +# Copyright (C) 2020 Ahmed Karaman <ahmedkhaledkaraman@gmail.com>
> +# Copyright (C) 2020 Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>
> +#
> +# This program is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation, either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program. If not, see <https://www.gnu.org/licenses/>.
> +
> +import argparse
> +import os
> +import subprocess
> +import sys
> +
> +
> +# Parse the command line arguments
> +parser = argparse.ArgumentParser(
> + usage='topN_callgrind.py [-h] [-n] <number of displayed top functions>
> -- '
> + '<qemu executable> [<qemu executable options>] '
> + '<target executable> [<target executable options>]')
> +
> +parser.add_argument('-n', dest='top', type=int, default=25,
> + help='Specify the number of top functions to print.')
> +
> +parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS)
> +
> +args = parser.parse_args()
> +
> +# Extract the needed variables from the args
> +command = args.command
> +top = args.top
> +
> +# Ensure that valgrind is installed
> +check_valgrind = subprocess.run(
'check_valgrind_presence' is a better name than 'check_valgrind'.
> + ["which", "valgrind"], stdout=subprocess.DEVNULL)
> +if check_valgrind.returncode:
> + sys.exit("Please install valgrind before running the script!")
> +
> +# Run callgrind
> +callgrind = subprocess.run((["valgrind", "--tool=callgrind",
> + "--callgrind-out-file=callgrind.data"] +
> command),
> + stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
As I described in my comments for perf-related script, it is better to
use /tmp/callgrind.data, rather than just callgrind.data.
> +if callgrind.returncode:
> + sys.exit(callgrind.stderr.decode("utf-8"))
> +
> +# Save callgrind_annotate output to callgrind_annotate.out
> +with open("callgrind_annotate.out", "w") as output:
Likewise, use /tmp/callgrind_annotate.out here.
> + callgrind_annotate = subprocess.run(
> + ["callgrind_annotate", "callgrind.data"],
> + stdout=output,
> + stderr=subprocess.PIPE)
> + if callgrind_annotate.returncode:
> + os.unlink('callgrind.data')
> + output.close()
> + os.unlink('callgrind_annotate.out')
> + sys.exit(callgrind_annotate.stderr.decode("utf-8"))
> +
> +
> +# Read the callgrind_annotate output to callgrind_data[]
> +callgrind_data = []
> +with open('callgrind_annotate.out', 'r') as data:
> + callgrind_data = data.readlines()
> +
> +# Line number with the total number of instructions
> +total_instructions_line_number = 20
> +
> +# Get the total number of instructions
> +total_instructions_line_data = callgrind_data[total_instructions_line_number]
> +total_number_of_instructions = total_instructions_line_data.split(' ')[0]
> +total_number_of_instructions = int(
> + total_number_of_instructions.replace(',', ''))
> +
> +# Line number with the top function
> +first_func_line = 25
> +
> +# Number of functions recorded by callgrind, last two lines are always empty
> +number_of_functions = len(callgrind_data) - first_func_line - 2
> +
> +# Limit the number of top functions to "top"
> +number_of_top_functions = (top if number_of_functions >
> + top else number_of_functions)
> +
> +# Store the data of the top functions in top_functions[]
> +top_functions = callgrind_data[first_func_line:
> + first_func_line + number_of_top_functions]
> +
> +# Print table header
> +print('{:>4} {:>10} {:<30} {}\n{} {} {} {}'.format('No.',
> + 'Percentage',
> + 'Name',
Use 'Function Name' as the column header instead of 'Name'.
> + 'Source File',
> + '-' * 4,
> + '-' * 10,
> + '-' * 30,
> + '-' * 30,
> + ))
> +
> +# Print top N functions
> +for (index, function) in enumerate(top_functions, start=1):
> + function_data = function.split()
> + # Calculate function percentage
> + function_instructions = float(function_data[0].replace(',', ''))
> + function_percentage = (function_instructions /
> + total_number_of_instructions)*100
> + # Get function name and source files path
> + function_source_path, function_name = function_data[1].split(':')
Please replace 'function_source_path' with more accurate 'function_source_file'.
> + # Print extracted data
> + print('{:>4} {:>9.3f}% {:<30} {}'.format(index,
> + round(function_percentage,
> 3),
> + function_name,
> + function_source_path))
> +
> +# Remove intermediate files
> +os.unlink('callgrind.data')
> +os.unlink('callgrind_annotate.out')
os.unlink('/tmp/callgrind.data')
os.unlink('/tmp/callgrind_annotate.out')
Thanks,
Aleksandar
> --
> 2.17.1
>