From 1f86fb74396e07b0d230e0c204b6c7baab95f8b7 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Sat, 5 Dec 2015 16:28:59 +0100 Subject: [PATCH 1/3] Add simple statistical profiler to runtime library This enables collection of profiling data via statistical sampling to every program built with CHICKEN. It relies on trace information for determining which procedure is running. This also means it has a finer granularity than the default instrumentation-based profiler. This can be an advantage or disadvantage depending on what you're trying to do. --- chicken.h | 1 + library.scm | 2 + manual/Using the compiler | 6 +- runtime.c | 214 ++++++++++++++++++++++++++++++++++++++++++++-- support.scm | 2 +- 5 files changed, 216 insertions(+), 9 deletions(-) diff --git a/chicken.h b/chicken.h index 5f78ac6..5a41f73 100644 --- a/chicken.h +++ b/chicken.h @@ -1827,6 +1827,7 @@ C_fctexport C_cpsproc(C_dump_heap_state) C_noret; C_fctexport C_cpsproc(C_filter_heap_objects) C_noret; C_fctexport time_t C_fcall C_seconds(C_long *ms) C_regparm; +C_fctexport C_word C_i_dump_statistical_profile(); C_fctexport C_word C_a_i_list(C_word **a, int c, ...); C_fctexport C_word C_a_i_string(C_word **a, int c, ...); C_fctexport C_word C_a_i_record(C_word **a, int c, ...); diff --git a/library.scm b/library.scm index a07d314..9fa4761 100644 --- a/library.scm +++ b/library.scm @@ -3946,6 +3946,8 @@ EOF (when (##sys#fudge 37) ; -:H given? (##sys#print "\n" #f ##sys#standard-error) (##sys#dump-heap-state)) + (when (##sys#fudge 45) ; -:p or -:P given? + (##core#inline "C_i_dump_statistical_profile")) (let loop () (let ((tasks ##sys#cleanup-tasks)) (set! ##sys#cleanup-tasks '()) diff --git a/manual/Using the compiler b/manual/Using the compiler index 39ec22c..3ea2f78 100644 --- a/manual/Using the compiler +++ b/manual/Using the compiler @@ -137,7 +137,7 @@ the source text should be read from standard input. ; -prelude EXPRESSIONS : Add {{EXPRESSIONS}} before all other toplevel expressions in the compiled file. This option may be given multiple times. Processing of this option takes place before processing of {{-prologue}}. ; -profile : -; -accumulate-profile : Instruments the source code to count procedure calls and execution times. After the program terminates (either via an explicit {{exit}} or implicitly), profiling statistics are written to a file named {{PROFILE.}}. Each line of the generated file contains a list with the procedure name, the number of calls and the time spent executing it. Use the {{chicken-profile}} program to display the profiling information in a more user-friendly form. Enter {{chicken-profile -help}} at the command line to get a list of available options. The {{-accumulate-profile}} option is similar to {{-profile}}, but the resulting profile information will be appended to any existing {{PROFILE}} file. {{chicken-profile}} will merge and sum up the accumulated timing information, if several entries for the same procedure calls exist. Only profiling information for global procedures will be collected. +; -accumulate-profile : Instruments the source code to count procedure calls and execution times. After the program terminates (either via an explicit {{exit}} or implicitly), profiling statistics are written to a file named {{PROFILE.}}. Each line of the generated file contains a list with the procedure name, the number of calls and the time spent executing it. Use the {{chicken-profile}} program to display the profiling information in a more user-friendly form. Enter {{chicken-profile -help}} at the command line to get a list of available options. The {{-accumulate-profile}} option is similar to {{-profile}}, but the resulting profile information will be appended to any existing {{PROFILE}} file. {{chicken-profile}} will merge and sum up the accumulated timing information, if several entries for the same procedure calls exist. Only profiling information for global procedures will be collected. See the {{-:p}} option under [[#runtime-options|"Runtime options"]] below for statistical profiling support. ; -profile-name FILENAME : Specifies name of the generated profile information (which defaults to {{PROFILE.}}. Implies {{-profile}}. @@ -224,6 +224,10 @@ compiler itself) accept a small set of runtime options: ; {{-:o}} : Disables detection of stack overflows at run-time. +; {{-:p}} : Enable collection of statistics for profiling purposes and write to PROFILE.{{pid}} on exit. This functions at a granularity defined by the trace information in the binary and libraries: each traced function will show up in the output. See the {{-profile}} compiler option for instrumentation-based profiling. The {{PROFILE.pid}} format is compatible with the format generated by instrumentation-based profiling. + +; {{-:Pfreq}} : Same as {{-:p}} but set the sampling frequency in microseconds (default is 10000 microseconds or every 10 milliseconds). + ; {{-:r}} : Writes trace output to stderr. This option has no effect with in files compiled with the {{-no-trace}} options. ; {{-:sNUMBER}} : Specifies stack size. diff --git a/runtime.c b/runtime.c index d65c3a5..57ebdc7 100644 --- a/runtime.c +++ b/runtime.c @@ -63,6 +63,15 @@ # define EOVERFLOW 0 #endif +/* ITIMER_PROF is more precise, but Cygwin doesn't support it... */ +#ifdef __CYGWIN__ +# define C_PROFILE_SIGNAL SIGALRM +# define C_PROFILE_TIMER ITIMER_REAL +#else +# define C_PROFILE_SIGNAL SIGPROF +# define C_PROFILE_TIMER ITIMER_PROF +#endif + /* TODO: Include sys/select.h? Windows doesn't seem to have it... */ #ifndef NO_POSIX_POLL # include @@ -154,6 +163,7 @@ static C_TLS int timezone; #define TEMPORARY_STACK_SIZE 4096 #define STRING_BUFFER_SIZE 4096 #define DEFAULT_MUTATION_STACK_SIZE 1024 +#define PROFILE_TABLE_SIZE 1024 #define MAX_PENDING_INTERRUPTS 100 @@ -302,6 +312,14 @@ typedef struct hdump_bucket_struct struct hdump_bucket_struct *next; } HDUMP_BUCKET; +typedef struct profile_bucket_struct +{ + C_char *key; + C_uword sample_count; /* Multiplied by profile freq = time spent */ + C_uword call_count; /* Distinct calls seen while sampling */ + struct profile_bucket_struct *next; +} PROFILE_BUCKET; + /* Variables: */ @@ -351,7 +369,9 @@ C_TLS C_uword C_heap_growth, C_heap_shrinkage; C_TLS C_uword C_maximal_heap_size; -C_TLS time_t C_startup_time_seconds; +C_TLS time_t + C_startup_time_seconds, + profile_frequency = 10000; C_TLS char **C_main_argv, *C_dlerror; @@ -424,7 +444,9 @@ static C_TLS int chicken_ran_once, pass_serious_signals = 1, callback_continuation_level; -static volatile C_TLS int serious_signal_occurred = 0; +static volatile C_TLS int + serious_signal_occurred = 0, + profiling = 0; static C_TLS unsigned int mutation_count, tracked_mutation_count, @@ -459,6 +481,7 @@ static C_TLS FINALIZER_NODE static C_TLS void *current_module_handle; static C_TLS int flonum_print_precision = FLONUM_PRINT_PRECISION; static C_TLS HDUMP_BUCKET **hdump_table; +static C_TLS PROFILE_BUCKET **profile_table = NULL; static C_TLS int pending_interrupts[ MAX_PENDING_INTERRUPTS ], pending_interrupts_count, @@ -491,6 +514,7 @@ static void C_fcall really_remark(C_word *x) C_regparm; static C_word C_fcall intern0(C_char *name) C_regparm; static void C_fcall update_locative_table(int mode) C_regparm; static LF_LIST *find_module_handle(C_char *name); +static void take_profile_sample(); static C_cpsproc(call_cc_wrapper) C_noret; static C_cpsproc(call_cc_values_wrapper) C_noret; @@ -715,12 +739,15 @@ int CHICKEN_initialize(int heap, int stack, int symbols, void *toplevel) dlopen_flags = 0; #endif - /* setup signal handlers */ - if(!pass_serious_signals) { #ifdef HAVE_SIGACTION sa.sa_flags = 0; sigfillset(&sa.sa_mask); /* See note in C_establish_signal_handler() */ sa.sa_handler = global_signal_handler; +#endif + + /* setup signal handlers */ + if(!pass_serious_signals) { +#ifdef HAVE_SIGACTION C_sigaction(SIGBUS, &sa, NULL); C_sigaction(SIGFPE, &sa, NULL); C_sigaction(SIGILL, &sa, NULL); @@ -759,6 +786,21 @@ int CHICKEN_initialize(int heap, int stack, int symbols, void *toplevel) callback_continuation_level = 0; gc_ms = 0; (void)C_randomize(C_fix(time(NULL))); + + if (profiling) { +#ifdef HAVE_SIGACTION + C_sigaction(C_PROFILE_SIGNAL, &sa, NULL); +#else + C_signal(C_PROFILE_SIGNAL, global_signal_handler); +#endif + + profile_table = (PROFILE_BUCKET **)C_malloc(PROFILE_TABLE_SIZE * sizeof(PROFILE_BUCKET *)); + + if(profile_table == NULL) + panic(C_text("out of memory - can not allocate profile table")); + + C_memset(profile_table, 0, sizeof(PROFILE_BUCKET *) * PROFILE_TABLE_SIZE); + } /* create k to invoke code for system-startup: */ k0 = (C_SCHEME_BLOCK *)C_align((C_word)C_fromspace_top); @@ -1081,7 +1123,10 @@ void global_signal_handler(int signum) } #endif - C_raise_interrupt(signal_mapping_table[ signum ]); + /* TODO: Make full use of sigaction: check that /our/ timer expired */ + if (signum == C_PROFILE_SIGNAL && profiling) take_profile_sample(); + else C_raise_interrupt(signal_mapping_table[ signum ]); + #ifndef HAVE_SIGACTION /* not necessarily needed, but older UNIXen may not leave the handler installed: */ C_signal(signum, global_signal_handler); @@ -1246,6 +1291,8 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st " -:hsPERCENTAGE set heap shrink percentage\n" " -:hSIZE set fixed heap size\n" " -:r write trace output to stderr\n" + " -:p collect statistical profile and write to file at exit\n" + " -:PFREQ like -:p, specifying sampling frequency in us (default: 10000)\n" " -:sSIZE set nursery (stack) size\n" " -:tSIZE set symbol-table size\n" " -:fSIZE set maximal number of pending finalizers\n" @@ -1340,6 +1387,15 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st C_enable_gcweak = 1; break; + case 'P': + profiling = 1; + profile_frequency = arg_val(ptr); + goto next; + + case 'p': + profiling = 1; + break; + case 'r': show_trace = 1; break; @@ -1408,6 +1464,18 @@ C_word CHICKEN_run(void *toplevel) chicken_is_running = chicken_ran_once = 1; return_to_host = 0; + if(profiling) { + struct itimerval itv; + + itv.it_value.tv_sec = profile_frequency / 1000000; + itv.it_value.tv_usec = profile_frequency % 1000000; + itv.it_interval.tv_sec = itv.it_value.tv_sec; + itv.it_interval.tv_usec = itv.it_value.tv_usec; + + if (setitimer(C_PROFILE_TIMER, &itv, NULL) == -1) + panic(C_text("error setting timer for profiling")); + } + #if C_STACK_GROWS_DOWNWARD C_stack_limit = (C_word *)((C_byte *)C_stack_pointer - stack_size); #else @@ -1436,6 +1504,18 @@ C_word CHICKEN_run(void *toplevel) ((C_proc)C_restart_trampoline)(C_restart_c, p); } + if(profiling) { + struct itimerval itv; + + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 0; + itv.it_interval.tv_sec = itv.it_value.tv_sec; + itv.it_interval.tv_usec = itv.it_value.tv_usec; + + if (setitimer(C_PROFILE_TIMER, &itv, NULL) == -1) + panic(C_text("error clearing timer for profiling")); + } + chicken_is_running = 0; return C_restore; } @@ -3786,6 +3866,59 @@ C_regparm void *C_fcall C_retrieve2_symbol_proc(C_word val, char *name) return C_fast_retrieve_proc(val); } +/* Bump profile count for current top of trace buffer */ +static void take_profile_sample() +{ + PROFILE_BUCKET **bp, *b; + C_char *key; + TRACE_INFO *tb; + /* To count distinct calls of a procedure, remember last call */ + static C_char *prev_key = NULL; + static TRACE_INFO *prev_tb = NULL; + + /* trace_buffer_top points *beyond* the topmost entry: Go back one */ + if (trace_buffer_top == trace_buffer) { + if (!trace_buffer_full) return; /* No data yet */ + tb = trace_buffer_limit - 1; + } else { + tb = trace_buffer_top - 1; + } + + key = tb->raw; + if (key == NULL) return; /* May happen while in C_trace() */ + + /* We could also just hash the pointer but that's a bit trickier */ + bp = profile_table + hash_string(C_strlen(key), key, PROFILE_TABLE_SIZE, 0, 0); + b = *bp; + + /* First try to find pre-existing item in hash table */ + while(b != NULL) { + if(b->key == key) { + b->sample_count++; + if (prev_key != key && prev_tb != tb) + b->call_count++; + goto done; + } + else b = b->next; + } + + /* Not found, allocate a new item and use it as bucket's new head */ + b = (PROFILE_BUCKET *)C_malloc(sizeof(PROFILE_BUCKET)); + + if(b == NULL) + panic(C_text("out of memory - cannot allocate profile table-bucket")); + + b->next = *bp; + b->key = key; + *bp = b; + b->sample_count = 1; + b->call_count = 1; + +done: + prev_tb = tb; + prev_key = key; +} + C_regparm void C_fcall C_trace(C_char *name) { @@ -3869,7 +4002,9 @@ C_char *C_dump_trace(int start) C_regparm void C_fcall C_clear_trace_buffer(void) { - int i; + int i, old_profiling = profiling; + + profiling = 0; if(trace_buffer == NULL) { if(C_trace_buffer_size < MIN_TRACE_BUFFER_SIZE) @@ -3890,15 +4025,18 @@ C_regparm void C_fcall C_clear_trace_buffer(void) trace_buffer[ i ].cooked2 = C_SCHEME_FALSE; trace_buffer[ i ].thread = C_SCHEME_FALSE; } + profiling = old_profiling; } C_word C_resize_trace_buffer(C_word size) { - int old_size = C_trace_buffer_size; + int old_size = C_trace_buffer_size, old_profiling = profiling; assert(trace_buffer); + profiling = 0; free(trace_buffer); trace_buffer = NULL; C_trace_buffer_size = C_unfix(size); C_clear_trace_buffer(); + profiling = old_profiling; return(C_fix(old_size)); } @@ -4417,6 +4555,9 @@ C_regparm C_word C_fcall C_fudge(C_word fudge_factor) case C_fix(44): /* whether debugger is active */ return C_mk_bool(C_debugging); + case C_fix(45): /* Whether we're currently profiling */ + return C_mk_bool(profiling); + default: return C_SCHEME_UNDEFINED; } } @@ -9264,6 +9405,65 @@ C_i_get_keyword(C_word kw, C_word args, C_word def) return def; } +C_word C_i_dump_statistical_profile() +{ + PROFILE_BUCKET *b, *b2, **bp; + FILE *fp; + C_char *k1, *k2 = NULL; + int n; + double ms; + struct itimerval itv; + + assert(profiling); + assert(profile_table != NULL); + + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 0; + itv.it_interval.tv_sec = itv.it_value.tv_sec; + itv.it_interval.tv_usec = itv.it_value.tv_usec; + + if (setitimer(C_PROFILE_TIMER, &itv, NULL) == -1) + panic(C_text("error clearing timer for profiling")); + + profiling = 0; /* In case a SIGPROF is delivered late */ + bp = profile_table; + + C_snprintf(buffer, STRING_BUFFER_SIZE, C_text("PROFILE.%d"), C_getpid()); + + if(debug_mode) + C_dbg(C_text("debug"), C_text("dumping statistical profile to `%s'...\n"), buffer); + + fp = C_fopen(buffer, "w"); + if (fp == NULL) + panic(C_text("could not write profile!")); + + for(n = 0; n < PROFILE_TABLE_SIZE; ++n) { + for(b = bp[ n ]; b != NULL; b = b2) { + b2 = b->next; + + k1 = b->key; + C_fputs(C_text("(|"), fp); + /* Dump raw C string as if it were a symbol */ + while((k2 = C_strpbrk(k1, C_text("\\|"))) != NULL) { + C_fwrite(k1, 1, k2-k1, fp); + C_fputc('\\', fp); + C_fputc(*k2, fp); + k1 = k2+1; + } + C_fputs(k1, fp); + ms = (double)b->sample_count * (double)profile_frequency / 1000.0; + C_fprintf(fp, C_text("| " UWORD_COUNT_FORMAT_STRING " %lf)\n"), + b->call_count, ms); + C_free(b); + } + } + + C_fclose(fp); + C_free(profile_table); + profile_table = NULL; + + return C_SCHEME_UNDEFINED; +} void C_ccall C_dump_heap_state(C_word c, C_word *av) { diff --git a/support.scm b/support.scm index 888933d..28437ff 100644 --- a/support.scm +++ b/support.scm @@ -168,7 +168,7 @@ ((string? x) (string->symbol x)) (else (string->symbol (sprintf "~a" x))) ) ) -(define (backslashify s) (string-translate (->string s) "\\" "\\\\")) +(define (backslashify s) (string-translate* (->string s) '(("\\" . "\\\\")))) (define (uncommentify s) (string-translate* (->string s) '(("*/" . "*_/")))) -- 2.1.4