From 899d3db27d1a2c1317543499f1762fad62ffd144 Mon Sep 17 00:00:00 2001 From: felix Date: Wed, 21 Nov 2018 18:48:48 +0100 Subject: [PATCH] Add unboxing pass to lfa2 After the lfa2 pass another pass is executed to eliminate unnecessary boxing + unboxing of floating point intermediate values. The process is roughly this: identify variables that are unassigned and are known to contain flonums, count all accesses, then count all accesses of these variables that are in direct operator position of an intrinsic that has an unboxed variant and, if the number of accesses in unboxed position is the same as the number of total accesses, then the variable can be let-bound using a specialized construct (##core#let_float) and all accesses be direct accesses (without any boxing/unboxing). Results of unboxable intrinsics are boxed automatically (using ##core#box_float), uses of ##core#inline_allocate on unboxable intrinsics are converted to ##core#inline forms. The lfa2 pass is now enabled at optimization levels 2 or higher. --- NEWS | 4 + batch-driver.scm | 9 +- c-backend.scm | 47 +++---- chicken.h | 18 ++- chicken.scm | 7 +- core.scm | 60 +++++---- lfa2.scm | 324 ++++++++++++++++++++++++++++++++++++---------- manual/Using the compiler | 2 +- support.scm | 4 +- 9 files changed, 349 insertions(+), 126 deletions(-) diff --git a/NEWS b/NEWS index c643784b..3eba6d6a 100644 --- a/NEWS +++ b/NEWS @@ -46,6 +46,10 @@ with the same version of the compiler. - the "-consult-type-file" and "-emit-type-file" options have been renamed to "-consult-types-file" and "-emit-types-file", respectively. + - Added an optimization pass for reducing the amount of boxing of + intermediate floating point values, enabled by the "-lfa2" compiler + option. + - The "lfa2" pass is now enabled at optimization levels 2 or higher. - Tools - The new "-link" option to csc allows linking with objects from extensions. diff --git a/batch-driver.scm b/batch-driver.scm index fc7afb04..4a4a370e 100644 --- a/batch-driver.scm +++ b/batch-driver.scm @@ -802,8 +802,13 @@ (when do-lfa2 (begin-time) (debugging 'p "doing lfa2") - (perform-secondary-flow-analysis node2 db) - (end-time "secondary flow analysis")) + (let ((floatvars (perform-secondary-flow-analysis node2 db))) + (end-time "secondary flow analysis") + (unless (null? floatvars) + (begin-time) + (debugging 'p "doing unboxing") + (set! node2 (perform-unboxing node2 floatvars))) + (end-time "unboxing"))) (print-node "optimized" '|7| node2) ;; inlining into a file with interrupts enabled would ;; change semantics diff --git a/c-backend.scm b/c-backend.scm index babb2ac3..952fa8ea 100644 --- a/c-backend.scm +++ b/c-backend.scm @@ -124,6 +124,9 @@ (if (vector? lit) (gen "((C_word)li" (vector-ref lit 0) ")") (gen "lf[" (first params) #\])) ) ) + + ((##core#float) + (gen (first params))) ((if) (gen #t "if(C_truep(") @@ -146,12 +149,25 @@ (loop (cdr bs) (add1 i) (sub1 count)) ] [else (expr (car bs) i)] ) ) ) - ((##core#let_unboxed) - (let ((name (first params))) - (gen #t name #\=) + ((##core#let_float) + (let ((fi (first params))) + (gen #t #\f fi #\=) (expr (first subs) i) (gen #\;) (expr (second subs) i))) + + ((##core#float-variable) + (gen #\f (first params))) + + ((##core#unbox_float) + (gen "C_flonum_magnitude(") + (expr (first subs) i) + (gen ")")) + + ((##core#box_float) + (gen "C_flonum(&a,") + (expr (first subs) i) + (gen ")")) ((##core#ref) (gen "((C_word*)") @@ -474,19 +490,6 @@ (expr (second subs) i) (gen "),C_SCHEME_UNDEFINED)") ) ) - ((##core#unboxed_ref) - (gen (first params))) - - ((##core#unboxed_set!) - (gen "((" (first params) #\=) - (expr (first subs) i) - (gen "),C_SCHEME_UNDEFINED)")) - - ((##core#inline_unboxed) ;XXX is this needed? - (gen (first params) "(") - (expr-args subs i) - (gen #\))) - ((##core#switch) (gen #t "switch(") (expr (first subs) i) @@ -804,7 +807,7 @@ (direct (lambda-literal-direct ll)) (rest-mode (lambda-literal-rest-argument-mode ll)) (temps (lambda-literal-temporaries ll)) - (ubtemps (lambda-literal-unboxed-temporaries ll)) + (ftemps (lambda-literal-float-temporaries ll)) (topname (toplevel unit-name))) (when empty-closure (debugging 'o "dropping unused closure argument" id)) (gen #t #t) @@ -842,11 +845,11 @@ (do ([i n (add1 i)] [j (+ temps (if looping (sub1 n) 0)) (sub1 j)] ) ((zero? j)) - (gen #t "C_word t" i #\;) ) - (for-each - (lambda (ubt) - (gen #t (utype (cdr ubt)) #\space (car ubt) #\;)) - ubtemps))) + (gen #t "C_word t" i #\;)) + (for-each + (lambda (i) + (gen #t "double f" i #\;)) + ftemps))) (cond ((eq? 'toplevel id) (let ([ldemand (foldl (lambda (n lit) (+ n (literal-size lit))) 0 literals)] [llen (length literals)] ) diff --git a/chicken.h b/chicken.h index 141ec2ee..430b7fff 100644 --- a/chicken.h +++ b/chicken.h @@ -1507,15 +1507,19 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret; #define C_ub_i_flonum_times(x, y) ((x) * (y)) #define C_ub_i_flonum_quotient(x, y) ((x) / (y)) -#define C_ub_i_flonum_equalp(n1, n2) ((n1) == (n2)) -#define C_ub_i_flonum_greaterp(n1, n2) ((n1) > (n2)) -#define C_ub_i_flonum_lessp(n1, n2) ((n1) < (n2)) -#define C_ub_i_flonum_greater_or_equal_p(n1, n2) ((n1) >= (n2)) -#define C_ub_i_flonum_less_or_equal_p(n1, n2) ((n1) <= (n2)) +#define C_ub_i_flonum_equalp(n1, n2) C_mk_bool((n1) == (n2)) +#define C_ub_i_flonum_greaterp(n1, n2) C_mk_bool((n1) > (n2)) +#define C_ub_i_flonum_lessp(n1, n2) C_mk_bool((n1) < (n2)) +#define C_ub_i_flonum_greater_or_equal_p(n1, n2) C_mk_bool((n1) >= (n2)) +#define C_ub_i_flonum_less_or_equal_p(n1, n2) C_mk_bool((n1) <= (n2)) + +#define C_ub_i_flonum_nanp(x) C_mk_bool(C_isnan(x)) +#define C_ub_i_flonum_infinitep(x) C_mk_bool(C_isinf(x)) +#define C_ub_i_flonum_finitep(x) C_mk_bool(C_isfinite(x)) #define C_ub_i_pointer_inc(p, n) ((void *)((unsigned char *)(p) + (n))) -#define C_ub_i_pointer_eqp(p1, p2) ((p1) == (p2)) -#define C_ub_i_null_pointerp(p) ((p) == NULL) +#define C_ub_i_pointer_eqp(p1, p2) C_mk_bool((p1) == (p2)) +#define C_ub_i_null_pointerp(p) C_mk_bool((p) == NULL) #define C_ub_i_pointer_u8_ref(p) (*((unsigned char *)(p))) #define C_ub_i_pointer_s8_ref(p) (*((signed char *)(p))) diff --git a/chicken.scm b/chicken.scm index 78920f3b..501da969 100644 --- a/chicken.scm +++ b/chicken.scm @@ -97,14 +97,13 @@ ((1) (set! options (cons* 'optimize-leaf-routines - ;XXX 'lfa2 options)) ) ((2) (set! options (cons* 'optimize-leaf-routines 'inline ;XXX 'clustering - ;XXX 'lfa2 + 'lfa2 options)) ) ((3) (set! options @@ -113,7 +112,7 @@ 'inline-global 'local ;XXX 'clustering - ;XXX 'lfa2 + 'lfa2 'specialize options) ) ) ((4) @@ -123,7 +122,7 @@ 'inline-global 'specialize ;XXX 'clustering - ;XXX 'lfa2 + 'lfa2 'local 'unsafe options) ) ) (else diff --git a/core.scm b/core.scm index 3ecdd817..09e9e1e3 100644 --- a/core.scm +++ b/core.scm @@ -154,13 +154,18 @@ ; - Core language: ; ; [##core#variable {}] +; [##core#float-variable {}] ; [if {} )] -; [quote {}] +; [quote {}] +; [##core#float {}] ; [let {} ] ; [##core#lambda { (... [. ]) } ] ; [set! { [always-immediate?]} ] ; [##core#undefined {}] ; [##core#primitive {}] +; [##core#let_float {} ] +; [##core#box_float {} ] +; [##core#unbox_float {} ] ; [##core#inline {} ...] ; [##core#inline_allocate { } ...] ; [##core#inline_ref { }] @@ -185,18 +190,19 @@ ; ; [if {} ] ; [quote {}] +; [##core#float {}] ; [##core#bind {} ... ] -; [##core#let_unboxed { } ] +; [##core#float-variable {}] ; [##core#undefined {}] -; [##core#unboxed_ref { []}] -; [##core#unboxed_set! { } ] +; [##core#let_float {} ] +; [##core#box_float {} ] +; [##core#unbox_float {} ] ; [##core#inline {} ...] ; [##core#inline_allocate {} ...] ; [##core#inline_ref { }] ; [##core#inline_update { } ] ; [##core#inline_loc_ref {} ] ; [##core#inline_loc_update {} ] -; [##core#inline_unboxed {} ...] ; [##core#debug-event { }] ; [##core#closure {} ...] ; [##core#box {} ] @@ -311,7 +317,7 @@ foreign-stub-cps foreign-stub-id foreign-stub-name foreign-stub-return-type lambda-literal-id lambda-literal-external lambda-literal-argument-count lambda-literal-rest-argument lambda-literal-rest-argument-mode - lambda-literal-temporaries lambda-literal-unboxed-temporaries + lambda-literal-temporaries lambda-literal-float-temporaries lambda-literal-callee-signatures lambda-literal-allocated lambda-literal-closure-size lambda-literal-looping lambda-literal-customizable lambda-literal-body lambda-literal-direct @@ -1912,7 +1918,8 @@ (params (node-parameters n)) (class (node-class n)) ) (case (node-class n) - ((##core#variable quote ##core#undefined ##core#primitive ##core#provide) (k n)) + ((##core#variable quote ##core#undefined ##core#primitive ##core#provide) + (k n)) ((if) (let* ((t1 (gensym 'k)) (t2 (gensym 'r)) (k1 (lambda (r) (make-node '##core#call (list #t) (list (varnode t1) r)))) ) @@ -2530,7 +2537,9 @@ (class (node-class n)) ) (case class - ((quote ##core#undefined ##core#provide ##core#proc) n) + ((quote ##core#undefined ##core#provide ##core#proc ##core#float + ##core#float-variable) + n) ((##core#variable) (let* ((var (first params)) @@ -2542,7 +2551,8 @@ ((if ##core#call ##core#inline ##core#inline_allocate ##core#callunit ##core#inline_ref ##core#inline_update ##core#debug-event ##core#switch ##core#cond ##core#direct_call ##core#recurse ##core#return - ##core#inline_loc_ref + ##core#let_float ##core#box_float ##core#unbox_float + ##core#inline_loc_ref ##core#inline_loc_update) (make-node (node-class n) params (maptransform subs here closure)) ) @@ -2692,7 +2702,7 @@ (define-record-type lambda-literal (make-lambda-literal id external arguments argument-count rest-argument temporaries - unboxed-temporaries callee-signatures allocated directly-called + float-temporaries callee-signatures allocated directly-called closure-size looping customizable rest-argument-mode body direct) lambda-literal? (id lambda-literal-id) ; symbol @@ -2702,7 +2712,7 @@ (argument-count lambda-literal-argument-count) ; integer (rest-argument lambda-literal-rest-argument) ; symbol | #f (temporaries lambda-literal-temporaries) ; integer - (unboxed-temporaries lambda-literal-unboxed-temporaries) ; ((sym . utype) ...) + (float-temporaries lambda-literal-float-temporaries) ; (integer ...) (callee-signatures lambda-literal-callee-signatures) ; (integer ...) (allocated lambda-literal-allocated) ; integer ;; lambda-literal-directly-called is used nowhere @@ -2722,7 +2732,7 @@ ;; Use analysis db as optimistic heuristic for procedure table size (lambda-table (make-vector (fx* (fxmax current-analysis-database-size 1) 3) '())) (temporaries 0) - (ubtemporaries '()) + (float-temporaries '()) (allocated 0) (looping 0) (signatures '()) @@ -2764,7 +2774,7 @@ (class (node-class n)) ) (case class - ((##core#undefined ##core#proc) n) + ((##core#undefined ##core#proc ##core#float) n) ((##core#variable) (walk-var (first params) e e-count #f) ) @@ -2785,7 +2795,11 @@ ((##core#inline_allocate) (set! allocated (+ allocated (second params))) (make-node class params (mapwalk subs e e-count here boxes)) ) - + + ((##core#box_float) + (set! allocated (+ allocated 4)) ;; words-per-flonum + (make-node class params (mapwalk subs e e-count here boxes))) + ((##core#inline_ref) (set! allocated (+ allocated (bytes->words (estimate-foreign-result-size (second params))))) (make-node class params '()) ) @@ -2822,13 +2836,13 @@ ((##core#lambda ##core#direct_lambda) (let ((temps temporaries) - (ubtemps ubtemporaries) + (ftemps float-temporaries) (sigs signatures) (lping looping) (alc allocated) (direct (eq? class '##core#direct_lambda)) ) (set! temporaries 0) - (set! ubtemporaries '()) + (set! float-temporaries '()) (set! allocated 0) (set! signatures '()) (set! looping 0) @@ -2867,7 +2881,7 @@ argc rest (add1 temporaries) - ubtemporaries + float-temporaries signatures allocated (or direct (memq id direct-call-ids)) @@ -2883,7 +2897,7 @@ direct) ) (set! looping lping) (set! temporaries temps) - (set! ubtemporaries ubtemps) + (set! float-temporaries ftemps) (set! allocated alc) (set! signatures (lset-adjoin/eq? sigs argc)) (make-node '##core#proc (list (first params)) '()) ) ) ) ) ) @@ -2900,12 +2914,12 @@ (append (##sys#fast-reverse params) e) (fx+ e-count 1) here (append boxvars boxes)) ) ) ) ) - ((##core#let_unboxed) - (let* ((var (first params)) - (val (first subs)) ) - (set! ubtemporaries (alist-cons var (second params) ubtemporaries)) + ((##core#let_float) + (let ((i (first params)) + (val (first subs))) + (set! float-temporaries (cons i float-temporaries)) (make-node - '##core#let_unboxed params + '##core#let_float params (list (walk val e e-count here boxes) (walk (second subs) e e-count here boxes) ) ) ) ) diff --git a/lfa2.scm b/lfa2.scm index a3e1c114..81b5fab4 100644 --- a/lfa2.scm +++ b/lfa2.scm @@ -29,7 +29,9 @@ ;; by inlined accessors (for example when using record structures). ;; Specialization takes place before inlining, so even though we have ;; the type-information, later inlining will still keep the code for -;; checking argument types. +;; checking argument types. Additionally, this pass detects unboxing +;; opportunities for floating point values and replaces uses of certain +;; fp operations with unboxed ones. (declare @@ -37,7 +39,7 @@ (uses extras support)) (module chicken.compiler.lfa2 - (perform-secondary-flow-analysis) + (perform-secondary-flow-analysis perform-unboxing) (import scheme chicken.base @@ -48,18 +50,6 @@ (include "tweaks") (include "mini-srfi-1.scm") -(define d-depth 0) -(define lfa2-debug #t) - -(define (d fstr . args) - (when (and lfa2-debug (##sys#debug-mode?)) - (printf "[debug|~a] ~a~?~%" d-depth (make-string d-depth #\space) fstr args)) ) - -(define dd d) - -(define-syntax d (syntax-rules () ((_ . _) (void)))) -(define-syntax dd (syntax-rules () ((_ . _) (void)))) - ;;; Maps checks to types @@ -164,21 +154,84 @@ ("C_a_i_list7" pair) ("C_a_i_list8" pair) ("C_a_i_cons" pair) - ("C_a_i_flonum" flonum) - ("C_a_i_fix_to_flo" flonum) - ("C_a_i_big_to_flo" flonum) + ("C_a_i_flonum" float) + ("C_a_i_fix_to_flo" float) + ("C_a_i_big_to_flo" float) ("C_a_i_fix_to_big" bignum) ("C_a_i_bignum0" bignum) ("C_a_i_bignum1" bignum) ("C_a_i_bignum2" bignum) - ;;XXX there are endless more - is it worth it? + ("C_a_i_fix_to_flo" float) + ("C_a_i_flonum_abs" float) + ("C_a_i_flonum_acos" float) + ("C_a_i_flonum_actual_quotient_checked" float) + ("C_a_i_flonum_asin" float) + ("C_a_i_flonum_atan2" float) + ("C_a_i_flonum_atan" float) + ("C_a_i_flonum_ceiling" float) + ("C_a_i_flonum_cos" float) + ("C_a_i_flonum_difference" float) + ("C_a_i_flonum_exp" float) + ("C_a_i_flonum_expt" float) + ("C_a_i_flonum_floor" float) + ("C_a_i_flonum_gcd" float) + ("C_a_i_flonum_log" float) + ("C_a_i_flonum_modulo_checked" float) + ("C_a_i_flonum_negate" float) + ("C_a_i_flonum_plus" float) + ("C_a_i_flonum_quotient_checked" float) + ("C_a_i_flonum_quotient" float) + ("C_a_i_flonum_remainder_checked" float) + ("C_a_i_flonum_round" float) + ("C_a_i_flonum_round_proper" float) + ("C_a_i_flonum_sin" float) + ("C_a_i_flonum_sqrt" float) + ("C_a_i_flonum_tan" float) + ("C_a_i_flonum_times" float) + ("C_a_i_flonum_truncate" float) + ("C_a_u_i_f64vector_ref" float) + ("C_a_u_i_f32vector_ref" float) + ;;XXX are there more? )) +(define +unboxed-map+ + '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op) + ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op) + ("C_a_i_flonum_times" "C_ub_i_flonum_times" op) + ("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op) + ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred) + ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred) + ("C_flonum_lessp" "C_ub_i_flonum_lessp" pred) + ("C_flonum_greater_or_equal_p" "C_ub_i_flonum_greater_or_equal_p" pred) + ("C_flonum_less_or_equal_p" "C_ub_i_flonum_less_or_equal_p" pred) + ("C_u_i_flonum_nanp" "C_ub_i_flonum_nanp" pred) + ("C_u_i_flonum_infinitep" "C_ub_i_flonum_infnitep" pred) + ("C_u_i_flonum_finitepp" "C_ub_i_flonum_finitep" pred) + ("C_a_i_flonum_sin" "C_sin" op) + ("C_a_i_flonum_cos" "C_cos" op) + ("C_a_i_flonum_tan" "C_tan" op) + ("C_a_i_flonum_asin" "C_asin" op) + ("C_a_i_flonum_acos" "C_acos" op) + ("C_a_i_flonum_atan" "C_atan" op) + ("C_a_i_flonum_atan2" "C_atan2" op) + ("C_a_i_flonum_exp" "C_exp" op) + ("C_a_i_flonum_expr" "C_pow" op) + ("C_a_i_flonum_log" "C_log" op) + ("C_a_i_flonum_sqrt" "C_sqrt" op) + ("C_a_i_flonum_truncate" "C_trunc" op) + ("C_a_i_flonum_ceiling" "C_ceil" op) + ("C_a_i_flonum_floor" "C_floor" op) + ("C_a_i_flonum_round" "C_round" op) + ("C_a_i_flonum_abs" "C_fabs" op) + ("C_a_u_i_f32vector_ref" "C_ub_i_f32vector_ref" acc) + ("C_a_u_i_f64vector_ref" "C_ub_i_f64vector_ref" acc))) + ;;; Walk nodes and perform simplified type-analysis (define (perform-secondary-flow-analysis node db) - (let ((stats '())) + (let ((stats '()) + (floatvars '())) (define (constant-result lit) ;; a simplified variant of the one in scrutinizer.scm @@ -255,6 +308,27 @@ (assq (caar ae) te) ) => cdr) (else (loop (cdr ae)))))))) + + (define (varnode? n) + (eq? '##core#variable (node-class n))) + + (define (floatvar? var) + (assq var floatvars)) + + (define (eliminate-floatvar var) + (set! floatvars + (remove (lambda (a) (eq? var (car a))) floatvars))) + + (define (count-floatvar node acc #!optional (n 1)) + (cond ((and (varnode? node) + (assq (first (node-parameters node)) floatvars)) + => + (lambda (a) + (set-car! (acc a) (+ n (car (acc a)))))))) + + (define (add-boxed node) (count-floatvar node cdr)) + (define (add-unboxed node) (count-floatvar node cddr)) + (define (sub-boxed node) (count-floatvar node cdr -1)) (define (walk n te ae) (let ((class (node-class n)) @@ -262,6 +336,10 @@ (subs (node-subexpressions n))) (case class ((##core#variable) + (when (and (floatvar? (first params)) + (not (assq (first params) te))) + (eliminate-floatvar (first params))) + (add-boxed n) (vartype (first params) te ae)) ((if ##core#cond) (let ((tr (walk (first subs) te ae))) @@ -279,13 +357,20 @@ ((let) (let* ((val (first subs)) (var (first params)) - (r (walk val te ae))) - (walk (second subs) - (if (assigned? var) + (r (walk val te ae)) + (avar (assigned? var))) + (cond ((and (not avar) + (eq? 'float r) + (not (floatvar? var))) + (set! floatvars (cons (list var 0 0) floatvars)) + (add-unboxed val)) + (else (add-boxed val))) + (walk (second subs) + (if avar te (alist-cons var r te)) - (if (and (eq? '##core#variable (node-class val)) - (not (assigned? var)) + (if (and (varnode? val) + (not avar) (not (assigned? (first (node-parameters val))))) (let ((var2 (first (node-parameters val)))) (alist-cons var var2 (alist-cons var2 var ae))) @@ -297,16 +382,26 @@ (walk (first subs) '() '()) 'procedure) ((set! ##core#set!) ;XXX is ##core#set! still used? - (walk (first subs) te ae) - 'undefined) + (let ((val (first subs))) + (when (and (varnode? val) + (floatvar? (first (node-parameters val)))) + (eliminate-floatvar (first (node-parameters val)))) + (walk val te ae) + 'undefined)) ((##core#undefined) 'undefined) ((##core#primitive) 'procedure) ((##core#inline ##core#inline_allocate) - (for-each (cut walk <> te ae) subs) + (let ((ubop (assoc (first params) +unboxed-map+))) + (for-each + (lambda (arg) + (walk arg te ae) + (when ubop (add-unboxed arg))) + subs)) (cond ((assoc (first params) +type-check-map+) => (lambda (a) (let ((r1 (walk (first subs) te ae))) (cond (unsafe + (sub-boxed (first subs)) (extinguish! n "C_i_noop")) ((eq? '*struct* (cadr a)) ;; handle known structure type @@ -316,12 +411,15 @@ (let ((st (first (node-parameters (second subs))))) (when (and (symbol? st) (eq? st (second r1))) + (sub-boxed (first subs)) (extinguish! n "C_i_noop"))))) ((and (pair? r1) (eq? 'boolean (car r1))) (when (memq 'boolean (cdr a)) + (sub-boxed (first subs)) (extinguish! n "C_i_noop"))) ;; handle other types ((member r1 (cdr a)) + (sub-boxed (first subs)) (extinguish! n "C_i_noop"))) '*))) ((assoc (first params) +ffi-type-check-map+) => @@ -329,48 +427,53 @@ (let ((arg (first subs)) (r1 (walk (first subs) te ae))) (when (member r1 (cdr a)) - (node-class-set! n (node-class arg)) - (node-parameters-set! n (node-parameters arg)) - (node-subexpressions-set! n (node-subexpressions arg))) - ;; the ffi checks are enforcing so we always end up with + (sub-boxed (first subs)) + (node-class-set! n (node-class arg)) + (node-parameters-set! n (node-parameters arg)) + (node-subexpressions-set! n (node-subexpressions arg))) + ;; the ffi checks are enforcing so we always end up with ;; the correct type r1))) ((assoc (first params) +predicate-map+) => (lambda (a) (let ((arg (first subs))) - (if (eq? '##core#variable (node-class arg)) - `(boolean - ((,(first (node-parameters arg)) - . - ,(if (eq? '*struct* (cadr a)) - (if (eq? 'quote (node-class (second subs))) - (let ((st (first - (node-parameters - (second subs))))) - (if (symbol? st) - `(struct ,st) - 'struct)) - 'struct) - (cadr a)))) - ()) - (let ((r1 (walk (first subs) te ae))) - (cond ((eq? '*struct* (cadr a)) - ;; known structure type - (when (and (pair? r1) - (eq? 'struct (first r1)) - (eq? 'quote (node-class (second subs)))) - (let ((st (first - (node-parameters (second subs))))) - (when (and (symbol? st) - (eq? st (second r1))) - (extinguish! n "C_i_true"))))) - ((and (pair? r1) (eq? 'boolean (car r1))) - (when (memq 'boolean (cdr a)) - (extinguish! n "C_i_true"))) - ;; other types - ((member r1 (cdr a)) - (extinguish! n "C_i_true"))) - 'boolean))))) + (cond ((varnode? arg) + `(boolean + ((,(first (node-parameters arg)) + . + ,(if (eq? '*struct* (cadr a)) + (if (eq? 'quote (node-class (second subs))) + (let ((st (first + (node-parameters + (second subs))))) + (if (symbol? st) + `(struct ,st) + 'struct)) + 'struct) + (cadr a)))) + ())) + (else + (let ((r1 (walk arg te ae))) + (cond ((eq? '*struct* (cadr a)) + ;; known structure type + (when (and (pair? r1) + (eq? 'struct (first r1)) + (eq? 'quote (node-class (second subs)))) + (let ((st (first + (node-parameters (second subs))))) + (when (and (symbol? st) + (eq? st (second r1))) + (sub-boxed arg) + (extinguish! n "C_i_true"))))) + ((and (pair? r1) (eq? 'boolean (car r1))) + (when (memq 'boolean (cdr a)) + (sub-boxed arg) + (extinguish! n "C_i_true"))) + ;; other types + ((member r1 (cdr a)) + (sub-boxed arg) + (extinguish! n "C_i_true"))) + 'boolean)))))) ((assoc (first params) +constructor-map+) => (lambda (a) (let ((arg1 (and (pair? subs) (first subs)))) @@ -394,5 +497,96 @@ (print "eliminated type checks:") (for-each (lambda (ss) (printf " ~a:\t~a~%" (car ss) (cdr ss))) - stats)))))) + stats)))) + floatvars)) + + +(define (perform-unboxing node floatvar-counts) + (let ((floatvars (filter-map + (lambda (a) + (and (= (cadr a) (caddr a)) + (car a))) + floatvar-counts)) + (count 0)) + + (define (walk/unbox n) + (let ((class (node-class n)) + (params (node-parameters n)) + (subs (node-subexpressions n))) + (case class + ((quote) + (let ((c (first params))) + (if (##core#inline "C_i_flonump" c) + (make-node '##core#float (list c) '()) + n))) + ((##core#variable) + (let ((i (posq (first params) floatvars))) + (if i + (make-node '##core#float-variable (cons i params) '()) + (make-node '##core#unbox_float '() (list n))))) + ((##core#inline ##core#inline_allocate) + (cond ((assoc (first params) +unboxed-map+) => + (lambda (a) + (let ((ub (second a)) + (type (third a))) + (set! count (add1 count)) + (make-node '##core#inline + (list ub) + (map (if (eq? type 'op) + walk/unbox + walk) + subs))))) + (else + (make-node '##core#unbox_float '() + (list (make-node class params + (map walk subs))))))) + (else (make-node '##core#unbox_float '() (list (walk n))))))) + + (define (walk n) + (let ((class (node-class n)) + (params (node-parameters n)) + (subs (node-subexpressions n))) + (case class + ((##core#variable) + (let ((i (posq (first params) floatvars))) + (if i + (make-node '##core#box_float '() + (list (make-node '##core#float-variable + (cons i params) '()))) + n))) + ((let) + (let* ((val (first subs)) + (var (first params)) + (i (posq var floatvars))) + (if i + (make-node '##core#let_float (list i var) + (list (walk/unbox val) + (walk (second subs)))) + (make-node 'let params (map walk subs))))) + ((##core#inline ##core#inline_allocate) + (cond ((assoc (first params) +unboxed-map+) => + (lambda (a) + (let ((ub (second a)) + (type (third a))) + (set! count (add1 count)) + (let ((n (make-node '##core#inline + (list ub) + (map walk/unbox subs)))) + (case type + ((pred) n) + (else (make-node '##core#box_float '() + (list n)))))))) + (else (make-node class params (map walk subs))))) + (else (make-node class params (map walk subs)))))) + + (let ((node (walk node))) + (with-debugging-output + '(x o) + (lambda () + (printf "number of unboxed float variables: ~a\n" + (length floatvars)) + (printf "number of inline operations replaced with unboxed ones: ~a\n" + count))) + node))) + ) diff --git a/manual/Using the compiler b/manual/Using the compiler index a2fe75a8..2448528f 100644 --- a/manual/Using the compiler +++ b/manual/Using the compiler @@ -121,7 +121,7 @@ the source text should be read from standard input. ; -optimize-leaf-routines : Enable leaf routine optimization. -; -optimize-level LEVEL : Enables certain sets of optimization options. {{LEVEL}} should be an integer. Level {{0}} is equivalent to {{-no-usual-integrations -no-compiler-syntax}} (no optimization), level {{1} is equivalent to {{-optimize-leaf-routines}} (minimal optimization), level {{2}} is equivalent to {{-optimize-leaf-routines -inline}} (enable optimizations that do not break standard compliance, this is the default), level {{3}} is equivalent to {{-optimize-leaf-routines -local -inline -inline-global -specialize}} (maximal optimization, while still "safe"), level {{4}} is equivalent to {{-optimize-leaf-routines -local -inline -inline-global -specialize -unsafe}} (maximal optimization, "unsafe") and any higher level is equivalent to {{-optimize-leaf-routines -block -inline -inline-global -specialize -unsafe -disable-interrupts -no-trace -no-lambda-info -clustering -lfa2}} (all possible optimizations, "unsafe"). +; -optimize-level LEVEL : Enables certain sets of optimization options. {{LEVEL}} should be an integer. Level {{0}} is equivalent to {{-no-usual-integrations -no-compiler-syntax}} (no optimization), level {{1} is equivalent to {{-optimize-leaf-routines}} (minimal optimization), level {{2}} is equivalent to {{-optimize-leaf-routines -inline -lfa2}} (enable optimizations that do not break standard compliance, this is the default), level {{3}} is equivalent to {{-optimize-leaf-routines -local -inline -lfa2 -inline-global -specialize}} (maximal optimization, while still "safe"), level {{4}} is equivalent to {{-optimize-leaf-routines -local -inline -lfa2 -inline-global -specialize -unsafe}} (maximal optimization, "unsafe") and any higher level is equivalent to {{-optimize-leaf-routines -block -inline -lfa2 -inline-global -specialize -unsafe -disable-interrupts -no-trace -no-lambda-info -clustering}} (all possible optimizations, "unsafe"). ; -output-file FILENAME : Specifies the pathname of the generated C file. Default is to use the source filename with the extension replaced by {{.c}}. diff --git a/support.scm b/support.scm index 8d9baac2..de66b51b 100644 --- a/support.scm +++ b/support.scm @@ -254,7 +254,7 @@ (loop (cdr chars)) ) (cons c (loop (cdr chars))) ) ) ) ) ) ) ) -;; XXX: This too, but it's used only in compiler.scm, WTF? +;; XXX: This too, but it's used only in core.scm, WTF? (define (valid-c-identifier? name) (let ([str (string->list (->string name))]) (and (pair? str) @@ -645,7 +645,7 @@ (loop (- n 1) (cdr vals) (cons (walk (car vals)) bindings)) ) ) ) ((##core#unbox ##core#ref ##core#update ##core#update_i) (cons* class (walk (car subs)) params (map walk (cdr subs))) ) - ((##core#inline_allocate ##core#let_unboxed) + ((##core#inline_allocate) (cons* class params (map walk subs))) (else (cons class (append params (map walk subs)))) ) ) ) ) -- 2.16.2