From d38071cfa345c1a7f7b09983e43d8a54e991e57e Mon Sep 17 00:00:00 2001
From: felix <felix@call-with-current-continuation.org>
Date: Sun, 10 Apr 2022 16:22:48 +0200
Subject: [PATCH] Add fused multiply add operator for floats (suggested by
 Christian Himpe)

---
 NEWS                           | 3 +++
 c-platform.scm                 | 3 ++-
 chicken.h                      | 3 +++
 lfa2.scm                       | 2 ++
 library.scm                    | 6 ++++++
 manual/Acknowledgements        | 2 +-
 manual/Module (chicken flonum) | 3 ++-
 tests/library-tests.scm        | 2 ++
 types.db                       | 3 +++
 9 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index cbc75812..dffa9027 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,9 @@
 
 - Core libraries
   - Added "locative-index", kindly contributed by John Croisant.
+  - Added "fp*+" (fused multiply-add) to "chicken.flonum" module 
+    (suggested by Christian Himpe).
+
 
 - Build system
   - Default "cc" on BSD systems for building CHICKEN to avoid ABI problems
diff --git a/c-platform.scm b/c-platform.scm
index 6bb57ab4..fdbb1b83 100644
--- a/c-platform.scm
+++ b/c-platform.scm
@@ -149,7 +149,7 @@
 
 (define-constant +flonum-bindings+
   (map (lambda (x) (symbol-append 'chicken.flonum# x))
-       '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd
+       '(fp/? fp+ fp- fp* fp/ fp> fp< fp= fp>= fp<= fpmin fpmax fpneg fpgcd fp*+
 	 fpfloor fpceiling fptruncate fpround fpsin fpcos fptan fpasin fpacos
 	 fpatan fpatan2 fpexp fpexpt fplog fpsqrt fpabs fpinteger?)))
 
@@ -653,6 +653,7 @@
 (rewrite 'chicken.flonum#fp/? 16 2 "C_a_i_flonum_quotient_checked" #f words-per-flonum)
 (rewrite 'chicken.flonum#fpneg 16 1 "C_a_i_flonum_negate" #f words-per-flonum)
 (rewrite 'chicken.flonum#fpgcd 16 2 "C_a_i_flonum_gcd" #f words-per-flonum)
+(rewrite 'chicken.flonum#fp*+ 16 3 "C_a_i_flonum_multiply_add" #f words-per-flonum)
 
 (rewrite 'scheme#zero? 5 "C_eqp" 0 'fixnum)
 (rewrite 'scheme#zero? 2 1 "C_u_i_zerop2" #f)
diff --git a/chicken.h b/chicken.h
index 9274606a..9d15ab74 100644
--- a/chicken.h
+++ b/chicken.h
@@ -982,6 +982,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 # define C_access                   access
 # define C_getpid                   getpid
 # define C_getenv                   getenv
+# define C_fma                      fma
 #else
 /* provide this file and define C_PROVIDE_LIBC_STUBS if you want to use
    your own libc-replacements or -wrappers */
@@ -1204,6 +1205,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 #define C_a_i_flonum_plus(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) + C_flonum_magnitude(n2))
 #define C_a_i_flonum_difference(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) - C_flonum_magnitude(n2))
 #define C_a_i_flonum_times(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) * C_flonum_magnitude(n2))
+#define C_a_i_flonum_multiply_add(ptr, c, n1, n2, n3) C_flonum(ptr, fma(C_flonum_magnitude(n1), C_flonum_magnitude(n2), C_flonum_magnitude(n3)))
 #define C_a_i_flonum_quotient(ptr, c, n1, n2) C_flonum(ptr, C_flonum_magnitude(n1) / C_flonum_magnitude(n2))
 #define C_a_i_flonum_negate(ptr, c, n)  C_flonum(ptr, -C_flonum_magnitude(n))
 #define C_a_u_i_flonum_signum(ptr, n, x) (C_flonum_magnitude(x) == 0.0 ? (x) : ((C_flonum_magnitude(x) < 0.0) ? C_flonum(ptr, -1.0) : C_flonum(ptr, 1.0)))
@@ -1513,6 +1515,7 @@ typedef void (C_ccall *C_proc)(C_word, C_word *) C_noret;
 #define C_ub_i_flonum_difference(x, y)  ((x) - (y))
 #define C_ub_i_flonum_times(x, y)       ((x) * (y))
 #define C_ub_i_flonum_quotient(x, y)    ((x) / (y))
+#define C_ub_i_flonum_multiply_add(x, y, z)    C_fma((x), (y), (z))
 
 #define C_ub_i_flonum_equalp(n1, n2)    C_mk_bool((n1) == (n2))
 #define C_ub_i_flonum_greaterp(n1, n2)  C_mk_bool((n1) > (n2))
diff --git a/lfa2.scm b/lfa2.scm
index c9296e3d..38ed4da2 100644
--- a/lfa2.scm
+++ b/lfa2.scm
@@ -191,6 +191,7 @@
     ("C_a_i_flonum_sqrt" float)
     ("C_a_i_flonum_tan" float)
     ("C_a_i_flonum_times" float)
+    ("C_a_i_flonum_multiply_add" float)
     ("C_a_i_flonum_truncate" float)
     ("C_a_u_i_f64vector_ref" float)
     ("C_a_u_i_f32vector_ref" float)
@@ -201,6 +202,7 @@
   '(("C_a_i_flonum_plus" "C_ub_i_flonum_plus" op)
     ("C_a_i_flonum_difference" "C_ub_i_flonum_difference" op)
     ("C_a_i_flonum_times" "C_ub_i_flonum_times" op)
+    ("C_a_i_flonum_multiply_add" "C_ub_i_flonum_multiply_add" op)
     ("C_a_i_flonum_quotient" "C_ub_i_flonum_quotient" op)
     ("C_flonum_equalp" "C_ub_i_flonum_equalp" pred)
     ("C_flonum_greaterp" "C_ub_i_flonum_greaterp" pred)
diff --git a/library.scm b/library.scm
index 5c5c7280..870e0906 100644
--- a/library.scm
+++ b/library.scm
@@ -1590,6 +1590,12 @@ EOF
   (fp-check-flonums x y 'fp/)
   (##core#inline_allocate ("C_a_i_flonum_quotient" 4) x y) )
 
+(define (fp*+ x y z) 
+  (unless (and (flonum? x) (flonum? y) (flonum? z))
+    (##sys#error-hook (foreign-value "C_BAD_ARGUMENT_TYPE_NO_FLONUM_ERROR" int)
+      'fp*+ x y z) )
+  (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) x y z) )
+
 (define (fpgcd x y)
   (fp-check-flonums x y 'fpgcd)
   (##core#inline_allocate ("C_a_i_flonum_gcd" 4) x y))
diff --git a/manual/Acknowledgements b/manual/Acknowledgements
index 3ed9f829..73de8708 100644
--- a/manual/Acknowledgements
+++ b/manual/Acknowledgements
@@ -22,7 +22,7 @@ Martin Gasbichler, Abdulaziz Ghuloum, Joey Gibson, Stephen C. Gilardi,
 Mario Domenech Goulart, Joshua Griffith, Johannes Groedem, Damian
 Gryski, Matt Gushee, Andreas Gustafsson, Sven Hartrumpf, Jun-ichiro
 itojun Hagino, Ahdi Hargo, Matthias Heiler, Karl M. Hegbloom, Moritz Heidkamp,
-William P. Heinemann, Bill Hoffman, Eric Hoffman, Bruce Hoult, Hans HÃ¼bner,
+William P. Heinemann, Christian Himpe, Bill Hoffman, Eric Hoffman, Bruce Hoult, Hans HÃ¼bner,
 Markus HÃ¼lsmann, GÃ¶tz Isenmann, Paulo Jabardo, Wietse Jacobs, David Janssens,
 Christian JÃ¤ger, Robert Jensen, Matt Jones, Dale Jordan, Valentin Kamyshenko,
 Daishi Kato, Peter Keller, Christian Kellermann, Brad Kind, Ron Kneusel, "Kooda",
diff --git a/manual/Module (chicken flonum) b/manual/Module (chicken flonum)
index d780185b..69aab2fc 100644
--- a/manual/Module (chicken flonum)	
+++ b/manual/Module (chicken flonum)	
@@ -20,6 +20,7 @@ your code.
 <procedure>(fp- X Y)</procedure>
 <procedure>(fp* X Y)</procedure>
 <procedure>(fp/ X Y)</procedure>
+<procedure>(fp*+ X Y Z)</procedure>
 <procedure>(fpgcd X Y)</procedure>
 <procedure>(fpneg X)</procedure>
 <procedure>(fpmin X Y)</procedure>
@@ -52,7 +53,7 @@ Arithmetic floating-point operations.
 In safe mode, these procedures throw a type error when given non-float
 arguments. In unsafe mode, these procedures do not check their
 arguments. A non-flonum argument in unsafe mode can crash the
-application.
+application. {{fp*+}} implements fused multiply-add {{(X * Y) + Z}}.
 
 Note: {{fpround}} uses the rounding mode that your C library
 implements, which is usually different from R5RS.
diff --git a/tests/library-tests.scm b/tests/library-tests.scm
index d331871e..241203f4 100644
--- a/tests/library-tests.scm
+++ b/tests/library-tests.scm
@@ -304,6 +304,8 @@
 (assert (inexact= -42.0 (fpceiling -42.2)))
 (assert (not (fpinteger? 2.3)))
 (assert (fpinteger? 1.0))
+(assert (inexact= 7.0 (fp*+ 2.0 3.0 1.0)))
+(assert (inexact= 53.0 (fp*+ 10.0 5.0 3.0)))
 
 ;; string->symbol
 
diff --git a/types.db b/types.db
index e1de2124..acd9d80b 100644
--- a/types.db
+++ b/types.db
@@ -1194,6 +1194,9 @@
 (chicken.flonum#fp+ (#(procedure #:clean #:enforce #:foldable) chicken.flonum#fp+ (float float) float)
      ((float float) (##core#inline_allocate ("C_a_i_flonum_plus" 4) #(1) #(2)) ))
 
+(chicken.flonum#fp*+ (#(procedure #:clean #:enforce #:foldable) chicken.flonum#fp*+ (float float float) float)
+     ((float float float) (##core#inline_allocate ("C_a_i_flonum_multiply_add" 4) #(1) #(2) #(3)) ))
+
 (chicken.flonum#fp< (#(procedure #:clean #:enforce #:foldable) chicken.flonum#fp< (float float) boolean)
      ((float float) (##core#inline "C_flonum_lessp" #(1) #(2)) ))
 
-- 
2.28.0