guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

41/66: doc: Add CiSE article.


From: Ludovic Courtès
Subject: 41/66: doc: Add CiSE article.
Date: Wed, 29 Jun 2022 11:32:02 -0400 (EDT)

civodul pushed a commit to branch master
in repository maintenance.

commit 455acc4f2e5adf639843d0f2588d14b3a33212b2
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sat Mar 5 14:48:33 2022 +0100

    doc: Add CiSE article.
    
    * doc/cise-2022: New directory.
    * doc/programming-2022/security.sbib (lamb2021:reproducible):
    Comma-separate authors.
---
 doc/cise-2022/GNUmakefile                    |  21 +
 doc/cise-2022/cpu-tuning.skb                 | 601 +++++++++++++++++++++++++++
 doc/cise-2022/hpc.sbib                       |  96 +++++
 doc/cise-2022/images/cpu-simd-extensions.tex |  15 +
 doc/cise-2022/manifest.scm                   |  16 +
 doc/programming-2022/security.sbib           |   2 +-
 6 files changed, 750 insertions(+), 1 deletion(-)

diff --git a/doc/cise-2022/GNUmakefile b/doc/cise-2022/GNUmakefile
new file mode 100644
index 0000000..f305659
--- /dev/null
+++ b/doc/cise-2022/GNUmakefile
@@ -0,0 +1,21 @@
+# Build the article: cpu-tuning.skb -> cpu-tuning.tex -> cpu-tuning.pdf.
+SKRIBILO := skribilo
+PDFLATEX := pdflatex
+RUBBER   := rubber
+DOT      := dot
+DOT-OPTS := -Gratio=1.2 -Gwidth=15cm
+
+.DEFAULT_GOAL := cpu-tuning.pdf
+
+# Files that cpu-tuning.skb reads while Skribilo processes it.
+ILLUSTRATIONS :=                               \
+  images/cpu-simd-extensions.tex
+
+# Regenerate the LaTeX output whenever an included illustration changes.
+cpu-tuning.tex: $(ILLUSTRATIONS)
+
+%.pdf: %.tex
+	$(RUBBER) --pdf -I $$PWD "$<"
+
+%.tex: %.skb
+	$(SKRIBILO)  -t latex -o "$@" "$<"
+
+# Write to a temporary file first so that a failed 'dot' run does not
+# leave a truncated PDF behind.
+%.pdf: %.dot
+	$(DOT) -Tpdf $(DOT-OPTS) < "$<" > "$@.tmp"
+	mv "$@.tmp" "$@"
diff --git a/doc/cise-2022/cpu-tuning.skb b/doc/cise-2022/cpu-tuning.skb
new file mode 100644
index 0000000..ff7220c
--- /dev/null
+++ b/doc/cise-2022/cpu-tuning.skb
@@ -0,0 +1,601 @@
+(use-modules (skribilo engine)
+             (skribilo engine latex)
+             (skribilo ast)
+             (skribilo writer)
+             (skribilo output)
+             (skribilo utils strings)
+             (skribilo lib)
+             (skribilo evaluator)
+             (skribilo biblio author)
+             (skribilo source)
+             (skribilo source lisp)
+             (skribilo source parameters)
+             (ice-9 match)
+             (rnrs io ports))
+
+(define (---) ; emdash
+  ;; Resolve to "&mdash;" for HTML engines and to "---" for any other engine.
+  (resolve (lambda (node engine env)
+             (cond ((engine-format? "html" engine)
+                    (! "&mdash;"))
+                   (else
+                    (! "---"))))))
+
+(define (--) ; endash
+  ;; Resolve to "&ndash;" for HTML engines and to "--" for any other engine.
+  (resolve (lambda (node engine env)
+             (cond ((engine-format? "html" engine)
+                    (! "&ndash;"))
+                   (else
+                    (! "--"))))))
+
+(define (dash-dash)
+  ;; Two literal hyphens: "{-}{-}" for the LaTeX engine, and the plain
+  ;; string "--" for any other engine.
+  (resolve (lambda (node engine env)
+             (cond ((engine-format? "latex" engine)
+                    (! "{-}{-}"))
+                   (else
+                    "--")))))
+
+(define (url url)
+  ;; Reference to URL whose text is URL itself, wrapped in 'tt'.
+  (let ((label (tt url)))
+    (ref :text label :url url)))
+
+(define (=>)
+  ;; Return the 'symbol' markup for "=>".
+  (symbol "=>"))
+
+;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'.
+;; The two extra replacement rules are put in front of the private
+;; 'latex-tt-encoding' list of (skribilo engine latex).
+(define latex-tt-encoding
+  (append '((#\- "-\\-")
+            (#\h "h\\-"))   ;“authorizations”, “authenticate”
+          (@@ (skribilo engine latex) latex-tt-encoding)))
+
+(markup-writer 'tt (find-engine 'latex)
+   :before "{\\texttt{"
+   :action (lambda (node engine)
+              ;; Output the body through a fresh engine that delegates to
+              ;; ENGINE but filters strings with 'latex-tt-encoding'.
+              (let* ((tt-filter (make-string-replace latex-tt-encoding))
+                     (tt-engine (make-engine
+                                   (gensym "latex")
+                                   :delegate engine
+                                   :filter tt-filter
+                                   :custom (engine-customs engine)
+                                   :symbol-table (engine-symbol-table engine))))
+                 (output (markup-body node) tt-engine)))
+   :after "}}")
+
+(let ((latex (find-engine 'latex)))
+  ;; For pdflatex.
+  (engine-custom-set! latex 'image-format '("pdf"))
+
+  ;; Avoid "option clash" with acmart.
+  (engine-custom-set! latex 'hyperref #f))
+
+;; Append extra packages, color definitions and hyphenation hints to the
+;; LaTeX engine's existing 'usepackage custom.
+(engine-custom-set! (find-engine 'latex) 'usepackage
+                    (let ((u (engine-custom (find-engine 'latex)
+                                            'usepackage)))
+                      ;; See <https://en.wikibooks.org/wiki/LaTeX/Labels_and_Cross-referencing>
+                      ;; and <http://tug.org/pipermail/texhax/2010-September/015596.html>.
+                      (string-append u "\n"
+                                    "\\usepackage{inconsolata}\n"
+                                    "\\usepackage{tikz}\n"
+                                    "\\usetikzlibrary{arrows,shapes,shadows}\n"
+                                    "\\definecolor{guixorange1}{RGB}{243,154,38}  % guixorange P\n"
+                                    "\\definecolor{guixblue2}{RGB}{10,50,80} % guixblue S\n"
+                                    "\\definecolor{guixred2}{RGB}{230,68,57}  % red S\n"
+                                    "\\definecolor{guixdarkgrey}{RGB}{46,47,55} % guixdarkgrey S\n"
+
+                                    ;; Trick so that ‘…’ is properly
+                                    ;; typeset inside teletype text.
+                                    "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n"
+
+                                    ;; Improve hyphenation.
+                                    "\\hyphenation{Open-PGP}\n")))
+
+;; Customizations of the LaTeX engine for the IEEEcsmag document class,
+;; plus replacement writers for authors, images, programs, figures, and
+;; bibliography entry authors.
+(let ((latex (find-engine 'latex)))
+  (engine-custom-set! latex 'documentclass
+                      "\\documentclass{IEEEcsmag}")
+  ;; The document body emits \maketitle itself, via '!latex'.
+  (engine-custom-set! latex 'maketitle #f)
+
+  ;; Emit one \author{{NAME}} / \affil{AFFILIATION} pair per author.
+  (markup-writer '&latex-author latex
+     :action (lambda (n e)
+               (let ((body (markup-body n)))
+                 (for-each (lambda (a)
+                             (display "\\author{{")
+                             (output (markup-option a :name) e)
+                             (display "}}\n\\affil{\n")
+                             (output (markup-option a :affiliation) e)
+                             (display "}\n\n"))
+                           ;; BODY is either a single author or a list.
+                           (if (pair? body) body (list body))))))
+
+  ;; Images: :width is a fraction of \textwidth, defaulting to 0.5.
+  (markup-writer 'image latex
+     :options '(:file :url :width :height :zoom)
+     :action (lambda (n e)
+               (format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n"
+                       (or (markup-option n :width) 0.5)
+                       (markup-option n :file))))
+
+  ;; Programs of class "small" are typeset in \footnotesize.
+  (markup-writer 'prog latex
+     :class "small"
+     :options '(:line :mark)
+     :before "\n\n\\vspace{3mm}\n\\begin{footnotesize}\n"
+     :action (lambda (n e)
+               ;; Delegate actual work to the "real" 'prog'.
+               (output (prog :line (markup-option n :line)
+                             :mark (markup-option n :mark)
+                             (node-body n))
+                       e))
+     :after "\n\\end{footnotesize}\n")
+
+  ;; Figures: body in \scriptsize, followed by a caption that carries the
+  ;; label (the canonicalized identifier) and the legend.
+  (markup-writer 'figure latex
+     :options '(:legend :number :multicolumns)
+     :action (lambda (n e)
+               (let ((ident (markup-ident n))
+                     (legend (markup-option n :legend)))
+                 (display "\\begin{figure}[ht]\n\\begin{scriptsize}\n")
+                 (output (markup-body n) e)
+                 (display "\n\\end{scriptsize}\n")
+                 (format #t "\\caption{\\label{~a}"
+                         (string-canonicalize ident))
+                 (output legend e)
+                 (display "}\\end{figure}\n"))))
+
+  ;; Abbreviate first names when the author list is a plain string.
+  ;; NOTE(review): unlike the writers above, no engine is passed here;
+  ;; presumably this relies on the default engine -- confirm.
+  (markup-writer '&bib-entry-author
+     :action (lambda (n e)
+               (let ((names (markup-body n)))
+                 (evaluate-document
+                  (if (string? names)
+                      (abbreviate-first-names
+                       names
+                       abbreviate-author-first-names)
+                      names)
+                  e)))))
+
+;; Word count estimate: the tokens of strings found directly in markup
+;; bodies, plus a flat allowance of 250 words for each figure, for the
+;; "References" chapter, for the abstract, and for the biography.
+(define (word-count)
+  "Emit the word count."
+  (define (body-words body)
+    ;; Number of whitespace-separated tokens in the strings of BODY.
+    (match body
+      ((? string? str)
+       (length (string-tokenize str)))
+      ((? ast?) 0)  ;don’t double-count
+      ((things ...)
+       (apply + (map body-words things)))
+      (_ 0)))       ;anything else (numbers, #f, ...) carries no words
+
+  (color :fg "red" (bold
+     [,(resolve (lambda (n e env)
+                   (ast-fold (lambda (n r)
+                               (cond ((is-markup? n 'figure)
+                                      (+ r 250))
+                                     ((and (is-markup? n 'chapter)
+                                           (equal? (markup-option n :title)
+                                                   "References"))
+                                      (+ r 250))
+                                     ((container? n)
+                                      r)
+                                     ((markup? n)
+                                      (let ((body (markup-body n)))
+                                        (+ r (body-words body))))
+                                     (else r)))
+                              (+ 250 250)  ;abstract + biography
+                              (ast-document n)))) words.])))
+                                
+
+(define (abstract . body)
+  ;; Emit BODY inside a LaTeX 'abstract' environment.
+  (let ((template "\n\\begin{abstract}\n$1\n\\end{abstract}\n\n"))
+    (!latex template body)))
+
+;; Bibliography databases for the (ref :bib ...) citations in the document
+;; below; the first three are shared with other documents in sibling
+;; directories.
+(bibliography "../els-2013/guix.sbib")
+(bibliography "../reppar-2015/reppar.sbib")
+(bibliography "../programming-2022/security.sbib")
+(bibliography "hpc.sbib")
+
+
+(document :title [Reproducibility and Performance: Why Choose?]
+   ;;[Conciliating Performance and Reproducibility]
+   :author (list (author :name "Ludovic Courtès"
+                    :affiliation "Inria"
+                    :address "Bordeaux, France"))
+
+   (abstract [Research processes often rely on high-performance
+computing (HPC), but HPC is often seen as antithetical to
+“reproducibility”: one would have to choose between software that
+achieves high performance, and software that can be deployed in a
+reproducible fashion.  However, by giving up on reproducibility we would
+give up on verifiability, a foundation of the scientific process.  How
+can we conciliate performance and reproducibility?  This article looks
+at two performance-critical aspects in HPC: message passing (MPI) and
+CPU micro-architecture tuning.  Engineering work that has gone into
+performance portability has already proved fruitful, but some areas
+remain unaddressed when it comes to CPU tuning.  We propose package
+multi-versioning, a technique developed for GNU Guix, a tool for
+reproducible software deployment, and show that it allows us to
+implement CPU tuning without compromising on reproducibility and
+provenance tracking.])
+
+   (!latex "\n\\maketitle\n")
+   (!latex "\n\\chapterinitial{Introduction.}\n")
+
+   ;; (word-count)
+
+   (p [It should come as no surprise that the execution speed of programs is a
+primary concern in high-performance computing (HPC).  Many HPC
+practitioners would tell you that, among their top concerns, is the
+performance of high-speed networks used by the Message Passing Interface
+(MPI) and use of the latest vectorization extensions of modern CPUs.])
+
+   (p [This article focuses on the latter: tuning code for specific CPU
+micro-architectures, to reap the benefits of modern CPUs.  This question
+is particularly acute in the context of GNU Guix, a software deployment
+tool with strong support for ,(emph [reproducible deployment]).  We like
+to present Guix as a key element of the reproducible research toolbox:
+as more research output is produced by software, the ability to ,(emph
+[verify and validate]) research results depends on the ability to ,(emph
+[re-deploy and re-run]) the software.  We present a recently-introduced
+CPU-tuning option for Guix, the design choices we made, and how this
+affects reproducibility.])
+
+   (p [But let us first consider this central question in the HPC and
+scientific community: can “reproducibility” be achieved ,(emph
+[without]) sacrificing performance?  Our answer is a resounding “yes”,
+but that deserves clarifications.])
+
+  (chapter :title [Reproducibility & High Performance]
+     :number #f
+
+     (p [The author remembers advice heard at the beginning of their
+career in HPC—advice still given today—: that to get optimal MPI
+performance, you would have to use the vendor-provided MPI library; that
+to get your code to perform well on this new cluster, you would have to
+recompile the complete software stack locally; that using generic,
+pre-built binaries from a GNU/Linux distribution will not give you good
+performance.])
+
+     (p [From a software engineering viewpoint, this looks like a sad
+situation and an inefficient approach, dismissing the benefits of
+automated software deployment as pioneered by Debian, Red Hat, and
+others in the 90’s or, more recently, as popularized with container
+images.  It also means doing away with reproducibility, where
+“reproducibility” is to be understood in two different ways: first as
+the ability to re-deploy the same software stack on another machine or
+at a different point in time, and second as the ability to ,(emph [verify]) that
+binaries being run match the source code—the latter is what reproducible
+builds are concerned with ,(ref :bib 'lamb2021:reproducible).])
+
+     (p [But does it really have to be this way?  Engineering efforts to
+support ,(emph [performance portability]) suggest otherwise.  A mature
+MPI implementation like Open MPI, today, does achieve performance
+portability: it takes advantage of high-speed networking hardware by
+determining, at run-time, which drivers to use to obtain optimal
+performance for the network at hand—no recompilation is needed ,(ref :bib
+'courtes2019:openmpi).])
+
+     (p [Likewise, generic, pre-built binaries can and indeed often do
+take advantage of modern CPUs by selecting at run-time the most
+efficient implementation of performance-sensitive routines for the host
+CPU ,(ref :bib 'courtes2018:prebuilt).  There are cases, though, where
+this is ,(emph [not]) the case; these are those we will focus on in the
+remainder of this article.]))
+
+   (chapter :title [The Jungle of SIMD Extensions]
+      :number #f
+
+      (p [While major CPU architectures such as x86_64, AArch64, and
+POWER9 were defined years ago, CPU vendors regularly extend them.
+Extensions that matter most in HPC are vector extensions: single
+instruction/multiple data (SIMD) instructions and registers.  In this
+area, a ,(emph [lot]) has happened on x86_64 CPUs since the baseline
+instruction set architecture (ISA) was defined.  As shown in ,(numref
+:text [Figure] :ident "fig-simd-extensions"), Intel and AMD have been
+tacking ever more powerful SIMD extensions to their CPUs over the years,
+from SSE3 to AVX-512, leading to a wealth of CPU “micro-architectures”.
+This gives a high-level view, but just looking at generations of Intel
+processors by their code name—from “Nehalem” to “Skylake” ,(it [via])
+“Ivybridge”—shows an already more complicated story.])
+
+      (figure :legend [Timeline of x86_64 SIMD extensions]
+         :ident "fig-simd-extensions"
+         :multicolumns #t
+         (!latex (call-with-input-file "images/cpu-simd-extensions.tex"
+                    get-string-all)))
+
+      (p [Linear algebra routines that scientific software relies on
+greatly benefit from SIMD extensions.  For example, on a modest Intel
+CORE i7 processor (of the Skylake generation), the AVX2-optimized
+version of the dense matrix multiplication routines of Eigen ,(ref :bib
+'guennebaud2022:eigen), built with GCC 10.3, peaks at about 40 Gflops/s,
+compared to 11 Gflops/s for its baseline x86_64 version—four times
+faster!]))
+
+   (chapter :title [Portable Performance Through Function Multi-Versioning]
+      :number #f
+
+      (p [How to create binaries that are portable, yet are able to get
+the most out of the CPU on which they are executed?  This has been an
+important question for distributors of binaries.  Distributions such as
+Debian and CentOS provide the convenience of fast automated deployment,
+thanks to pre-built binaries; asking users to either recompile part of
+their software stack or give up on performance is not a reasonable
+alternative.])
+
+      (p [To address this and achieve performance portability,
+developers have largely adopted ,(emph [function multi-versioning])
+(FMV): the implementation provides multiple versions of “hot” routines,
+one for each relevant CPU micro-architecture, and picks the best one for
+the host CPU at run time.  Many pieces of performance-critical software
+already use this technique: the C standard library (libc) contains
+multiple versions of its string handling and math routines, the GMP
+library for multi-precision arithmetic uses FMV, and so do software
+packages ranging from cryptography libraries (Libgcrypt, Nettle) to
+linear algebra (OpenBLAS, FFTW).])
+
+      (p [To make it easier for developers to adopt FMV, the GNU
+compilation tool chain (GCC, the Binary Utilities, and the C Library),
+which is widely used in HPC, provides helpers at different levels.
+Developers can annotate relevant functions with the ,(tt [target_clones])
+attribute to instruct the compiler to generate optimized versions of the
+function for each selected architecture.  GCC not only generates these
+versions, but also generates code to choose the right function version
+for the host CPU at load time, with support from the dynamic linker,
+,(tt [ld.so]).  That relieves developers from the need to implement
+their own ad-hoc machinery.  From that perspective, it would seem that
+performance portability, ,(it [via]) FMV, is a solved problem.])
+
+   #;(stuff on auto-fmv commented out!
+
+      (p [To make the case for FMV, we wanted to see what it would take us to
+actually add FMV support to code that would benefit from it.  In the
+spirit of the Clear Linux automatic FMV patch
+generator (https://github.com/clearlinux/make-fmv-patch), we wrote an
+automatic FMV tool for
+Guix (https://gitlab.inria.fr/guix-hpc/function-multi-versioning): you
+would give it a package name, and it would:])
+
+      (itemize
+         (item [Build the package with the ,(tt [-fopt-info-vec]) compiler flag to gather
+     information about vectorization opportunities and their source code
+     location.])
+
+         (item [Generate a patch that, for each C function with vectorization
+     opportunities, adds the ,(tt [target_clones])
+     attribute to generate a couple of vectorized versions—generic,
+AVX2, and
+     AVX-512.])
+
+         (item [Build the package with this FMV patch.]))
+
+      (p [The tool can successfully FMV-patch a variety of packages
+written in C, such as the GNU Scientific Library (FIXME
+https://www.gnu.org/software/gsl), which contains plain sequential
+implementations of a variety of math routines.  It was an exciting
+engineering experiment… but we found it to be all too often
+inapplicable, for two reasons: performance-critical software already
+does FMV, or it is not written in C.]))
+
+      (p [There is at least one common pattern though where FMV is not
+applicable, or at least is not applied: C++ header-only libraries.
+These are libraries that provide generic template code in header files;
+that code is specialized ,(emph [at build time]) in software that uses
+them.  There is no shortage of C++ header-only math libraries providing
+efficient, optimized SIMD versions of their routines: Eigen, MIPP, xsimd
+and xtensor, SIMD Everywhere (SIMDe), Highway, and many more.  All
+these, except Highway, have in common that they do ,(emph [not]) support
+FMV.  Since they “just” provide headers, it is up to ,(emph [each])
+package using them to figure out what to do in terms of performance
+portability.])
+
+      (p [In practice though, software using these C++ header-only
+libraries rarely makes provisions for performance portability.  Thus,
+when compiling those packages for the baseline ISA, one misses out on
+all the vectorized implementations that libraries like Eigen provide.
+This is a known issue in search of a solution ,(ref :bib
+'larsen2021:eigen-fmv).  It can have a very concrete impact on
+performance since many scientific packages—the ARPACK-NG library for
+solving eigenvalue problems, the Ceres solver for optimization problems,
+the FEniCSx platform for solving differential equations, to name a
+few—depend on Eigen.])
+
+      #;(p [Fundamentally, run-time dispatch is at odds with the all-compile-time
+approach that header-only C++ template libraries are about.
+Furthermore, Eigen, for example, supports fine-grain vectorization; it
+may be used to operate on small matrices, as is common in computer
+graphics, and in that case inlining matrix operations is key to good
+performance—run-time dispatch would have to be done at a higher
+level.]))
+
+   (chapter :title [Reproducible Deployment]
+      :number #f
+
+      (p [Distributions such as Debian and Fedora that provide pre-built
+binaries miss out on SIMD optimizations of C++ header-only libraries
+like Eigen because they provide binaries targeting the baseline CPU
+architecture so that those binaries run on any CPU.  The Spack ,(ref
+:bib 'gamblin2015:spack) and EasyBuild ,(ref :bib 'geimer2014:easybuild)
+package managers address that by ,(emph [rebuilding]) software on the
+target computer, which allows them to instruct the compiler to optimize
+for the host CPU.])
+
+      (p [Unfortunately, EasyBuild and Spack both have limited support
+for reproducible deployment—they do not, in general, guarantee that you
+can redeploy the same software environment on different machines, or at
+different points in time.  This is because they build upon software
+provided by the host system—the compiler tool chain, “system” libraries,
+etc.—and that foundation differs from one system to another—e.g., CentOS
+might provide some version of GCC, and Ubuntu might provide another.])
+
+      (p [To avoid that, Guix builds software in ,(emph [isolated
+environments]), as pioneered by Nix ,(ref :bib '(dolstra2004:nix
+courtes2013:functional)), and its package collection is ,(emph
+[self-contained])—it does not rely on external software packages.  This
+is what makes Guix builds reproducible bit-for-bit—or in other words,
+,(emph [verifiable]) ,(ref :bib 'lamb2021:reproducible).  Given binaries
+and provenance data, anyone can independently verify the
+binary/source-code correspondence.])
+
+      (p [Guix provides a command-line interface similar to that of
+other package managers: ,(tt [guix install python]), for instance,
+installs the Python interpreter.  Package management is per-user rather
+than system-wide and does not require system administrator privileges,
+which makes it suitable for multi-user HPC clusters ,(ref :bib
+'courtes2015:reproducible).  To offer the level of flexibility that HPC
+users expect, Guix lets users customize packages ,(it [via]) ,(emph
+[package transformation options]) on the command line—for instance to
+swap two packages in the dependency graph—or through programming
+interfaces ,(ref :bib 'courtes2015:reproducible).])
+
+      (p [Quite uniquely, Guix supports ,(emph [“time traveling”]): with
+,(tt [guix time-machine]), users can run a specific revision of Guix and
+use it to deploy packages as they were defined in that revision.  The
+typical use case is redeploying software that was used to produce
+computational results for a scientific publication ,(ref :bib
+'(hinsen2020:staged-computation courtes2020:storage
+perkel2020:challenge)).  The command below deploys Python, NumPy, and
+their dependencies as they were defined in a Guix revision from October
+2021:])
+      
+      (prog :class "small" :line #f [
+guix time-machine --commit=b0735c79b0d1d341 -- \\
+  shell python python-numpy
+])
+
+      (p [Whether you run it today or two years from now, it will deploy
+the ,(emph [exact same binaries]), bit-for-bit, down to the C
+library.]))
+
+   (chapter :title [Package Multi-Versioning]
+      :number #f
+
+      (p [With our packaging hammer, one could envision a solution to
+these CPU tuning problems: if we cannot do function multi-versioning,
+what about implementing ,(emph [package]) multi-versioning?  Guix makes
+it easy to define package variants, so we can define package variants
+optimized for a specific CPU—compiled with ,(tt [-march=skylake]), for
+instance.  What we need is to define those variants “on the fly”.])
+
+      (p [The recently-introduced ,(tt [--tune]) package transformation
+option works along those lines.  Users can pass ,(tt [--tune]) to any of
+the command-line tools (,(tt [guix install]), ,(tt [guix shell]), etc.)
+and that causes “tunable” packages to be optimized for the host CPU.
+For example, here is how you would run Eigen’s matrix multiplication
+benchmark from the ,(tt [eigen-benchmarks]) package with
+micro-architecture tuning:])
+
+      (prog :class "small" :line #f [
+$ guix shell --tune eigen-benchmarks -- \\
+    benchBlasGemm 240 240 240
+guix shell: tuning for CPU skylake
+240 x 240 x 240
+cblas: 0.208547 (15.908 GFlops/s)
+eigen : 0.0720303 (46.06 GFlops/s)
+l1: 32768
+l2: 262144
+])
+
+      (p [,(tt [--tune]) determines the name of the host CPU as
+recognized by GCC’s (and Clang’s) ,(tt [-march]) option.  Users can
+override auto-detection by passing a CPU name—e.g., ,(tt
+[--tune=skylake-avx512]).  As mentioned earlier, we made the conscious
+choice of letting ,(tt [--tune]) affect solely software that packagers
+explicitly marked as “tunable”.  This ensures Guix does not end up
+rebuilding packages that could not possibly benefit from
+micro-architecture-specific optimizations, which would be a waste of
+resources.])
+
+      #;(p [(For the same
+reason, we rejected the idea of defining separate system types for the
+various x86_64 CPU micro-architectures the way Nix 2.4 did (FIXME
+https://discourse.nixos.org/t/nix-2-4-released/15822#other-features-2).)])
+
+      #;(p [In the spirit of avoiding needless package rebuilds, ,(tt [--tune])
+leverages the “graft” mechanism (XREF
+https://guix.gnu.org/manual/en/html_node/Security-Updates.html): package
+variants are ,(emph [grafted]) to the dependency graph, such that dependents of
+a tuned package do not need to be rebuilt.  To illustrate that, consider
+the figure below:])
+
+      ;;![Dependency graph of OpenCV, where the tuned variant of VTK is grafted.](/static/images/blog/cpu-tuning-graft.png)
+
+      #;(p [OpenCV depends on VTK, which depends on Eigen, as shown by the
+dotted arrows.  VTK is marked as tunable so it can benefit from SIMD
+optimizations in Eigen.  When ,(tt [--tune]) is passed, the optimized variant
+of VTK built with ,(tt [-march=skylake]) is generated and grafted onto the
+dependency graph, such that OpenCV itself does not need to be recompiled
+and instead is relinked against the optimized VTK variant.])
+
+      (p [This implementation of package multi-versioning does not
+sacrifice reproducibility.  When ,(tt [--tune]) is used, from Guix’s
+viewpoint, it is just an alternate, but well-defined dependency graph
+that gets built.  Guix records package transformation options that were
+used so it can “replay” them.  For example, one can export a ,(emph
+[manifest]) representing packages that have been deployed:])
+
+      (prog :class "small" :line #f [
+$ guix shell eigen-benchmarks --tune
+guix shell: tuning for CPU skylake
+\[env\]$ guix package --export-manifest \\
+              -p $GUIX_ENVIRONMENT
+(use-modules (guix transformations))
+
+(define transform1
+  (options->transformation
+    '((tune . "skylake"))))
+
+(packages->manifest
+  (list (transform1
+          (specification->package
+            "eigen-benchmarks"))))
+])
+
+      (p [The manifest above is a code snippet that can be passed to
+,(tt [guix shell]) or ,(tt [guix package]) to redeploy the package with
+the same tuning parameters.  Like other transformation options, ,(tt
+[--tune]) is accepted by all the commands; for example, here is how you
+would build a Docker image tuned for a particular CPU:])
+
+      (prog :class "small" :line #f [
+guix pack -f docker -S /bin=bin \\
+  eigen-benchmarks --tune=skylake
+])
+
+      #;(p [This comes in handy if you want to prepare an image to run on
+another cluster, where you know you can rely on a given CPU extension.])
+
+      #;(p [The Guix build farm is set up to build a few optimized package
+variants.  That way, users of ,(tt [--tune]) are likely to get pre-built
+binaries even for the optimized variants, making deployment just as fast
+as with non-tuned packages.  To achieve this, ,(tt [--tune]) skips
+running test suites when building packages: we cannot be sure that build
+machines implement the CPU micro-architecture at hand.]))
+
+   (chapter :title [Conclusion and Outlook]
+      :number #f
+
+      (p [We implemented what we call “package multi-versioning” for
+C/C++ software that lacks function multi-versioning and run-time
+dispatch, a notable example of which is optimized C++ header-only
+libraries.  It is another way to ensure that users do not have to trade
+reproducibility for performance.])
+
+      ;; refs:
+      ;; (FIXME https://docs.julialang.org/en/v1/devdocs/sysimg/)
+      ;; (FIXME https://docs.rs/multiversion/0.6.1/multiversion/)
+      (p [The scientific programming landscape has been evolving over
+the last few years.  It is encouraging to see that Julia offers function
+multi-versioning for its “system image”, and that, similarly, Rust
+supports it with annotations similar to GCC’s ,(tt [target_clones]).
+Hopefully these new development environments will support performance
+portability well enough that users and packagers will not need to worry
+about it.])
+      
+      (p [But first and foremost, it is up to us, research software
+engineers and scientists, to dispel the myth that performance is a valid
+excuse for non-reproducible computational workflows.]))
+
+   (chapter :title "References"
+      :number #f
+      (flush :side 'left
+      (the-bibliography
+      :sort bib-sort/first-author-last-name)))
+   
+   (!latex 
+    "\n\\begin{IEEEbiography}{Ludovic Courtès}\n$1\n\\end{IEEEbiography}\n"
+    [is a research software engineer at Inria, France.  He has been
+contributing to the development of GNU Guix since its inception in 2012
+and works on its use in support of reproducible research workflows.  He
+holds a PhD in computer science from LAAS-CNRS.  You can reach him at
+,(it [ludovic.courtes@inria.fr]).]))
+
+;; Local Variables:
+;; ispell-local-dictionary: "american"
+;; compile-command: "guix shell -m manifest.scm -- make -j5"
+;; eval: (setq indent-tabs-mode nil)
+;; End:
diff --git a/doc/cise-2022/hpc.sbib b/doc/cise-2022/hpc.sbib
new file mode 100644
index 0000000..8094028
--- /dev/null
+++ b/doc/cise-2022/hpc.sbib
@@ -0,0 +1,96 @@
+(misc courtes2019:openmpi
+  (title "Optimized and Portable Open MPI Packaging")
+  (author "Ludovic Courtès")
+  (year "2019")
+  (month "December")
+  (url "https://hpc.guix.info/blog/2019/12/optimized-and-portable-open-mpi-packaging/"))
+
+(misc courtes2018:prebuilt
+  (title "Pre-Built Binaries vs. Performance")
+  (author "Ludovic Courtès")
+  (year "2018")
+  (month "January")
+  (url "https://hpc.guix.info/blog/2018/01/pre-built-binaries-vs-performance/"))
+
+(misc guennebaud2022:eigen
+  (title "Eigen C++ linear algebra library")
+  (author "Gaël Guennebaud, Benoît Jacob, et al.")
+  (year "2022")
+  (month "March")
+  (url "https://eigen.tuxfamily.org"))
+  
+(inproceedings cassagne2018:mipp
+  (author "Adrien Cassagne, Olivier Aumage, Denis Barthou, Camille Leroux, Christophe Jégo")
+  (title "MIPP: A Portable C++ SIMD Wrapper and Its Use for Error Correction Coding in 5G Standard")
+  (year "2018")
+  (isbn "9781450356466")
+  (publisher "Association for Computing Machinery")
+  (address "New York, NY, USA")
+  (url "https://doi.org/10.1145/3178433.3178435")
+  (doi "10.1145/3178433.3178435")
+  (booktitle "Proceedings of the 2018 4th Workshop on Programming Models for SIMD/Vector Processing")
+  (articleno "2")
+  (numpages "8")
+  (keywords "wrapper, channel code, C++, SSE, SIMD, AVX-512, NEON, AVX")
+  (location "Vienna, Austria")
+  (series "WPMVP'18"))
+       
+(misc larsen2021:eigen-fmv
+  (title "Linking modules compiled for different SIMD instruction sets")
+  (author "Rasmus Munk Larsen, Benoît Jacob, Antonio Sánchez")
+  (url "https://gitlab.com/libeigen/eigen/-/issues/2344")
+  (year "2021")
+  (month "October"))
+
+(inproceedings gamblin2015:spack
+  (author "Todd Gamblin, Matthew LeGendre, Michael R. Collette, Gregory L. Lee, Adam Moody, Bronis R. de Supinski, Scott Futral")
+  (title "The Spack Package Manager: Bringing Order to HPC Software Chaos")
+  (year "2015")
+  (isbn "9781450337236")
+  (publisher "Association for Computing Machinery")
+  (address "New York, NY, USA")
+  (url "https://doi.org/10.1145/2807591.2807623")
+  (doi "10.1145/2807591.2807623")
+  (booktitle "Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis")
+  (articleno "40")
+  (numpages "12")
+  (location "Austin, Texas")
+  (series "SC '15"))
+
+(inproceedings courtes2015:reproducible
+  (url "https://hal.inria.fr/hal-01161771/en")
+  (series "Lecture Notes in Computer Science")
+  (month "August")
+  (pages "579--591")
+  (year "2015")
+  (booktitle "Euro-Par 2015: Parallel Processing Workshops")
+  (author "Ludovic Courtès, Ricardo Wurmus")
+  (title "Reproducible and User-Controlled Software Environments in HPC with Guix"))
+
+
+(article courtes2020:storage
+  (note "https://doi.org/10.5281/zenodo.3886739")
+  (doi "10.5281/zenodo.3886739")
+  (month "June")
+  (year "2020")
+  (number "1")
+  (volume "6")
+  (journal "ReScience C")
+  (author "Ludovic Courtès")
+  (title "[Re] Storage Tradeoffs in a Collaborative Backup Service for Mobile Devices"))
+
+(article perkel2020:challenge
+  (month "August")
+  (year "2020")
+  (note "https://www.nature.com/articles/d41586-020-02462-7")
+  (journal "Nature")
+  (author "Jeffrey M. Perkel")
+  (title "Challenge to Scientists: Does Your Ten-Year-Old Code Still Run?"))
+
+#|
+(defun skr-from-bibtex ()
+  "Vaguely convert the BibTeX snippets after POINT to SBibTeX."
+  (interactive)
+  (while (re-search-forward "\\([a-z_-]+\\) *= *[{\"]\\([^}\"]+\\)[}\"] *, *$" nil nil)
+    (replace-match "(\\1 \"\\2\")")))
+|#
diff --git a/doc/cise-2022/images/cpu-simd-extensions.tex b/doc/cise-2022/images/cpu-simd-extensions.tex
new file mode 100644
index 0000000..e9474b1
--- /dev/null
+++ b/doc/cise-2022/images/cpu-simd-extensions.tex
@@ -0,0 +1,15 @@
+  \begin{tikzpicture}[
+        box/.style = { fill=guixblue2, text=white, inner sep=3mm, rounded corners, font=\bf\sf }
+      ]
+    \matrix[row sep=6mm, column sep=6mm] {
+      \node(sse2) [box, draw=guixorange1, thick] {SSE2 (ca. 2003)}; & & \node(avx512) [box] {AVX-512 (2013)}; \\
+      \node(sse3) [box] {SSE3}; & \node {\large\textbf{x86\_64}}; & \node(avx2) [box] {AVX2}; \\
+      \node(ssse3) [box] {SSSE3}; & & \node(avx) [box] {AVX}; \\
+    };
+
+    \path[very thick, draw=guixorange1] (sse2) edge [->] (sse3);
+    \path[very thick, draw=guixorange1] (sse3) edge [->] (ssse3);
+    \path[very thick, draw=guixorange1] (ssse3) edge [->] (avx);
+    \path[very thick, draw=guixorange1] (avx) edge [->] (avx2);
+    \path[very thick, draw=guixorange1] (avx2) edge [->] (avx512);
+  \end{tikzpicture}
diff --git a/doc/cise-2022/manifest.scm b/doc/cise-2022/manifest.scm
new file mode 100644
index 0000000..b89f6c9
--- /dev/null
+++ b/doc/cise-2022/manifest.scm
@@ -0,0 +1,16 @@
+(specifications->manifest
+ '("rubber"
+
+   "texlive-base"
+   "texlive-latex-wrapfig"
+
+   "texlive-microtype"
+   "texlive-latex-listings" "texlive-hyperref"
+
+   ;; PGF/TikZ
+   "texlive-latex-pgf"
+
+   ;; Additional fonts.
+   "texlive-cm-super" "texlive-amsfonts"
+   "texlive-inconsolata" "texlive-latex-xkeyval" "texlive-latex-upquote"
+   "texlive-times" "texlive-helvetic" "texlive-courier"))
diff --git a/doc/programming-2022/security.sbib b/doc/programming-2022/security.sbib
index c5a18b6..1de465b 100644
--- a/doc/programming-2022/security.sbib
+++ b/doc/programming-2022/security.sbib
@@ -1,5 +1,5 @@
 (article lamb2021:reproducible
-  (author "Chris Lamb and Stefano Zacchiroli")
+  (author "Chris Lamb, Stefano Zacchiroli")
   (title "Reproducible Builds: Increasing the Integrity of Software Supply Chains")
   (publisher "IEEE Computer Society")
   (year "2021")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]