[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
41/66: doc: Add CiSE article.
From: |
Ludovic Courtès |
Subject: |
41/66: doc: Add CiSE article. |
Date: |
Wed, 29 Jun 2022 11:32:02 -0400 (EDT) |
civodul pushed a commit to branch master
in repository maintenance.
commit 455acc4f2e5adf639843d0f2588d14b3a33212b2
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sat Mar 5 14:48:33 2022 +0100
doc: Add CiSE article.
* doc/cise-2022: New directory.
* doc/programming-2022/security.sbib (lamb2021:reproducible):
Comma-separate authors.
---
doc/cise-2022/GNUmakefile | 21 +
doc/cise-2022/cpu-tuning.skb | 601 +++++++++++++++++++++++++++
doc/cise-2022/hpc.sbib | 96 +++++
doc/cise-2022/images/cpu-simd-extensions.tex | 15 +
doc/cise-2022/manifest.scm | 16 +
doc/programming-2022/security.sbib | 2 +-
6 files changed, 750 insertions(+), 1 deletion(-)
diff --git a/doc/cise-2022/GNUmakefile b/doc/cise-2022/GNUmakefile
new file mode 100644
index 0000000..f305659
--- /dev/null
+++ b/doc/cise-2022/GNUmakefile
@@ -0,0 +1,21 @@
+SKRIBILO := skribilo
+PDFLATEX := pdflatex
+RUBBER := rubber
+DOT := dot
+DOT-OPTS := -Gratio=1.2 -Gwidth=15cm
+
+.DEFAULT_GOAL := cpu-tuning.pdf
+# Files that cpu-tuning.skb reads while Skribilo generates the LaTeX source.
+ILLUSTRATIONS := \
+ images/cpu-simd-extensions.tex
+
+cpu-tuning.tex: $(ILLUSTRATIONS)
+%.pdf: %.tex
+ $(RUBBER) --pdf -I $$PWD "$<"
+
+%.tex: %.skb
+ $(SKRIBILO) -t latex -o "$@" "$<"
+
+%.pdf: %.dot
+ $(DOT) -Tpdf $(DOT-OPTS) < "$<" > "$@.tmp"
+ mv "$@.tmp" "$@"
diff --git a/doc/cise-2022/cpu-tuning.skb b/doc/cise-2022/cpu-tuning.skb
new file mode 100644
index 0000000..ff7220c
--- /dev/null
+++ b/doc/cise-2022/cpu-tuning.skb
@@ -0,0 +1,601 @@
+(use-modules (skribilo engine)
+ (skribilo engine latex)
+ (skribilo ast)
+ (skribilo writer)
+ (skribilo output)
+ (skribilo utils strings)
+ (skribilo lib)
+ (skribilo evaluator)
+ (skribilo biblio author)
+ (skribilo source)
+ (skribilo source lisp)
+ (skribilo source parameters)
+ (ice-9 match)
+ (rnrs io ports))
+
+(define (---) ; emdash
+ (resolve (lambda (n e env)
+ (if (engine-format? "html" e)
+ (! "—")
+ (! "---")))))
+
+(define (--) ; endash
+ (resolve (lambda (n e env)
+ (if (engine-format? "html" e)
+ (! "–")
+ (! "--")))))
+
+(define (dash-dash)
+ (resolve (lambda (n e env)
+ (if (engine-format? "latex" e)
+ (! "{-}{-}")
+ "--"))))
+
+(define (url url)
+ (ref :text (tt url) :url url))
+
+(define (=>)
+ (symbol "=>"))
+
+;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'.
+(define latex-tt-encoding
+ `((#\- "-\\-")
+ (#\h "h\\-") ;“authorizations”, “authenticate”
+ ,@(@@ (skribilo engine latex) latex-tt-encoding)))
+
+(markup-writer 'tt (find-engine 'latex)
+ :before "{\\texttt{"
+ :action (lambda (n e)
+ (let ((ne (make-engine
+ (gensym "latex")
+ :delegate e
+ :filter (make-string-replace latex-tt-encoding)
+ :custom (engine-customs e)
+ :symbol-table (engine-symbol-table e))))
+ (output (markup-body n) ne)))
+ :after "}}")
+
+;; For pdflatex.
+(engine-custom-set! (find-engine 'latex) 'image-format '("pdf"))
+
+;; Avoid "option clash" with acmart.
+(engine-custom-set! (find-engine 'latex) 'hyperref #f)
+
+(engine-custom-set! (find-engine 'latex) 'usepackage
+ (let ((u (engine-custom (find-engine 'latex)
+ 'usepackage)))
+ ;; See <https://en.wikibooks.org/wiki/LaTeX/Labels_and_Cross-referencing>
+ ;; and <http://tug.org/pipermail/texhax/2010-September/015596.html>.
+ (string-append u "\n"
+ "\\usepackage{inconsolata}\n"
+ "\\usepackage{tikz}\n"
+ "\\usetikzlibrary{arrows,shapes,shadows}\n"
+ "\\definecolor{guixorange1}{RGB}{243,154,38} % guixorange P\n"
+ "\\definecolor{guixblue2}{RGB}{10,50,80} % guixblue S\n"
+ "\\definecolor{guixred2}{RGB}{230,68,57} % red S\n"
+ "\\definecolor{guixdarkgrey}{RGB}{46,47,55} % guixdarkgrey S\n"
+
+ ;; Trick so that ‘…’ is properly
+ ;; typeset inside teletype text.
+ "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n"
+
+ ;; Improve hyphenation.
+ "\\hyphenation{Open-PGP}\n")))
+
+(let ((latex (find-engine 'latex)))
+ (engine-custom-set! latex 'documentclass
+ "\\documentclass{IEEEcsmag}")
+ (engine-custom-set! latex 'maketitle #f)
+
+ (markup-writer '&latex-author latex
+ :action (lambda (n e)
+ (let ((body (markup-body n)))
+ (for-each (lambda (a)
+ (display "\\author{{")
+ (output (markup-option a :name) e)
+ (display "}}\n\\affil{\n")
+ (output (markup-option a :affiliation) e)
+ (display "}\n\n"))
+ (if (pair? body) body (list body))))))
+
+ (markup-writer 'image latex
+ :options '(:file :url :width :height :zoom)
+ :action (lambda (n e)
+ (format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n"
+ (or (markup-option n :width) 0.5)
+ (markup-option n :file))))
+
+ (markup-writer 'prog latex
+ :class "small"
+ :options '(:line :mark)
+ :before "\n\n\\vspace{3mm}\n\\begin{footnotesize}\n"
+ :action (lambda (n e)
+ ;; Delegate actual work to the "real" 'prog'.
+ (output (prog :line (markup-option n :line)
+ :mark (markup-option n :mark)
+ (node-body n))
+ e))
+ :after "\n\\end{footnotesize}\n")
+
+ (markup-writer 'figure latex
+ :options '(:legend :number :multicolumns)
+ :action (lambda (n e)
+ (let ((ident (markup-ident n))
+ (legend (markup-option n :legend))
+ (mc (markup-option n :multicolumns)))
+ (display "\\begin{figure}[ht]\n\\begin{scriptsize}\n")
+ (output (markup-body n) e)
+ (display "\n\\end{scriptsize}\n")
+ (format #t "\\caption{\\label{~a}"
+ (string-canonicalize ident))
+ (output legend e)
+ (display "}\\end{figure}\n"))))
+
+ (markup-writer '&bib-entry-author
+ :action (lambda (n e)
+ (let ((names (markup-body n)))
+ (evaluate-document
+ (if (string? names)
+ (abbreviate-first-names
+ names
+ abbreviate-author-first-names)
+ names)
+ e)))))
+
+(define (word-count)
+ "Emit the word count."
+ (define (body-words body)
+ (match body
+ ((? string? str)
+ (length (string-tokenize str)))
+ ((? ast?) 0) ;don’t double-count
+ ((things ...)
+ (apply + (map body-words things)))))
+
+ (color :fg "red" (bold
+ [,(resolve (lambda (n env engine)
+ (ast-fold (lambda (n r)
+ (cond ((is-markup? n 'figure)
+ (+ r 250))
+ ((and (is-markup? n 'chapter)
+ (equal? (markup-option n :title)
+ "References"))
+ (+ r 250))
+ ((container? n)
+ r)
+ ((markup? n)
+ (let ((body (markup-body n)))
+ (+ r (body-words body))))
+ (else r)))
+ (+ 250 250) ;abstract + biography
+ (ast-document n)))) words.])))
+
+
+(define (abstract . body)
+ (!latex "\n\\begin{abstract}\n$1\n\\end{abstract}\n\n" body))
+
+(bibliography "../els-2013/guix.sbib")
+(bibliography "../reppar-2015/reppar.sbib")
+(bibliography "../programming-2022/security.sbib")
+(bibliography "hpc.sbib")
+
+
+(document :title [Reproducibility and Performance: Why Choose?]
+ ;;[Conciliating Performance and Reproducibility]
+ :author (list (author :name "Ludovic Courtès"
+ :affiliation "Inria"
+ :address "Bordeaux, France"))
+
+ (abstract [Research processes often rely on high-performance
+computing (HPC), but HPC is often seen as antithetical to
+“reproducibility”: one would have to choose between software that
+achieves high performance, and software that can be deployed in a
+reproducible fashion. However, by giving up on reproducibility we would
+give up on verifiability, a foundation of the scientific process. How
+can we conciliate performance and reproducibility? This article looks
+at two performance-critical aspects in HPC: message passing (MPI) and
+CPU micro-architecture tuning. Engineering work that has gone into
+performance portability has already proved fruitful, but some areas
+remain unaddressed when it comes to CPU tuning. We propose package
+multi-versioning, a technique developed for GNU Guix, a tool for
+reproducible software deployment, and show that it allows us to
+implement CPU tuning without compromising on reproducibility and
+provenance tracking.])
+
+ (!latex "\n\\maketitle\n")
+ (!latex "\n\\chapterinitial{Introduction.}\n")
+
+ ;; (word-count)
+
+ (p [It should come as no surprise that the execution speed of programs is a
+primary concern in high-performance computing (HPC). Many HPC
+practitioners would tell you that, among their top concerns, is the
+performance of high-speed networks used by the Message Passing Interface
+(MPI) and use of the latest vectorization extensions of modern CPUs.])
+
+ (p [This article focuses on the latter: tuning code for specific CPU
+micro-architectures, to reap the benefits of modern CPUs. This question
+is particularly acute in the context of GNU Guix, a software deployment
+tool with strong support for ,(emph [reproducible deployment]). We like
+to present Guix as a key element of the reproducible research toolbox:
+as more research output is produced by software, the ability to ,(emph
+[verify and validate]) research results depends on the ability to ,(emph
+[re-deploy and re-run]) the software. We present a recently-introduced
+CPU-tuning option for Guix, the design choices we made, and how this
+affects reproducibility.])
+
+ (p [But let us first consider this central question in the HPC and
+scientific community: can “reproducibility” be achieved ,(emph
+[without]) sacrificing performance? Our answer is a resounding “yes”,
+but that deserves clarifications.])
+
+ (chapter :title [Reproducibility & High Performance]
+ :number #f
+
+ (p [The author remembers advice heard at the beginning of their
+career in HPC—advice still given today—: that to get optimal MPI
+performance, you would have to use the vendor-provided MPI library; that
+to get your code to perform well on this new cluster, you would have to
+recompile the complete software stack locally; that using generic,
+pre-built binaries from a GNU/Linux distribution will not give you good
+performance.])
+
+ (p [From a software engineering viewpoint, this looks like a sad
+situation and an inefficient approach, dismissing the benefits of
+automated software deployment as pioneered by Debian, Red Hat, and
+others in the 90’s or, more recently, as popularized with container
+images. It also means doing away with reproducibility, where
+“reproducibility” is to be understood in two different ways: first as
+the ability to re-deploy the same software stack on another machine or
+at a different point in time, and second as the ability to ,(emph [verify]) that
+binaries being run match the source code—the latter is what reproducible
+builds are concerned with ,(ref :bib 'lamb2021:reproducible).])
+
+ (p [But does it really have to be this way? Engineering efforts to
+support ,(emph [performance portability]) suggest otherwise. A mature
+MPI implementation like Open MPI, today, does achieve performance
+portability: it takes advantage of high-speed networking hardware by
+determining, at run-time, which drivers to use to obtain optimal
+performance for the network at hand—no recompilation is needed ,(ref :bib
+'courtes2019:openmpi).])
+
+ (p [Likewise, generic, pre-built binaries can and indeed often do
+take advantage of modern CPUs by selecting at run-time the most
+efficient implementation of performance-sensitive routines for the host
+CPU ,(ref :bib 'courtes2018:prebuilt). There are cases, though, where
+this is ,(emph [not]) the case; these are those we will focus on in the
+remainder of this article.]))
+
+ (chapter :title [The Jungle of SIMD Extensions]
+ :number #f
+
+ (p [While major CPU architectures such as x86_64, AArch64, and
+POWER9 were defined years ago, CPU vendors regularly extend them.
+Extensions that matter most in HPC are vector extensions: single
+instruction/multiple data (SIMD) instructions and registers. In this
+area, a ,(emph [lot]) has happened on x86_64 CPUs since the baseline
+instruction set architecture (ISA) was defined. As shown in ,(numref
+:text [Figure] :ident "fig-simd-extensions"), Intel and AMD have been
+tacking ever more powerful SIMD extensions onto their CPUs over the years,
+from SSE3 to AVX-512, leading to a wealth of CPU “micro-architectures”.
+This gives a high-level view, but just looking at generations of Intel
+processors by their code name—from “Nehalem” to “Skylake” ,(it [via])
+“Ivybridge”—shows an already more complicated story.])
+
+ (figure :legend [Timeline of x86_64 SIMD extensions]
+ :ident "fig-simd-extensions"
+ :multicolumns #t
+ (!latex (call-with-input-file "images/cpu-simd-extensions.tex"
+ get-string-all)))
+
+ (p [Linear algebra routines that scientific software relies on
+greatly benefit from SIMD extensions. For example, on a modest Intel
+CORE i7 processor (of the Skylake generation), the AVX2-optimized
+version of the dense matrix multiplication routines of Eigen ,(ref :bib
+'guennebaud2022:eigen), built with GCC 10.3, peaks at about 40 Gflops/s,
+compared to 11 Gflops/s for its baseline x86_64 version—four times
+faster!]))
+
+ (chapter :title [Portable Performance Through Function Multi-Versioning]
+ :number #f
+
+ (p [How to create binaries that are portable, yet are able to get
+the most out of the CPU on which they are executed? This has been an
+important question for distributors of binaries. Distributions such as
+Debian and CentOS provide the convenience of fast automated deployment,
+thanks to pre-built binaries; asking users to either recompile part of
+their software stack or give up on performance is not a reasonable
+alternative.])
+
+ (p [To address this and achieve performance portability,
+developers have largely adopted ,(emph [function multi-versioning])
+(FMV): the implementation provides multiple versions of “hot” routines,
+one for each relevant CPU micro-architecture, and picks the best one for
+the host CPU at run time. Many pieces of performance-critical software
+already use this technique: the C standard library (libc) contains
+multiple versions of its string handling and math routines, the GMP
+library for multi-precision arithmetic uses FMV, and so do software
+packages ranging from cryptography libraries (Libgcrypt, Nettle) to
+linear algebra (OpenBLAS, FFTW).])
+
+ (p [To make it easier for developers to adopt FMV, the GNU
+compilation tool chain (GCC, the Binary Utilities, and the C Library),
+which is widely used in HPC, provides helpers at different levels.
+Developers can annotate relevant functions with the ,(tt [target_clones])
+attribute to instruct the compiler to generate optimized versions of the
+function for each selected architecture. GCC not only generates these
+versions, but also generates code to choose the right function version
+for the host CPU at load time, with support from the dynamic linker,
+,(tt [ld.so]). That relieves developers from the need to implement
+their own ad-hoc machinery. From that perspective, it would seem that
+performance portability, ,(it [via]) FMV, is a solved problem.])
+
+ #;(stuff on auto-fmv commented out!
+
+ (p [To make the case for FMV, we wanted to see what it would take us to
+actually add FMV support to code that would benefit from it. In the
+spirit of the Clear Linux automatic FMV patch
+generator (https://github.com/clearlinux/make-fmv-patch), we wrote an
+automatic FMV tool for
+Guix (https://gitlab.inria.fr/guix-hpc/function-multi-versioning): you
+would give it a package name, and it would:])
+
+ (itemize
+ (item [Build the package with the ,(tt [-fopt-info-vec]) compiler flag to gather
+ information about vectorization opportunities and their source code
+ location.])
+
+ (item [Generate a patch that, for each C function with vectorization
+ opportunities, adds the ,(tt [target_clones])
+ attribute to generate a couple of vectorized versions—generic,
+AVX2, and
+ AVX-512.])
+
+ (item [Build the package with this FMV patch.]))
+
+ (p [The tool can successfully FMV-patch a variety of packages
+written in C, such as the GNU Scientific Library (FIXME
+https://www.gnu.org/software/gsl), which contains plain sequential
+implementations of a variety of math routines. It was an exciting
+engineering experiment… but we found it to be all too often
+inapplicable, for two reasons: performance-critical software already
+does FMV, or it is not written in C.]))
+
+ (p [There is at least one common pattern though where FMV is not
+applicable, or at least is not applied: C++ header-only libraries.
+These are libraries that provide generic template code in header files;
+that code is specialized ,(emph [at build time]) in software that uses
+them. There is no shortage of C++ header-only math libraries providing
+efficient, optimized SIMD versions of their routines: Eigen, MIPP, xsimd
+and xtensor, SIMD Everywhere (SIMDe), Highway, and many more. All
+these, except Highway, have in common that they do ,(emph [not]) support
+FMV. Since they “just” provide headers, it is up to ,(emph [each])
+package using them to figure out what to do in terms of performance
+portability.])
+
+ (p [In practice though, software using these C++ header-only
+libraries rarely makes provisions for performance portability. Thus,
+when compiling those packages for the baseline ISA, one misses out on
+all the vectorized implementations that libraries like Eigen provide.
+This is a known issue in search of a solution ,(ref :bib
+'larsen2021:eigen-fmv). It can have a very concrete impact on
+performance since many scientific packages—the ARPACK-NG library for
+solving eigenvalue problems, the Ceres solver for optimization problems,
+the FEniCSx platform for solving differential equations, to name a
+few—depend on Eigen.])
+
+ #;(p [Fundamentally, run-time dispatch is at odds with the all-compile-time
+approach that header-only C++ template libraries are about.
+Furthermore, Eigen, for example, supports fine-grain vectorization; it
+may be used to operate on small matrices, as is common in computer
+graphics, and in that case inlining matrix operations is key to good
+performance—run-time dispatch would have to be done at a higher
+level.]))
+
+ (chapter :title [Reproducible Deployment]
+ :number #f
+
+ (p [Distributions such as Debian and Fedora that provide pre-built
+binaries miss out on SIMD optimizations of C++ header-only libraries
+like Eigen because they provide binaries targeting the baseline CPU
+architecture so that those binaries run on any CPU. The Spack ,(ref
+:bib 'gamblin2015:spack) and EasyBuild ,(ref :bib 'geimer2014:easybuild)
+package managers address that by ,(emph [rebuilding]) software on the
+target computer, which allows them to instruct the compiler to optimize
+for the host CPU.])
+
+ (p [Unfortunately, EasyBuild and Spack both have limited support
+for reproducible deployment—they do not, in general, guarantee that you
+can redeploy the same software environment on different machines, or at
+different points in time. This is because they build upon software
+provided by the host system—the compiler tool chain, “system” libraries,
+etc.—and that foundation differs from one system to another—e.g., CentOS
+might provide some version of GCC, and Ubuntu might provide another.])
+
+ (p [To avoid that, Guix builds software in ,(emph [isolated
+environments]), as pioneered by Nix ,(ref :bib '(dolstra2004:nix
+courtes2013:functional)), and its package collection is ,(emph
+[self-contained])—it does not rely on external software packages. This
+is what makes Guix builds reproducible bit-for-bit—or in other words,
+,(emph [verifiable]) ,(ref :bib 'lamb2021:reproducible). Given binaries
+and provenance data, anyone can independently verify the
+binary/source-code correspondence.])
+
+ (p [Guix provides a command-line interface similar to that of
+other package managers: ,(tt [guix install python]), for instance,
+installs the Python interpreter. Package management is per-user rather
+than system-wide and does not require system administrator privileges,
+which makes it suitable for multi-user HPC clusters ,(ref :bib
+'courtes2015:reproducible). To offer the level of flexibility that HPC
+users expect, Guix lets users customize packages ,(it [via]) ,(emph
+[package transformation options]) on the command line—for instance to
+swap two packages in the dependency graph—or through programming
+interfaces ,(ref :bib 'courtes2015:reproducible).])
+
+ (p [Quite uniquely, Guix supports ,(emph [“time traveling”]): with
+,(tt [guix time-machine]), users can run a specific revision of Guix and
+use it to deploy packages as they were defined in that revision. The
+typical use case is redeploying software that was used to produce
+computational results for a scientific publication ,(ref :bib
+'(hinsen2020:staged-computation courtes2020:storage
+perkel2020:challenge)). The command below deploys Python, NumPy, and
+their dependencies as they were defined in a Guix revision from October
+2021:])
+
+ (prog :class "small" :line #f [
+guix time-machine --commit=b0735c79b0d1d341 -- \\
+ shell python python-numpy
+])
+
+ (p [Whether you run it today or two years from now, it will deploy
+the ,(emph [exact same binaries]), bit-for-bit, down to the C
+library.]))
+
+ (chapter :title [Package Multi-Versioning]
+ :number #f
+
+ (p [With our packaging hammer, one could envision a solution to
+these CPU tuning problems: if we cannot do function multi-versioning,
+what about implementing ,(emph [package]) multi-versioning? Guix makes
+it easy to define package variants, so we can define package variants
+optimized for a specific CPU—compiled with ,(tt [-march=skylake]), for
+instance. What we need is to define those variants “on the fly”.])
+
+ (p [The recently-introduced ,(tt [--tune]) package transformation
+option works along those lines. Users can pass ,(tt [--tune]) to any of
+the command-line tools (,(tt [guix install]), ,(tt [guix shell]), etc.)
+and that causes “tunable” packages to be optimized for the host CPU.
+For example, here is how you would run Eigen’s matrix multiplication
+benchmark from the ,(tt [eigen-benchmarks]) package with
+micro-architecture tuning:])
+
+ (prog :class "small" :line #f [
+$ guix shell --tune eigen-benchmarks -- \\
+ benchBlasGemm 240 240 240
+guix shell: tuning for CPU skylake
+240 x 240 x 240
+cblas: 0.208547 (15.908 GFlops/s)
+eigen : 0.0720303 (46.06 GFlops/s)
+l1: 32768
+l2: 262144
+])
+
+ (p [,(tt [--tune]) determines the name of the host CPU as
+recognized by GCC’s (and Clang’s) ,(tt [-march]) option. Users can
+override auto-detection by passing a CPU name—e.g., ,(tt
+[--tune=skylake-avx512]). As mentioned earlier, we made the conscious
+choice of letting ,(tt [--tune]) affect solely software that packagers
+explicitly marked as “tunable”. This ensures Guix does not end up
+rebuilding packages that could not possibly benefit from
+micro-architecture-specific optimizations, which would be a waste of
+resources.])
+
+ #;(p [(For the same
+reason, we rejected the idea of defining separate system types for the
+various x86_64 CPU micro-architectures the way Nix 2.4 did (FIXME
+https://discourse.nixos.org/t/nix-2-4-released/15822#other-features-2).)])
+
+ #;(p [In the spirit of avoiding needless package rebuilds, ,(tt [--tune])
+leverages the “graft” mechanism (XREF
+https://guix.gnu.org/manual/en/html_node/Security-Updates.html): package
+variants are ,(emph [grafted]) to the dependency graph, such that dependents of
+a tuned package do not need to be rebuilt. To illustrate that, consider
+the figure below:])
+
+ ;;![Dependency graph of OpenCV, where the tuned variant of VTK is grafted.](/static/images/blog/cpu-tuning-graft.png)
+
+ #;(p [OpenCV depends on VTK, which depends on Eigen, as shown by the
+dotted arrows. VTK is marked as tunable so it can benefit from SIMD
+optimizations in Eigen. When ,(tt [--tune]) is passed, the optimized variant
+of VTK built with ,(tt [-march=skylake]) is generated and grafted onto the
+dependency graph, such that OpenCV itself does not need to be recompiled
+and instead is relinked against the optimized VTK variant.])
+
+ (p [This implementation of package multi-versioning does not
+sacrifice reproducibility. When ,(tt [--tune]) is used, from Guix’s
+viewpoint, it is just an alternate, but well-defined dependency graph
+that gets built. Guix records package transformation options that were
+used so it can “replay” them. For example, one can export a ,(emph
+[manifest]) representing packages that have been deployed:])
+
+ (prog :class "small" :line #f [
+$ guix shell eigen-benchmarks --tune
+guix shell: tuning for CPU skylake
+\[env\]$ guix package --export-manifest \\
+ -p $GUIX_ENVIRONMENT
+(use-modules (guix transformations))
+
+(define transform1
+ (options->transformation
+ '((tune . "skylake"))))
+
+(packages->manifest
+ (list (transform1
+ (specification->package
+ "eigen-benchmarks"))))
+])
+
+ (p [The manifest above is a code snippet that can be passed to
+,(tt [guix shell]) or ,(tt [guix package]) to redeploy the package with
+the same tuning parameters. Like other transformation options, ,(tt
+[--tune]) is accepted by all the commands; for example, here is how you
+would build a Docker image tuned for a particular CPU:])
+
+ (prog :class "small" :line #f [
+guix pack -f docker -S /bin=bin \\
+ eigen-benchmarks --tune=skylake
+])
+
+ #;(p [This comes in handy if you want to prepare an image to run on
+another cluster, where you know you can rely on a given CPU extension.])
+
+ #;(p [The Guix build farm is set up to build a few optimized package
+variants. That way, users of ,(tt [--tune]) are likely to get pre-built
+binaries even for the optimized variants, making deployment just as fast
+as with non-tuned packages. To achieve this, ,(tt [--tune]) skips
+running test suites when building packages: we cannot be sure that build
+machines implement the CPU micro-architecture at hand.]))
+
+ (chapter :title [Conclusion and Outlook]
+ :number #f
+
+ (p [We implemented what we call “package multi-versioning” for
+C/C++ software that lacks function multi-versioning and run-time
+dispatch, a notable example of which is optimized C++ header-only
+libraries. It is another way to ensure that users do not have to trade
+reproducibility for performance.])
+
+ ;; refs:
+ ;; (FIXME https://docs.julialang.org/en/v1/devdocs/sysimg/)
+ ;; (FIXME https://docs.rs/multiversion/0.6.1/multiversion/)
+ (p [The scientific programming landscape has been evolving over
+the last few years. It is encouraging to see that Julia offers function
+multi-versioning for its “system image”, and that, similarly, Rust
+supports it with annotations similar to GCC’s ,(tt [target_clones]).
+Hopefully these new development environments will support performance
+portability well enough that users and packagers will not need to worry
+about it.])
+
+ (p [But first and foremost, it is up to us, research software
+engineers and scientists, to dispel the myth that performance is a valid
+excuse for non-reproducible computational workflows.]))
+
+ (chapter :title "References"
+ :number #f
+ (flush :side 'left
+ (the-bibliography
+ :sort bib-sort/first-author-last-name)))
+
+ (!latex
+ "\n\\begin{IEEEbiography}{Ludovic Courtès}\n$1\n\\end{IEEEbiography}\n"
+ [is a research software engineer at Inria, France. He has been
+contributing to the development of GNU Guix since its inception in 2012
+and works on its use in support of reproducible research workflows. He
+holds a PhD in computer science from LAAS-CNRS. You can reach him at
+,(it [ludovic.courtes@inria.fr]).]))
+
+;; Local Variables:
+;; ispell-local-dictionary: "american"
+;; compile-command: "guix shell -m manifest.scm -- make -j5"
+;; eval: (setq indent-tabs-mode nil)
+;; End:
diff --git a/doc/cise-2022/hpc.sbib b/doc/cise-2022/hpc.sbib
new file mode 100644
index 0000000..8094028
--- /dev/null
+++ b/doc/cise-2022/hpc.sbib
@@ -0,0 +1,96 @@
+(misc courtes2019:openmpi
+ (title "Optimized and Portable Open MPI Packaging")
+ (author "Ludovic Courtès")
+ (year "2019")
+ (month "December")
+ (url "https://hpc.guix.info/blog/2019/12/optimized-and-portable-open-mpi-packaging/"))
+
+(misc courtes2018:prebuilt
+ (title "Pre-Built Binaries vs. Performance")
+ (author "Ludovic Courtès")
+ (year "2018")
+ (month "January")
+ (url "https://hpc.guix.info/blog/2018/01/pre-built-binaries-vs-performance/"))
+
+(misc guennebaud2022:eigen
+ (title "Eigen C++ linear algebra library")
+ (author "Gaël Guennebaud, Benoît Jacob, et al.")
+ (year "2022")
+ (month "March")
+ (url "https://eigen.tuxfamily.org"))
+
+(inproceedings cassagne2018:mipp
+ (author "Adrien Cassagne, Olivier Aumage, Denis Barthou, Camille Leroux, Christophe Jégo")
+ (title "MIPP: A Portable C++ SIMD Wrapper and Its Use for Error Correction Coding in 5G Standard")
+ (year "2018")
+ (isbn "9781450356466")
+ (publisher "Association for Computing Machinery")
+ (address "New York, NY, USA")
+ (url "https://doi.org/10.1145/3178433.3178435")
+ (doi "10.1145/3178433.3178435")
+ (booktitle "Proceedings of the 2018 4th Workshop on Programming Models for SIMD/Vector Processing")
+ (articleno "2")
+ (numpages "8")
+ (keywords "wrapper, channel code, C++, SSE, SIMD, AVX-512, NEON, AVX")
+ (location "Vienna, Austria")
+ (series "WPMVP'18"))
+
+(misc larsen2021:eigen-fmv
+ (title "Linking modules compiled for different SIMD instruction sets")
+ (author "Rasmus Munk Larsen, Benoît Jacob, Antonio Sánchez")
+ (url "https://gitlab.com/libeigen/eigen/-/issues/2344")
+ (year "2021")
+ (month "October"))
+
+(inproceedings gamblin2015:spack
+ (author "Todd Gamblin, Matthew LeGendre, Michael R. Collette, Gregory L. Lee, Adam Moody, Bronis R. de Supinski, Scott Futral")
+ (title "The Spack Package Manager: Bringing Order to HPC Software Chaos")
+ (year "2015")
+ (isbn "9781450337236")
+ (publisher "Association for Computing Machinery")
+ (address "New York, NY, USA")
+ (url "https://doi.org/10.1145/2807591.2807623")
+ (doi "10.1145/2807591.2807623")
+ (booktitle "Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis")
+ (articleno "40")
+ (numpages "12")
+ (location "Austin, Texas")
+ (series "SC '15"))
+
+(inproceedings courtes2015:reproducible
+ (url "https://hal.inria.fr/hal-01161771/en")
+ (series "Lecture Notes in Computer Science")
+ (month "August")
+ (pages "579--591")
+ (year "2015")
+ (booktitle "Euro-Par 2015: Parallel Processing Workshops")
+ (author "Ludovic Courtès, Ricardo Wurmus")
+ (title "Reproducible and User-Controlled Software Environments in HPC with Guix"))
+
+
+(article courtes2020:storage
+ (note "https://doi.org/10.5281/zenodo.3886739")
+ (doi "10.5281/zenodo.3886739")
+ (month "June")
+ (year "2020")
+ (number "1")
+ (volume "6")
+ (journal "ReScience C")
+ (author "Ludovic Courtès")
+ (title "[Re] Storage Tradeoffs in a Collaborative Backup Service for Mobile Devices"))
+
+(article perkel2020:challenge
+ (month "August")
+ (year "2020")
+ (note "https://www.nature.com/articles/d41586-020-02462-7")
+ (journal "Nature")
+ (author "Jeffrey M. Perkel")
+ (title "Challenge to Scientists: Does Your Ten-Year-Old Code Still Run?"))
+
+#|
+(defun skr-from-bibtex ()
+ "Vaguely convert the BibTeX snippets after POINT to SBibTeX."
+ (interactive)
+ (while (re-search-forward "\\([a-z_-]+\\) *= *[{\"]\\([^}\"]+\\)[}\"] *, *$" nil nil)
+ (replace-match "(\\1 \"\\2\")")))
+|#
diff --git a/doc/cise-2022/images/cpu-simd-extensions.tex b/doc/cise-2022/images/cpu-simd-extensions.tex
new file mode 100644
index 0000000..e9474b1
--- /dev/null
+++ b/doc/cise-2022/images/cpu-simd-extensions.tex
@@ -0,0 +1,15 @@
+ \begin{tikzpicture}[
+ box/.style = { fill=guixblue2, text=white, inner sep=3mm, rounded corners, font=\bf\sf }
+ ]
+ \matrix[row sep=6mm, column sep=6mm] {
+ \node(sse2) [box, draw=guixorange1, thick] {SSE2 (ca. 2003)}; & & \node(avx512) [box] {AVX-512 (2013)}; \\
+ \node(sse3) [box] {SSE3}; & \node {\large\textbf{x86\_64}}; & \node(avx2) [box] {AVX2}; \\
+ \node(ssse3) [box] {SSSE3}; & & \node(avx) [box] {AVX}; \\
+ };
+
+ \path[very thick, draw=guixorange1] (sse2) edge [->] (sse3);
+ \path[very thick, draw=guixorange1] (sse3) edge [->] (ssse3);
+ \path[very thick, draw=guixorange1] (ssse3) edge [->] (avx);
+ \path[very thick, draw=guixorange1] (avx) edge [->] (avx2);
+ \path[very thick, draw=guixorange1] (avx2) edge [->] (avx512);
+ \end{tikzpicture}
diff --git a/doc/cise-2022/manifest.scm b/doc/cise-2022/manifest.scm
new file mode 100644
index 0000000..b89f6c9
--- /dev/null
+++ b/doc/cise-2022/manifest.scm
@@ -0,0 +1,16 @@
+(specifications->manifest
+ '("rubber"
+
+ "texlive-base"
+ "texlive-latex-wrapfig"
+
+ "texlive-microtype"
+ "texlive-latex-listings" "texlive-hyperref"
+
+ ;; PGF/TikZ
+ "texlive-latex-pgf"
+
+ ;; Additional fonts.
+ "texlive-cm-super" "texlive-amsfonts"
+ "texlive-inconsolata" "texlive-latex-xkeyval" "texlive-latex-upquote"
+ "texlive-times" "texlive-helvetic" "texlive-courier"))
diff --git a/doc/programming-2022/security.sbib b/doc/programming-2022/security.sbib
index c5a18b6..1de465b 100644
--- a/doc/programming-2022/security.sbib
+++ b/doc/programming-2022/security.sbib
@@ -1,5 +1,5 @@
(article lamb2021:reproducible
- (author "Chris Lamb and Stefano Zacchiroli")
+ (author "Chris Lamb, Stefano Zacchiroli")
(title "Reproducible Builds: Increasing the Integrity of Software Supply Chains")
(publisher "IEEE Computer Society")
(year "2021")
- 43/66: cise-2022: Inline two references., (continued)
- 43/66: cise-2022: Inline two references., Ludovic Courtès, 2022/06/29
- 48/66: programming-2022: Distinguish model and implementation., Ludovic Courtès, 2022/06/29
- 52/66: programming-2022: Clarify QEMU options, as suggested by reviewers., Ludovic Courtès, 2022/06/29
- 65/66: doc: programming-2022: Add PDF., Ludovic Courtès, 2022/06/29
- 24/66: icse-2022: Mention SLSA and Git{Lab,Hub}., Ludovic Courtès, 2022/06/29
- 27/66: icse-2022: Add reviews and response., Ludovic Courtès, 2022/06/29
- 28/66: icse-2022: Repurpose for <Programming> 2022., Ludovic Courtès, 2022/06/29
- 37/66: programming-2022: Improve rendering of in-line 'prog'., Ludovic Courtès, 2022/06/29
- 39/66: programming-2022: Add illustrations., Ludovic Courtès, 2022/06/29
- 40/66: programming-2022: Tweak., Ludovic Courtès, 2022/06/29
- 41/66: doc: Add CiSE article.,
Ludovic Courtès <=
- 34/66: programming-2022: Clarify bits., Ludovic Courtès, 2022/06/29
- 46/66: programming-2022: Fix typos and wording issues reported by reviewers., Ludovic Courtès, 2022/06/29
- 50/66: programming-2022: Address comments from Reviewer A., Ludovic Courtès, 2022/06/29
- 54/66: programming-2022: Use BibTeX for bibliography; include DOI., Ludovic Courtès, 2022/06/29
- 55/66: programming-2022: Clean up bibliography entries., Ludovic Courtès, 2022/06/29
- 56/66: programming-2022: Mention SSH signatures., Ludovic Courtès, 2022/06/29
- 57/66: programming-2022: Add channels and manifest., Ludovic Courtès, 2022/06/29
- 59/66: programming-2022: Cite actual full-source bootstrap., Ludovic Courtès, 2022/06/29
- 60/66: programming-2022: Add acknowledgments., Ludovic Courtès, 2022/06/29
- 61/66: programming-2022: Add \paperdetails for publication., Ludovic Courtès, 2022/06/29