guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

45/66: programming-2022: Add diff document.


From: Ludovic Courtès
Subject: 45/66: programming-2022: Add diff document.
Date: Wed, 29 Jun 2022 11:32:03 -0400 (EDT)

civodul pushed a commit to branch master
in repository maintenance.

commit 5396abd651f9bcebabd363010e8ce8d80913d88c
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Mon Apr 4 17:19:35 2022 +0200

    programming-2022: Add diff document.
    
    * doc/programming-2022/supply-chain.submitted.skb,
    doc/programming-2022/diff.skb: New files.
---
 doc/programming-2022/diff.skb                   |   33 +
 doc/programming-2022/supply-chain.submitted.skb | 1374 +++++++++++++++++++++++
 2 files changed, 1407 insertions(+)

diff --git a/doc/programming-2022/diff.skb b/doc/programming-2022/diff.skb
new file mode 100644
index 0000000..6ced4d0
--- /dev/null
+++ b/doc/programming-2022/diff.skb
@@ -0,0 +1,33 @@
+(use-modules (skribilo writer)
+             (skribilo engine)
+             (skribilo output)
+             (skribilo ast)
+             (skribilo package diff))
+
+;; LaTeX writer for deleted text: the only thing output is a red bullet
+;; placed in the margin with \marginpar; the node's body is not output.
+(markup-writer 'diff:deletion (find-engine 'latex)
+  :action (lambda (node engine)
+            (let* ((bullet (color :fg "red" (symbol "bullet")))
+                   (mark   (! "{\\marginpar{$1}}" bullet)))
+              (output mark engine))))
+
+;; LaTeX writer for inserted text: a blue bullet goes in the margin, then
+;; the node's body is output in blue.
+(markup-writer 'diff:insertion (find-engine 'latex)
+  :action (lambda (node engine)
+            (let ((bullet (color :fg "blue" (symbol "bullet"))))
+              (output (! "{\\marginpar{$1}}" bullet) engine)
+              (output (color :fg "blue" (markup-body node)) engine))))
+
+
+;; LaTeX writer for replaced text: a green bullet goes in the margin, then
+;; the node's body is output in green.
+(markup-writer 'diff:replacement (find-engine 'latex)
+  :action (lambda (node engine)
+            (let ((bullet (color :fg "green" (symbol "bullet"))))
+              (output (! "{\\marginpar{$1}}" bullet) engine)
+              (output (color :fg "green" (markup-body node)) engine))))
+
+;; The document itself: the result of `make-diff-document-from-files' (from
+;; (skribilo package diff)) applied to the submitted version and to
+;; "supply-chain.skb" (presumably the revised paper; that file is not added
+;; by this commit).  The writers above control how changes are rendered.
+(make-diff-document-from-files "supply-chain.submitted.skb"
+                              "supply-chain.skb")
+
+;; Local Variables:
+;; mode: scheme
+;; End:
diff --git a/doc/programming-2022/supply-chain.submitted.skb b/doc/programming-2022/supply-chain.submitted.skb
new file mode 100644
index 0000000..677fcdd
--- /dev/null
+++ b/doc/programming-2022/supply-chain.submitted.skb
@@ -0,0 +1,1374 @@
+(use-modules (skribilo package acmproc)
+            (skribilo engine)
+            (skribilo engine latex)
+            (skribilo ast)
+            (skribilo writer)
+            (skribilo output)
+            (skribilo utils strings)
+            (skribilo lib)
+            (skribilo evaluator)
+            (skribilo biblio author)
+            (skribilo source)
+            (skribilo source lisp)
+            (skribilo source parameters)
+            (rnrs io ports))
+
+;; Em dash: "&mdash;" for engines whose format is "html", "---" otherwise.
+;; The choice is deferred to resolve time, when the engine is known.
+(define (---)
+  (resolve (lambda (node engine env)
+             (cond ((engine-format? "html" engine) (! "&mdash;"))
+                   (else (! "---"))))))
+
+;; En dash: "&ndash;" for engines whose format is "html", "--" otherwise.
+;; The choice is deferred to resolve time, when the engine is known.
+(define (--)
+  (resolve (lambda (node engine env)
+             (cond ((engine-format? "html" engine) (! "&ndash;"))
+                   (else (! "--"))))))
+                
+;; Two literal hyphens.  For engines whose format is "latex" each hyphen is
+;; emitted in its own brace group ("{-}{-}"); any other engine gets the
+;; plain string "--".
+(define (dash-dash)
+  (resolve (lambda (node engine env)
+             (cond ((engine-format? "latex" engine) (! "{-}{-}"))
+                   (else "--")))))
+
+;; Reference to URL whose visible text is URL itself, wrapped in `tt'.
+(define (url url)
+  (let ((label (tt url)))
+    (ref :text label :url url)))
+
+;; Shorthand returning the `symbol' markup named "=>".
+(define (=>)
+  (symbol "=>"))
+  
+;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'.
+;; Character replacement table: two extra entries, which append "\-" after
+;; every "-" and every "h", are put in front of the private
+;; `latex-tt-encoding' table of (skribilo engine latex), reached with `@@'.
+;; NOTE(review): "\-" is presumably meant as LaTeX's discretionary hyphen.
+(define latex-tt-encoding
+  `((#\- "-\\-")
+    (#\h "h\\-")   ;“authorizations”, “authenticate”
+    ,@(@@ (skribilo engine latex) latex-tt-encoding)))
+
+;; LaTeX writer for `tt': the body is wrapped in "{\texttt{" … "}}" and is
+;; output through a fresh engine that delegates to the current one but
+;; filters strings with the `latex-tt-encoding' table defined in this file.
+(markup-writer 'tt (find-engine 'latex)
+   :before "{\\texttt{"
+   :action (lambda (node engine)
+             (let* ((tt-filter (make-string-replace latex-tt-encoding))
+                    (tt-engine (make-engine
+                                 (gensym "latex")
+                                 :delegate engine
+                                 :filter tt-filter
+                                 :custom (engine-customs engine)
+                                 :symbol-table (engine-symbol-table engine))))
+                (output (markup-body node) tt-engine)))
+   :after "}}")
+
+;; For pdflatex: set the 'image-format custom of the LaTeX engine to the
+;; single format "pdf" (presumably the only image format then looked up).
+(engine-custom-set! (find-engine 'latex) 'image-format '("pdf"))
+                   
+;; Avoid "option clash" with acmart: set the 'hyperref custom to #f
+;; (presumably so that the engine does not emit its own hyperref setup).
+(engine-custom-set! (find-engine 'latex) 'hyperref #f)
+                
+;; Extend the 'usepackage custom of the LaTeX engine: the current value is
+;; kept and followed by extra preamble lines (TikZ and its libraries, color
+;; definitions, a Unicode character declaration and a hyphenation hint).
+;; Each string literal holds exactly one preamble line, so the trailing
+;; "% ..." LaTeX comments must stay on the same line as their \definecolor.
+(engine-custom-set! (find-engine 'latex) 'usepackage
+                   (let ((u (engine-custom (find-engine 'latex)
+                                           'usepackage)))
+                     ;; See <https://en.wikibooks.org/wiki/LaTeX/Labels_and_Cross-referencing>
+                     ;; and <http://tug.org/pipermail/texhax/2010-September/015596.html>.
+                     (string-append u "\n"
+                                    "\\usepackage{tikz}\n"
+                                   "\\usetikzlibrary{arrows,shapes,shadows}\n"
+                                   "\\definecolor{guixorange1}{RGB}{243,154,38}  % guixorange P\n"
+                                   "\\definecolor{guixblue2}{RGB}{10,50,80} % guixblue S\n"
+                                   "\\definecolor{guixred2}{RGB}{230,68,57}  % red S\n"
+                                   "\\definecolor{guixdarkgrey}{RGB}{46,47,55} % guixdarkgrey S\n"
+
+                                   ;; Trick so that ‘…’ is properly
+                                   ;; typeset inside teletype text.
+                                   "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n"
+
+                                   ;; Improve hyphenation.
+                                   "\\hyphenation{Open-PGP}\n")))
+
+;; LaTeX engine setup specific to this paper: the document class, plus
+;; writers for authors, ACM categories, images, inline programs and figures.
+(let ((latex (find-engine 'latex)))
+   (engine-custom-set! latex 'documentclass
+                      "\\documentclass[english,phase=submission,code=tt]{programming}")
+
+   ;; Emit \author{NAME}, \affiliation{AFFILIATION} and \authorinfo{ADDRESS}
+   ;; for each author.  The body is either one author node or a list of them.
+   (markup-writer '&latex-author latex
+      :action (lambda (n e)
+                (let ((body (markup-body n)))
+                   (for-each (lambda (a)
+                                (display "\\author{")
+                                (output (markup-option a :name) e)
+                                (display "}\n\\affiliation{")
+                                (output (markup-option a :affiliation) e)
+                                (display "}\n\\authorinfo{")
+                                (output (markup-option a :address) e)
+                                (display "}\n"))
+                             (if (pair? body) body (list body))))))
+
+   ;; Emit "\ccsdesc[INDEX]{SECTION}".  The :subsection option is accepted
+   ;; but not output.
+   (markup-writer '&acm-category latex
+      :options '(:index :section :subsection)
+      :action (lambda (n e)
+                (display "\\ccsdesc[")
+                (display (markup-option n :index))
+                (display "]")
+                (display "{")
+                (display (markup-option n :section))
+                (display "}\n")))
+
+  ;; Emit \includegraphics for :file.  :width is used as a fraction of
+  ;; \textwidth and defaults to 1.0; :url, :height and :zoom are accepted
+  ;; but not used.
+  (markup-writer 'image latex
+     :options '(:file :url :width :height :zoom)
+     :action (lambda (n e)
+              (format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n"
+                      (or (markup-option n :width) 1.0)
+                      (markup-option n :file))))
+
+
+  ;; `prog' nodes of class "inline" are wrapped in \blockquote{...}.
+  (markup-writer 'prog latex
+     :class "inline"
+     :options '(:line :mark)
+     :before "\n\n\\blockquote{\n"
+     :action (lambda (n e)
+               ;; Delegate actual work to the "real" 'prog'.
+               (output (prog :line (markup-option n :line)
+                            :mark (markup-option n :mark)
+                            (node-body n))
+                      e))
+     :after "\n}\n\n")
+
+  ;; Figures: "[ht]" placement, body set in scriptsize, then a caption that
+  ;; carries a \label built from the canonicalized node identifier followed
+  ;; by the legend.  :number and :multicolumns are accepted but not used.
+  (markup-writer 'figure latex
+     :options '(:legend :number :multicolumns)
+     :action (lambda (n e)
+               (let ((ident (markup-ident n))
+                     (legend (markup-option n :legend)))
+                  (display "\\begin{figure}[ht]\n\\begin{scriptsize}\n")
+                  (output (markup-body n) e)
+                  (display "\n\\end{scriptsize}\n")
+                  (format #t "\\caption{\\label{~a}"
+                          (string-canonicalize ident))
+                  (output legend e)
+                  (display "}\\end{figure}\n")))))
+                     
+;; Abstract of the paper: BODY (any number of nodes) is handed to `!latex'
+;; with a format string that puts "$1" between \begin{abstract} and
+;; \end{abstract}.  NOTE(review): "$1" presumably stands for BODY; confirm
+;; against the definition of `!latex'.
+(define (acmart-abstract . body)
+  (!latex "\\begin{abstract}\n$1\n\\end{abstract}\n" body))
+
+;; Bibliography files handed to `bibliography'.  The first two live in the
+;; directories of other papers (els-2013, reppar-2015); "security.sbib" is
+;; looked up next to this file.  Presumably these provide the entries named
+;; by the (ref :bib …) calls in the document.
+(bibliography "../els-2013/guix.sbib")
+(bibliography "../reppar-2015/reppar.sbib")
+(bibliography "security.sbib")
+
+
+(document :title [Building a Secure Software Supply Chain with GNU Guix]
+   :author (list (author :name "Ludovic Courtès"
+                   :affiliation "Inria"
+                   :email "ludovic.courtes@inria.fr"
+                   :address [works as a research engineer at Inria,
+France.  He has been contributing to the development of GNU Guix since
+its inception in 2012 and works on its use in support of reproducible
+research workflows.]))
+
+   (!latex "\\paperdetails{perspective=engineering,
+area={Security programming}, license=cc-by-sa}\n")
+
+   ;; See <http://dl.acm.org/ccs/ccs_flat.cfm>.
+   (!latex "\\input{categories.tex}\n")
+
+   (acm-keywords [software deployment, security, version control, Git])
+
+   (acmart-abstract
+
+      ;; See <https://programming-journal.org/submission/>.
+      ;;
+      ;; Context: supply chain, vulns, executive order, Guix.
+      ;; Inquiry: how can we secure updates? others did nothing
+      ;; Approach: designed secure update
+      ;; Knowledge: provided secure updates
+      ;; Grounding: implemented, deployed
+      ;; Importance: show full deployment solution addressing issues
+
+     (p [The ,(emph [software supply chain]) is becoming a widespread
+analogy to designate the series of steps taken to go from source code
+published by developers to executables running on the users’ computers.
+A security vulnerability in any of these steps puts users at risk, and
+evidence shows that attacks on the supply chain are becoming more
+common.  The consequences of an attack on the software supply chain can
+be tragic in a society that relies on many interconnected software
+systems, and this has led research interest as well as governmental
+incentives for supply chain security to rise.])
+     (p [GNU Guix is a software deployment tool that supports provenance
+tracking, reproducible builds, and reproducible software environments.
+Guix is first and foremost source code: it provides a set of package
+definitions that describe how to build code from source.  Together,
+these properties set it apart from many deployment tools that center on
+the distribution of binaries.])
+     (p [This paper focuses on one research question: how can Guix and
+similar systems allow users to securely update their software?  Guix
+source code is distributed using the Git version control system;
+updating Guix-installed software packages means, first, updating the
+local copy of the Guix source code.  Prior work on secure software
+updates focuses on systems very different from Guix—systems such as
+Debian, Fedora, or PyPI where updating consists in fetching metadata
+about the latest binary artifacts available—and is largely inapplicable in
+the context of Guix.  Deployment tools that more closely resemble Guix,
+from Nix to Portage, either lack secure update mechanisms or suffer from
+shortcomings.])
+     (p [Our main contribution is a model and tool to authenticate new
+Git revisions.  We further show how, building on Git semantics, we build
+protections against downgrade attacks and related threats.  We explain
+implementation choices.  This work was deployed in production two
+years ago, giving us insight into its actual use at scale every day.  The
+Git checkout authentication at its core is applicable beyond the
+specific use case of Guix, and we think it could benefit developer
+teams that use Git.])
+     (p [As attacks on the software supply chain appear, security
+research is now looking at every link of the supply chain.  Secure
+updates are one important aspect of the supply chain, but this paper
+also looks at the broader context: how Guix models and implements the
+supply chain, from upstream source code to binaries running on
+computers.  While much recent work focuses on attestation—certifying
+each link of the supply chain—Guix takes a more radical approach:
+enabling independent ,(emph [verification]) of each step, building on
+reproducible builds, “bootstrappable” builds, and provenance tracking.
+The big picture shows how Guix can be used as the foundation of secure
+software supply chains.]))
+
+   
+   (chapter :title [Introduction]
+
+      (p [Package managers and related software deployment tools are in
+a key position when it comes to securing the “software supply
+chain”—they take source code fresh from repositories and provide users
+with ready-to-use binaries.  Between source code repositories and
+binaries users run, many things can go wrong: binaries can be
+compromised on their way to the user's machine ,(ref :bib
+'cappos2008:attacks), on the provider's servers, or possibly indirectly
+,(it [via]) toolchain compromise ,(ref :bib
+"thompson1984:trusting-trust").  Every software installation and every
+upgrade can put users at risk.  As the “last kilometer” of this supply
+chain, the way package managers distribute binaries and associated
+metadata led to security vulnerabilities that are now better understood
+and addressed ,(ref :bib 'samuel2010:survivable).  But recent
+high-profile cases have reminded us that software supply chain attacks
+occurring ,(emph [before]) the distribution step are a very real threat
+,(ref :bib '(lamb2021:reproducible peisert2021:solarwinds)).  This led,
+for example, the US Government to call for work in this area in its
+Executive Order on cybersecurity, explicitly mentioning actions such as
+“using administratively separate build environments” and “employing
+automated tools (…) to maintain trusted source code supply chains” ,(ref
+:bib 'biden2021:executive-order).  That there is room for improvement in
+current practices and tools is unquestioned.])
+      
+      (p [GNU Guix is a set of software deployment tools and a
+standalone GNU/Linux distribution.  It includes a package manager with a
+command-line interface similar to that of Debian’s apt or Fedora’s yum,
+allowing users to search for software packages, to install them, and to
+upgrade them.  Unlike apt, yum, and many popular package managers, Guix
+builds upon the ,(emph [functional deployment model]) pioneered by Nix
+,(ref :bib "dolstra2004:nix"), a foundation for reproducible deployment,
+reproducible and verifiable builds, and provenance tracking.  Guix is essentially a
+“source-based” deployment tool: the ,(emph [model]) is that of a system
+where every piece of software is built from source, and pre-built
+binaries are viewed as a mere optimization and not as a central aspect
+of its design.])
+      
+     (p [This paper focuses on one research question: how can Guix and
+similar systems allow users to securely update their software?  Guix
+source code is distributed using the Git version control system;
+updating Guix-installed software packages means, first, updating the
+local copy of the Guix source code.  Prior work on secure software
+updates ,(ref :bib '(samuel2010:survivable kuppusamy2017:mercury))
+focuses on systems very different from Guix—systems such as
+Debian, Fedora, or PyPI where updating consists in fetching metadata
+about the latest binary artifacts available—and is largely
+inapplicable in the context of Guix.  Deployment tools that more closely
+resemble Guix, from Nix to Portage and BSD Ports ,(ref :bib
+'(dolstra2004:nix brew2022:github condaforge2022:web
+freebsd2022:handbook pkgsrc2022:guide gentoo2022:portage-security)),
+either lack secure update mechanisms or suffer from shortcomings.])
+
+;;       (p [More generally, contrary to recent work on supply chain
+;; security that revolves around ,(emph [attestation]) of the various
+;; supply chain links ,(ref :bib '(torresarias2019:intoto google2021:slsa
+;; sigstore2021:web)), Guix takes a radical approach to support ,(emph
+;; [independent verification]).])
+
+      (p [We describe the design and implementation of Guix’s
+secure update mechanism.  ,(numref :text [Section] :ident "background")
+gives background information necessary to understand the overall
+deployment model of Guix, showing how it supports ,(emph [independent
+verification]) of key links of the software supply chain.
+,(numref :text [Section] :ident "rationale")
+presents our goals and threat model for the design of secure updates.
+,(numref :text [Section] :ident "authenticating") describes our design
+of a Git checkout authentication mechanism and ,(numref :text [Section]
+:ident "bootstrapping") discusses trust establishment.  ,(numref :text
+[Section] :ident "downgrade") shows how we address downgrade attacks
+while ,(numref :text [Section] :ident "mirrors") focuses on the related
+risk of distributing stale revisions.  In ,(numref :text [Section]
+:ident "implementation") we provide key elements of the implementation
+and report on our experience.  Last, ,(numref :text [Section]
+:ident "related") compares to related work and ,(numref :text [Section]
+:ident "conclusion") concludes.]))
+   
+   (chapter :title [Background] :ident "background"
+      
+      (p [Users of free operating systems such as GNU/Linux are familiar with
+,(emph [package managers]) like Debian's ,(tt [apt]), which allow them
+to install, upgrade, and remove software from a large collection of free
+software packages.  GNU Guix,(footnote (url "https://guix.gnu.org")) is
+such a tool, though it can be thought of more broadly as a toolbox for
+software deployment with salient features and processes that improve
+security: a foundation for ,(emph [reproducible builds]), and what we
+call ,(emph [bootstrappable builds]).])
+      
+      (section :title [A Deployment Toolbox]
+
+        (p [Guix provides a command-line interface similar to that of
+other package managers: ,(tt [guix install python]), for instance,
+installs the Python interpreter, ,(tt [guix pull]) updates Guix itself
+and the set of available packages, and ,(tt [guix upgrade]) upgrades
+previously-installed packages to their latest available version.
+Package management is per-user rather than system-wide; it does not
+require system administrator privileges, nor does it require mutual
+trust among users.])
+
+        (p [Providing more than 20,000 software packages today, Guix is
+used as a general purpose day-to-day GNU/Linux distribution that
+provides the additional safety net of ,(emph [transactional upgrades and
+rollbacks]) for all software deployment operations.  For example, if an
+upgrade has undesired effects, users can run ,(tt [guix package
+--roll-back]) to immediately restore packages as they were before the
+upgrade.  Its ability to reproduce software environments, bit for bit,
+at different points in time and on different machines, makes it a tool
+of choice in support of reproducible computational experiments and
+software engineering ,(ref :bib 'hinsen2020:staged-computation).])
+        
+        (p [Guix can be used on top of another system; the only
+requirement is that the system runs the Linux kernel—be it Android or a
+GNU/Linux distribution.  Guix packages stand alone: they provide all the
+user-land software they need, down to the C library; this guarantees
+they behave the same on any system.])
+
+        (p [There are other tools beyond the “package manager”
+interface.  The ,(tt [guix shell]) command, for example, creates a
+one-off development environment containing the given packages.
+The ,(tt [guix pack]) command creates
+standalone ,(emph [application bundles]) or ,(emph [container images])
+providing one or more software packages and all the packages they depend
+on at run time.  The container images can be loaded by Docker, podman,
+and similar “container tools” to run the software on any other
+machine.])
+                
+        (p [Last, Guix can be used as a standalone GNU/Linux
+distribution called Guix System.  Its salient feature is that it lets
+users declare the ,(emph [whole system configuration])—from user
+accounts, to services and installed packages—using a domain-specific
+language (DSL) embedded in Scheme, a functional programming language of
+the Lisp family ,(ref :bib 'sperber09:r6rs).  The ,(tt [guix system
+reconfigure]) command changes the running system to match the
+user-provided configuration.  This is an atomic operation and users can
+always roll back to an older “generation” of the system, should anything
+go wrong.  The ,(tt [guix system image]) command can create system
+images in a variety of formats, including the QCOW2 format commonly-used
+for virtual machines (VMs) and emulators such as QEMU.  ,(tt [guix
+deploy]) goes a step further and can deploy Guix System ,(emph [on a set
+of machines]), be it over secure shell (SSH) connections or using the
+interfaces of a virtual private server (VPS) provider.]))
+      
+      (section :title [Reproducible Builds]
+
+        (p [At its core, Guix is a ,(emph [functional]) deployment tool
+that builds upon the ideas developed for the Nix package manager by
+Dolstra ,(it [et al.]) ,(ref :bib '(dolstra2004:nix
+courtes2013:functional)).  The term “functional” means that software
+build processes are considered as ,(emph [pure functions]): given a set
+of inputs (compiler, libraries, build scripts, and so on), a package’s
+build function is assumed to always produce the same result.  Build
+results are stored in an immutable persistent data structure, the ,(emph
+[store]), implemented as a single directory, ,(tt [/gnu/store]).  Each
+entry in ,(tt [/gnu/store]) has a file name composed of the hash of all
+the build inputs used to produce it, followed by a symbolic name.  For
+example, ,(tt [/gnu/store/yr9rk90jf…-gcc-10.3.0]) identifies a specific
+build of GCC 10.3.  A variant of GCC 10.3, for instance one using
+different build options or different dependencies, would get a different
+hash.  Thus, each store file name uniquely identifies build results.
+This model is the foundation of ,(emph [end-to-end provenance
+tracking]): Guix records and uniquely identifies the inputs leading to
+build results available in ,(tt [/gnu/store]).])
+
+        (p [Guix, like Nix and unlike Debian or Fedora, is essentially a
+,(emph [source-based distribution]): Guix package definitions describe
+how to build packages from source.  When running a command such as ,(tt
+[guix install gcc]), Guix proceeds as if it were to build GCC from
+source.  As an optimization, users can enable fetching pre-built
+binaries—called ,(emph [substitutes]) because they are substitutes for a
+local build.  In that case, instead of building locally, Guix asks one
+or more servers for substitutes.  In the example above, it would ask
+specifically for substitutes for ,(tt
+[/gnu/store/yr9rk90jf…-gcc-10.3.0]), which unambiguously identifies the
+desired build output.  Substitutes are cryptographically signed by the
+server and Guix rejects substitutes not signed by one of the keys the
+user authorized.])
+
+        (figure :legend [The ,(tt [guix]) command makes remote
+procedure calls (RPCs) to a build daemon, which spawns hermetic builds
+on its behalf.]
+           :ident "fig-build-daemon"
+          (!latex (call-with-input-file "images/guix-build-daemon.tex"
+                    get-string-all)))
+
+        (p [To maximize chances that build processes actually look like
+pure functions, they are spawned in isolated build environments—Linux
+,(emph [containers])—ensuring that only explicitly declared inputs are
+visible to the build process.  This method, inherited from Nix ,(ref
+:bib 'dolstra2004:nix), is illustrated in ,(numref :text [Figure] :ident
+"fig-build-daemon"): ,(tt [guix]) commands make remote procedure calls
+(RPCs) to a build daemon, which spawns build processes in isolated
+environments on their behalf and stores the build result in ,(tt
+[/gnu/store]).])
+
+        (p [Build isolation, in turn, helps achieve bit-for-bit
+,(emph [reproducible builds]), which are critical from a security
+standpoint ,(ref :bib 'lamb2021:reproducible).  Reproducible builds
+enable users and developers to verify that a binary matches a given
+piece of source code: anyone can rebuild the package and ensure they
+obtain the same binary, bit for bit.  The explicit and unambiguous
+mapping from source to binary that the functional deployment model
+provides makes verification clear and easy.  For example, the command
+,(tt [guix build --check hello]) rebuilds the ,(tt [hello]) package
+locally and prints an error if the build result differs from that
+already available.  Likewise, ,(tt [guix challenge hello]) compares
+binaries of the ,(tt [hello]) package available locally with those
+provided by one or several substitute servers.]))
+        
+      (section :title [Bootstrappable Builds]
+
+        (p [Are reproducible builds enough to guarantee that one can
+verify source-to-binary mappings?  In his Turing Award acceptance
+speech, Ken Thompson described a scenario whereby a legitimate-looking
+build process would produce a malicious binary ,(ref :bib
+'thompson1984:trusting-trust).  If that build process is reproducible,
+it just reproducibly builds a malicious binary.  The attack Thompson
+described, often referred to as a “Trusting Trust attack”, consists in
+targeting the compilation toolchain, typically by modifying the compiler
+such that it emits malicious code when it recognizes specific patterns
+of source code.  This attack can be undetectable.  What makes such
+attacks possible is that users and distributions rely on opaque binaries
+at some level to “bootstrap” the entire package dependency graph.])
+        
+        (p [GNU/Linux systems are built around the C language.  At the
+root of the package dependency graph, we have the GNU C Library (glibc),
+the GNU Compiler Collection (GCC), the GNU Binary Utilities (Binutils),
+and the GNU command-line utilities (Coreutils, grep, sed, Findutils,
+etc.)—all this written in C and C++.  How does one build the first GCC
+though?  Historically, distributions such as Debian would informally rely on
+previously-built binaries to build the new ones: when GCC is upgraded, it
+is built using GCC as available in the previous version of the
+distribution.])
+        
+        (p [The functional build model does not allow us to “cheat”:
+the whole dependency graph has to be described and be self-contained.
+Thus, it must describe how the first GCC and C library are obtained.
+Initially, Guix would rely on pre-built statically-linked binaries of
+GCC, Binutils, libc, and the other packages mentioned above to get
+started ,(ref :bib 'courtes2013:functional).  Even though these ,(emph
+[binary seeds]) were eventually built with Guix and thus reproducible
+and verifiable using the same Guix revision, they were just that: around
+250 MiB of opaque, non-auditable binaries.])
+
+        (p [In 2017, Nieuwenhuizen ,(it [et al.]) sought to address
+this forty-year-old problem at its root: by ensuring no opaque binaries
+appear at the bottom of the package dependency graph—no less ,(ref :bib
+'janneke:mes-web).  To that end, Nieuwenhuizen developed GNU Mes, a
+small interpreter of the Scheme language written in C, capable enough to
+run MesCC, a non-optimizing C compiler.  MesCC is then used to build
+TinyCC, a more sophisticated C compiler written in C, in turn used to
+build an old version of GCC, until we get to the modern GCC, written in
+C++.  That, coupled with other heroic
+efforts, led to a drastic reduction of the size of the opaque binaries
+at the root of the Guix package graph, well below what had been achieved
+so far ,(ref :bib '(janneke2020:bootstrap courant2022:ocamlboot)).
+While many considered it unrealistic a few years earlier, the initial
+goal of building ,(emph [everything]) from source, starting from a small
+core and incrementally building more complex pieces of software, is now
+within reach ,(ref :bib 'janneke2021:full-source-bootstrap).  This has
+the potential to thwart an entire class of software supply chain attacks
+that has been known but left unaddressed for forty years.])
+        
+        (p [Bootstrapping issues like these do not exist solely at the
+level of the C language; they show up in many compilers and occasionally
+in build systems too ,(ref :bib 'wurmus2022:bootstrappable-web).  Several
+of them were addressed in Guix for the first time: the Java development
+kit (JDK) is entirely built from source ,(ref :bib
+'wurmus2017:jdk-bootstrap), and so are the Rust ,(ref :bib
+'milosavljevic2018:rust-bootstrap) and OCaml compilers ,(ref :bib
+'courant2022:ocamlboot).])))  ;TODO: mention DDC?
+   
+   (chapter :title [Rationale] :ident "rationale"
+      
+      (p [As we have seen, Guix is conceptually a source-based
+distribution.  It addresses common classes of software supply chain
+attacks in two ways: by reducing and eventually removing reliance on
+opaque binaries at the root of its dependency graph, and by affording
+reproducible builds.  Guix users can choose to obtain pre-built binaries
+for software they install, and reproducible builds guarantee that anyone
+can verify that providers of those binaries are not distributing modified
+or malicious versions.])
+
+      (p [The security issue that the remainder of this paper focuses on
+is that of ,(emph [distributing updates securely]): how can users know
+that updates to Guix and its package collection that they fetch are
+genuine?  The problem of securing software updates is often viewed
+through the lens of binary distributions such as Debian, where the main
+assets to be protected are the binaries themselves ,(ref :bib
+'cappos2008:attacks).  Guix being a source-based distribution, the
+question has to be approached from a different angle.])
+
+      (figure :legend [Supplying software with Guix: developers (left)
+write package definitions that contain a cryptographic hash of the
+source code, test them, and publish them in the Git
+repository; users (right) update their copy from Git using ,(tt [guix
+pull]) and either fetch binaries for the packages they need or build
+them locally.]
+        :ident "fig-package-workflow"
+        
+        (!latex (call-with-input-file "images/guix-package-workflow.tex"
+                   get-string-all)))
+
+      (p [Guix consists of source code for the tools as well as package
+definitions that make up the GNU/Linux distribution.  Package
+definitions contain the URL and cryptographic hash of their source code;
+it is up to package developers writing those definitions to authenticate
+upstream’s source code, for instance by verifying OpenPGP signatures.
+All this code is
+maintained under version control in a Git repository.
+To update Guix
+and its package collection, users run ,(tt [guix pull])—the equivalent
+of ,(tt [apt update]) in Debian.  When users run ,(tt [guix pull]), what
+happens behind the scenes is equivalent to ,(tt [git clone]) or ,(tt [git
+pull]).  This workflow is illustrated in ,(numref :text [Figure] :ident
+"fig-package-workflow").])
+      
+      (p [There are several ways this update process can lead users to
+run malicious code.  An attacker could trick the user into connecting to
+an alternate repository that contains malicious code or definitions for
+backdoored packages.  This is made more difficult by the fact that code
+is fetched over HTTPS by default, which allows clients to authenticate
+the server they are connecting to.  However, server authentication is of
+no use when the server hosting the repository is compromised, as
+happened to GNU’s Savannah in 2010 ,(ref :bib 'fsf2010:compromise).])
+      
+      (p [An attacker who gained access to the server hosting the Guix
+repository can push code, which every user would then pull.  The
+change might even go unnoticed and remain in the repository forever.
+They may also reset the main branch to an earlier revision, leading
+users to install outdated software with known vulnerabilities—a ,(emph
+[downgrade attack]) ,(ref :bib '(cappos2008:attacks kuppusamy2017:mercury
+torresarias2016:omitting)).
+Likewise, the attacker may change the main branch
+reference so it points to a different branch, containing new malicious
+code—a ,(emph [teleport attack]) ,(ref :bib 'torresarias2016:omitting).])
+
+      (p [To summarize, we want to protect against attacks that could be
+made by gaining access to the Git repository of Guix: introduction of
+malicious changes by the attacker, downgrade attacks, and teleport
+attacks.  We do ,(emph [not]) aim to protect against cases where an
+attacker is able to impersonate an authorized developer or otherwise
+force them into pushing malicious changes; in our model, authorized
+developers are ultimately trusted.]))
+   
+   (chapter :title [Authenticating Git Checkouts]
+      :ident "authenticating"
+      
+      (p [Taking a step back, the problem we are trying to solve is
+not specific to Guix and to software deployment tools: it’s about
+,(emph [authenticating Git checkouts]).  By that, we mean that when ,(tt [guix pull])
+obtains code from Git, it should be able to tell that all the commits it
+fetched were pushed by authorized developers of the project.  We are
+really looking at individual commits, not tags, because users can choose
+to pull at arbitrary points in the commit history of Guix and of third-party
+channels.  Surprisingly, we found that similar Git-backed source-based
+deployment tools such as Nix do not address this problem, and there were no
+existing tools or protocols supporting off-line checkout authentication to
+our knowledge—we get back to that in ,(numref :text [Section] :ident
+"related").])
+      
+      (p [Git is an append-only, content-addressed version control
+system.  Revision history in Git is represented by a graph of commit
+objects: each commit can have zero or more parent commits.  In the
+common case, there is a single parent commit pointing to the previous
+revision; “merge commits”, which are created by merging the history of
+two development branches, have two parents.  “Append-only” means that
+one only ever ,(emph [adds]) new commits to the graph.
+“Content-addressed” corresponds to the fact that commits are referred to
+by their cryptographic content hash, currently computed with SHA-1 (more
+on that in ,(numref :text [Section] :ident "sha1")); the contents of a
+revision, ,(it [trees]) in Git parlance, as well as other kinds of data
+stored in a Git repository, are all content-addressed.  As an exception,
+metadata, such as references to the latest commit of a branch, is ,(emph
+[not]) stored in the content-addressed store ,(ref :bib
+'torresarias2016:omitting).])
+
+      (p [Git supports ,(emph [signed commits]).  A signed commit
+includes an additional header containing an ASCII-armored OpenPGP
+signature computed over the other headers of the commit.  By signing a
+commit, a Guix developer asserts that they are the one who made the
+commit; they may be its author, or they may be the person who applied
+somebody else’s changes after review.  Checkout authentication requires
+cryptographically signed commits.  It also requires a notion of
+authorization: we do not simply want commits to have a valid signature,
+we want them to be signed by an authorized key.  The set of authorized
+keys changes over time as people join and leave the project.  The
+authentication mechanism must be able to deal with those changes; a
+developer’s signature may only be considered valid for the period during
+which the developer was officially an authorized committer.])
+
+      (p [To implement that, we came up with the following mechanism and rule:
+
+,(enumerate
+    (item [The repository contains a ,(tt [.guix-authorizations])
+     file
+     that lists the OpenPGP key fingerprints of authorized committers.])
+    (item [A commit is considered authentic if and only if it is signed by one
+     of the keys listed in the ,(tt [.guix-authorizations]) file of each of
+     its parents.  We call this the ,(emph [authorization invariant]).]))
+     
+The ,(tt [.guix-authorizations]) format is a Lisp-style s-expression,
+as shown in ,(numref :text [Figure] :ident
+"fig-guix-authorizations").  Such a
+structured format leaves room for extensions, such as per-file
+authorizations.])
+
+     (figure
+       :legend [Example ,(tt [.guix-authorizations]) file listing
+       authorized committers.]
+       :ident "fig-guix-authorizations"
+       
+       (prog :line #f
+          (source :language scheme [
+(authorizations
+  (version 0)               ;current file format version
+
+  (("AD17 A21E F8AE D8F1 CC02  DBD9 F8AE D8F1 765C 61E3"
+    (name "alice"))
+   ("2A39 3FFF 68F4 EF7A 3D29  12AF 68F4 EF7A 22FB B2D5"
+    (name "bob"))
+   ("CABB A931 C0FF EEC6 900D  0CFB 090B 1199 3D9A EBB5"
+    (name "charlie"))))])))
+
+      (figure
+        :legend [Graph of commits and the associated authorizations.]
+        :ident "fig-commits"
+        (image :file "images/commit-graph.pdf"
+           :width 0.7))
+
+      (p [Let us take an example to illustrate the authorization
+invariant.  In ,(numref :text [Figure] :ident "fig-commits"), each box
+is a commit, and each arrow is a parent relationship.  This figure shows
+two lines of development: the orange line on the left may be the main
+development branch, while the purple line may correspond to a feature
+branch that was eventually merged in commit ,(it [F]).  ,(it [F]) is a
+merge commit, so it has two parents: ,(it [D]) and ,(it [E]).])
+
+      (p [Labels next to boxes show who is in ,(tt [.guix-authorizations]): for commit A,
+only Alice is an authorized committer, and for all the other commits,
+both Bob and Alice are authorized committers.  For each commit, the
+authorization invariant holds; for example:
+
+,(itemize
+    (item [commit ,(it [B]) was made by Alice, who was the only authorized committer
+    in its parent, commit ,(it [A]);])
+    (item [commit ,(it [C]) was made by Bob, who was among the authorized committers
+    as of commit ,(it [B]);])
+    (item [commit ,(it [F]) was made by Alice, who was among the authorized
+    committers of both parents, commits ,(it [D]) and ,(it [E]).]))
+
+The authorization invariant has the nice property that it is simple to
+state, and simple to check and enforce.  This is what ,(tt [guix pull])
+implements.  If a user’s current Guix revision (as returned by the ,(tt
+[guix describe]) command) is at commit ,(it [A]) and the user wants to
+pull to commit ,(it [F]), ,(tt [guix pull]) traverses all these commits
+and checks the authorization invariant.])
+
+      (p [Once a commit has been authenticated, all the commits in its
+transitive closure are known to be already authenticated.  ,(tt [guix pull])
+keeps a local cache of the commits it has previously authenticated,
+which allows it to traverse only new commits.  For instance, if you are
+at commit ,(it [F]) and later update to a descendant of ,(it [F]), authentication
+starts at ,(it [F]).])
+
+      (p [Since ,(tt [.guix-authorizations]) is a regular file under version
+control, granting or revoking commit authorization does not require
+special support.  In the example above, commit ,(it [B]) is an authorized
+commit by Alice that adds Bob’s key to ,(tt [.guix-authorizations]).
+Revocation is similar: any authorized committer can remove entries from
+,(tt [.guix-authorizations]).  Key rotation can be handled similarly: a
+committer can remove their former key and add their new key in a single
+commit, signed by the former key.  If a developer’s key is compromised,
+for instance because their laptop was stolen, they can notify other
+committers so they immediately remove the key, thereby preventing it
+from being used to push new commits.])
+
+      (p [The authorization invariant satisfies the needs of the Guix
+project.  It has one downside: it does not play well with the
+pull-request-style workflow popularized by some source code hosting
+platforms.  Indeed, merging the branch of a contributor not listed in ,(tt
+[.guix-authorizations]) would break the authorization invariant, unless
+the committer who accepts the changes signs them, which involves
+rewriting the commit history of the branch that was submitted.  It is a
+good tradeoff for Guix where the contribution workflow relies on patches
+sent by email to a patch tracker, and where commits are signed by the
+committer rather than the original author, but it may be less suitable
+for other workflows.]))
+   
+   (chapter :title [Establishing Trust] :ident "bootstrapping"
+      
+      (p [You may have noticed that something is missing from the
+explanation above: what do we do about commit ,(it [A]) in ,(numref
+:text [Figure] :ident "fig-commits")?  In other words, which commit
+do we pick as the first one where we can start verifying the
+authorization invariant?])
+
+      (figure
+        :legend [The introductory commit in a commit graph.]
+        :ident "fig-commit-graph-intro"
+        (image :file "images/commit-graph-intro.pdf"
+           :width 0.6))
+
+      (p [We solve this bootstrapping issue by defining ,(emph [channel introductions]).
+Previously, one would identify a channel solely by its URL.  Now, when
+introducing a channel to users, one needs to provide an additional piece
+of information: the first commit where the authorization invariant
+holds, and the fingerprint of the OpenPGP key used to sign that commit
+(the fingerprint is not strictly necessary from a security perspective
+but it provides an additional check).])
+
+      (p [Consider the commit graph on ,(numref :text [Figure] :ident
+"fig-commit-graph-intro").  On this figure, ,(it [B]) is the ,(emph [introductory commit]).  Its
+ancestors, such as ,(it [A]), are considered authentic.  To authenticate ,(it [C]),
+,(it [D]), ,(it [E]), and ,(it [F]), we check the authorization
+invariant.  Commits ,(it [G]) and ,(it [H]) are considered inauthentic
+because they are not descendants of the introductory commit, ,(it [B]).])
+
+      (p [As always when it comes to establishing trust, distributing
+channel introductions is very sensitive.  The introduction of the
+official ,(tt [guix]) channel is built into Guix.  Users obtain it when
+they install Guix for the first time.  Installation instructions tell
+users to verify the provided OpenPGP detached signature on the tarball
+or ISO installation image they download.  This reduces the chances of
+getting the “wrong” Guix, following a trust-on-first-use (TOFU)
+approach.])
+
+      (figure
+        :legend [Specification of a channel along with its ,(emph [introduction]).]
+        :ident "fig-channel-spec"
+           
+        (prog :line #f
+           (source :language scheme [
+(channel
+  (name 'my-channel)
+  (url "https://example.org/my-channel.git")
+  (introduction
+   (make-channel-introduction
+    "6f0d8cc0d88abb59c324b2990bfee2876016bb86"
+    (openpgp-fingerprint
+     "CABB A931 C0FF EEC6 900D  0CFB 090B 1199 3D9A EBB5"))))])))
+
+      (p [Guix supports third-party channels providing extra software
+packages.  To use a third-party channel, one needs to add it to the ,(tt
+[~/.config/guix/channels.scm]) configuration file, which contains a
+declarative Scheme code snippet listing the desired channels.  Authors
+of third-party channels can also benefit from the channel authentication
+mechanism: they need to sign commits, to include a ,(tt
+[.guix-authorizations]) file and the list of relevant OpenPGP keys, and
+to advertise the channel's introduction.  Users then have to provide the
+channel’s introduction in their ,(tt [channels.scm]) file, as shown in
+,(numref :text [Figure] :ident "fig-channel-spec").])
+
+      (p [The ,(tt [guix describe]) command prints the introduction if
+there’s one.  That way, one can share their channel configuration,
+including introductions, without having to be an expert.])
+
+      (p [Channel introductions also solve another problem: ,(it
+[forks]).  Forks are an integral part of free software, which gives
+everyone the right to distribute modified copies of the software; one
+might choose to distribute a fork of Guix or a fork of a channel with
+different features or different packages.  Respecting the authorization
+invariant “forever” would effectively prevent “unauthorized” forks—forks
+made by someone who is not in ,(tt [.guix-authorizations]).  To address
+this, someone publishing a fork advertises a new introduction for their
+fork, pointing to a different starting commit.])
+
+      ;; XXX: Pointless paragraph?
+      #;(p [Last, channel introductions give a ,(emph [point of
+reference]).  Assume an attacker attempts a ,(emph [teleport attack]) by
+modifying branch references on the server hosting the official
+repository so they point to commits of their choice ,(ref :bib
+'torresarias2016:omitting).  For such a change to (potentially) go
+undetected, the attacker must choose commits that (1) are descendants of
+the introductory commit, and (2) satisfy the authorization invariant.
+In other words, the attacker may only divert users to a development
+branch published by the project developers.]))
+   
+   (chapter :title [Downgrade Attacks] :ident "downgrade"
+      
+      (p [An important threat for software deployment tools is
+,(emph [downgrade attacks]), also called ,(emph [roll-back])
+or ,(emph [replay]) attacks ,(ref :bib '(cappos2008:attacks kuppusamy2017:mercury)).
+The attack consists in tricking
+users into installing older, known-vulnerable software packages, which
+in turn may offer new ways to break into their system.  This is not
+strictly related to the authentication issue discussed above, but it is
+an important issue that is more easily addressed with this model in
+place.])
+
+      (p [Guix saves information about its own provenance—the Git
+commits of the channels used by ,(tt [guix pull]).  The ,(tt [guix
+describe]) command prints that information:]
+
+     (prog :line #f :class "inline" [
+$ guix describe
+Generation 201  Jan 12 2022 18:15:13    (current)
+  guix 0052c3b
+    repository URL: https://git.savannah.gnu.org/git/guix.git
+    branch: master
+    commit: 0052c3b0458fba32920a1cfb48b8311429f0d6b5
+])
+
+[In other words, the ,(tt [guix]) command being used was built
+from commit ,(tt [0052c3b…]) of the official Git repository.
+Once ,(tt [guix pull]) has retrieved the latest commit of the
+selected branch, it can thus verify that it is doing a ,(emph [fast-forward update]), in
+Git parlance—just like ,(tt [git pull]) does, but compared to the
+previously-deployed Guix.  A fast-forward update is when the new commit
+is a descendant of the current commit.  Going back to the figure above,
+going from commit ,(it [A]) to commit ,(it [F]) is a fast-forward update, but going
+from ,(it [F]) to ,(it [A]) or from ,(it [D]) to ,(it [E]) is not.])
+
+      (p [Doing a non-fast-forward update would mean that the user is
+deploying an older version of the Guix currently used, or deploying an
+unrelated version from another branch.  In both cases, the user is at
+risk of ending up installing older, vulnerable software.  By default
+,(tt [guix pull]) errors out on non-fast-forward updates, thereby
+protecting from roll-backs.  Users who understand the risks can override
+that by passing ,(tt [--allow-downgrades]).])
+
+      (p [This does not protect against all forms of ,(emph [branch
+teleport attacks]) as described by Torres-Arias ,(emph [et al.]) ,(ref
+:bib 'torresarias2016:omitting).  Specifically, an attacker with access
+to the server hosting the Git repository could modify the reference of
+the ,(tt [master]) branch so that it points to an existing development
+branch that derives from ,(tt [master]).  Users running ,(tt [guix
+pull]) would upgrade to that branch without problems—it is a
+fast-forward update.  Development branches are usually infrequently
+merged with ,(tt [master]) and do not receive package security updates
+very often; consequently this attack could lead users to install
+outdated packages ,(ref :bib 'devos2021:diverted).  Users may not notice
+the attack because, as long as the branch is active, ,(tt [guix pull])
+would still retrieve new changes.  However, it would be difficult to
+hide from developers, which makes the attack less attractive.])
+
+      (p [Downgrade prevention has been extended to system deployment.
+When deploying a system with ,(tt [guix system reconfigure]) or a fleet
+of systems with ,(tt [guix deploy]), the currently used channels are
+recorded in the deployed system, as can be seen by running ,(tt [guix
+system describe]):])
+       
+       (prog :line #f :class "inline" [
+$ guix system describe
+Generation 161 Apr 27 2021 22:04:13    (current)
+  file name: /var/guix/profiles/system-161-link
+  canonical file name: /gnu/store/dyx1j…-system
+  label: GNU with Linux-Libre 5.11.16
+  bootloader: grub-efi
+  root device: label: "root"
+  kernel: /gnu/store/k029d…-linux-libre-5.11.16/bzImage
+  channels:
+    guix:
+      repository URL: https://git.savannah.gnu.org/git/guix.git
+      branch: master
+      commit: d904abe0768293b2322dbf355b6e41d94e769d78
+  configuration file: /gnu/store/m8rql…-configuration.scm
+])
+       
+       (p [This is useful information for users who wish to map a deployed
+system back to its source code.  We take advantage of that information
+to implement ,(emph [system downgrade prevention]): like ,(tt [guix
+pull]), deploying a system with ,(tt [guix system reconfigure]) or ,(tt
+[guix deploy]) now fails with an error when attempting a
+non-fast-forward update.  To our knowledge, this is the first time
+downgrade prevention is implemented at this level.]))
+   
+   (chapter :title [Mirrors and the Risk of Staleness]
+      :ident "mirrors"
+
+      (p [For package managers, mirrors of the official repositories
+are a known security risk ,(ref :bib 'cappos2008:attacks).
+Authentication and roll-back prevention as described above allow users to safely
+refer to mirrors of the official Git repository of Guix.  If the official repository at
+,(tt [git.savannah.gnu.org]) is down, one can still update by fetching
+from a mirror, for instance with:]
+
+    (prog :line #f :class "inline" [
+guix pull --url=https://github.com/guix-mirror/guix
+])
+
+[If the repository at this URL is behind what the user already deployed,
+or if it’s not a genuine mirror, ,(tt [guix pull]) will abort.  In other
+cases, it will proceed.])
+
+      (p [Unfortunately, there is no way to answer the general question
+“,(it [is]) X ,(it [the latest commit of branch]) B,(it [?])”.
+Rollback detection prevents just that, rollbacks, but there is no
+mechanism in place to tell whether a given mirror is stale.  To mitigate
+that, channel authors can specify, in the repository, the channel’s
+,(emph [primary URL]).  This piece of information lives in the
+,(tt [.guix-channel]) file, in the channel’s repository, so it’s authenticated.  ,(tt
+[guix pull]) uses it to print a warning when the user pulls from a
+mirror:]
+
+    (prog :line #f :class "inline" [
+$ guix pull --url=https://github.com/guix-mirror/guix
+Updating channel 'guix' from 'https://github.com/guix-mirror/guix'...
+Authenticating channel 'guix', commits 9edb3f6 to 3e51f9e...
+guix pull: warning: pulled channel 'guix' from a mirror of 
+  https://git.savannah.gnu.org/git/guix.git, which might be stale
+
+Building from this channel:
+  guix      https://github.com/guix-mirror/guix 3e51f9e
+…
+]))
+
+      (p [Together with downgrade prevention, it makes it more difficult
+to trick users into getting stale revisions.]))
+
+   (chapter :title [Implementation]
+      :ident "implementation"
+      
+      (p [Channel authentication as described above is now used in
+production.  This section documents the reasoning behind some of the
+important implementation choices that we made and our early
+experience.])
+
+      (section :title [Commit Signatures]
+        
+        (p [We chose to use detached OpenPGP signatures on commits.
+This choice was not motivated by a belief that OpenPGP is the “right
+tool for the job”—on the contrary, its complexity, which is a result of its
+broad and extensible specification ,(ref :bib
+'callas2007:rfc4880-openpgp), made it a poor candidate in our eyes.
+More focused options such as minisign ,(ref :bib
+'denis2021:minisign-web) looked more appealing.  However, we felt that
+the fact that OpenPGP commit signing is well-supported by Git,(footnote
+[As of this writing, Git tools only support OpenPGP, but work started in
+2021 to support cryptography tools other than OpenPGP/GnuPG ,(ref :bib
+'huseby2021:git-crypto).]) makes a significant practical difference:
+developers can easily be set up to sign commits with GnuPG and commands
+such as ,(tt [git log]) can verify and display signatures; ways to deal
+with OpenPGP keys and signatures, although complex, are also
+well-documented.])
+        
+        (p [Key distribution is an important issue.  We did not want
+the whole mechanism to lazily fetch public keys from key servers:
+this was bound to be unreliable and
+slow.  We instead chose to store keys inside the repository, as plain
+binary or ASCII-armored OpenPGP packets.  Our recommendation is to keep
+them in a dedicated branch to avoid cluttering regular branches (channel
+authors can include in the channel metadata the name of the branch where
+keys are to be found).  The authentication code loads keys in memory
+when it starts and looks them up for signature verification.  All the
+keys ever used to sign commits must be kept in the repository so that
+past commits can be authenticated.  Guix today has 81 ASCII-armored keys
+representing less than 2 MiB.  If needed, this could be reduced by
+removing unused OpenPGP packets from the keys, such as signature
+packets, and by storing them in binary format.]))
+
+     (section :title [Notes on SHA-1] :ident "sha1"
+
+       (p [We cannot really discuss Git commit signing without
+mentioning SHA-1.  The venerable cryptographic hash function is
+approaching end of life, as evidenced by recent breakthroughs ,(ref :bib
+'(stevens2017:collision leurent2020:shambles)).  Signing a Git commit
+boils down to signing a SHA-1 hash, because all objects in the Git store
+are identified by their SHA-1 hash.])
+
+       (p [Git now relies on a collision attack detection library to
+mitigate practical attacks ,(ref :bib 'stevens2017:detection).
+Furthermore, the Git project is planning a hash function transition to
+address the problem ,(ref :bib 'git2021:hash-transition).])
+
+       (p [Some projects such as Bitcoin Core choose to not rely on
+SHA-1 at all.  Instead, for the commits they sign, they include in the
+commit log the SHA512 hash of the tree, which the verification scripts
+check ,(ref :bib 'bitcoin2021:verify-commits).  Computing a tree hash
+,(emph [for each commit]) in Guix would probably be prohibitively
+costly.  It also would not address the fact that ,(emph [every]) Git
+object, not just trees but also commit objects and “blobs” (file
+contents), is SHA-1-addressed.  For now, for lack of a better solution,
+we rely on Git’s collision attack detection and look forward to Git’s
+transition to a more robust hash function.])
+
+       (p [As for SHA-1 in an OpenPGP context ,(ref :bib
+'callas2007:rfc4880-openpgp): our authentication code rejects SHA-1
+OpenPGP signatures, as recommended ,(ref :bib 'leurent2020:shambles).]))
+
+      (section :title [Performance]
+      
+        (p [The core idea, the authorization invariant, is simple to
+understand and its implementation can be relatively simple, too—a good
+property for security-sensitive code.  However, with more than a
+thousand commits pushed to Guix every month, users may often find
+themselves authenticating hundreds of commits when running ,(tt [guix
+pull]).  The implementation must be able to perform well.])
+      
+        (p [At the algorithmic level, the main optimization is to
+consider that, if a commit has been authenticated, then all the commits
+in its transitive closure are automatically considered authentic and do
+not need to be checked.  This optimization stems from the fact that the
+commit graph has integrity properties similar to those of a Merkle-style
+directed acyclic graph ,(ref :bib 'merkle1980:protocols).  The
+implementation takes advantage of it in two ways: by skipping commits
+that are in the transitive closure of the currently-used Guix commit,
+and by maintaining a per-user cache of previously-authenticated commits
+that can also be skipped.])
+        
+        (p [At the implementation level, two key decisions were made:
+verifying signatures in-process, and dismissing unnecessary OpenPGP
+features.  The go-to technique of spawning GnuPG and Git processes to
+verify each commit signature would have been prohibitively expensive.
+Instead, to traverse the Git commit graph, we use libgit2, a C library
+that implements the Git “protocols”, ,(it [via]) its Guile-Git bindings.])
+
+        (p [We also have an OpenPGP implementation for GNU Guile, the
+implementation language of Guix.  This OpenPGP implementation is limited
+to parsing the OpenPGP packets found in signatures and in keys, and to
+verifying signatures.  It does not implement the more complex OpenPGP
+features that are useless in this context, such as: key signatures and
+the web of trust, and key expiration and revocation.  Timestamps in
+OpenPGP signatures and expiry dates are easily forged; what matters in
+our context is the causality of commits: that a signature on a commit is
+valid and authorized.  Likewise, revocation makes little sense in this
+context; what matters is whether the authorization invariant holds.])
+
+        (p [On a recent x86_64 laptop (Intel i7 CPU at 2.6 GHz with
+data stored on a solid state device, SSD), our code authenticates
+between 600 and 700 commits per second.  There are currently between
+1,000 and 2,000 commits per month on average, so someone running ,(tt
+[guix pull]) once per month experiences a 2–3 second delay due to
+authentication.  This does not appear to be detrimental to the user
+experience.])
+
+        (p [Another performance aspect has to do with Git repository
+handling.  The mechanisms we devised for commit authentication and
+downgrade prevention assume the availability of a local copy of the Git
+repository, including its history.  The first time a user invokes ,(tt
+[guix pull]), the command clones the repository, downloading more than
+300 MiB—this can take minutes, much longer than the commit
+authentication phase.  Currently that operation performs a full clone,
+including the whole repository history, but it may be possible to
+optimize it by performing a ,(emph [shallow clone]), where only recent
+history is retrieved.  Subsequent runs are much faster and more lightweight,
+as Git is able to download just what is missing from the local copy.]))
+      
+      (section :title [Generalization]
+        
+        (p [As explained in ,(numref :text [Section] :ident
+"rationale"), the general problem being solved—authenticating Git
+checkouts—is in no way specific to Guix, and the solution we devised may
+in fact be of interest to ,(emph [any]) Git user.  For this reason, and
+also to facilitate the work of Guix channel developers, we introduced a
+new command, separate from the channel machinery, to authenticate a Git
+checkout.  The command can be invoked on any Git repository, as in this
+example:])
+       
+        (prog :line #f :class "inline" [
+guix git authenticate \\
+  0c119db2ea86a389769f4d2b9c6f5c41c027e336 \\
+  "3CE4 6455 8A84 FDC6 9DB4  0CFB 090B 1199 3D9A EBB5"
+])
+       
+        (p [The command above authenticates the checkout in the current
+directory.  The arguments represent its ,(emph [introduction]): the
+introductory commit, and the fingerprint of the OpenPGP key used to sign
+that commit.  Additional options allow users to specify, for instance,
+the name of the branch where OpenPGP keys are to be found.])
+
+        (p [This command can also authenticate ,(emph [historical
+commits])—signed commits made ,(emph [before]) a ,(tt
+[.guix-authorizations]) file was introduced in the repository.  In that
+mode, users must provide an authorization file that represents the
+static set of authorizations for all those commits whose parent(s) lack
+,(tt [.guix-authorizations]).  We found it useful to retroactively authenticate the
+history of the Guix repository, where commit signing became compulsory
+several years before this authentication mechanism was in place.])
+        
+        (p [This interface is low-level and would benefit from
+simplifications.  For instance, repository introductions obtained by
+users could be gathered in a single place, once and for all, such that users
+do not have to specify them every time.  Communicating introductions
+could also be simplified: the two twenty-byte strings above could be
+represented as a single 56-character base64 string, or as a QR code.
+For broad adoption, the best option would be to integrate the
+functionality in Git proper.]))
+
+      (section :title [Evaluation]
+        
+        (p [Channel authentication as described above has been deployed
+and used in production in Guix for more than a year, since June 2020.
+This has given us an informal but large-scale, “real-world” evaluation
+of this work.  When authentication support was integrated in production code,
+users who ran ,(tt [guix pull]) transparently obtained the new
+code, and all subsequent invocations of ,(tt [guix pull]) performed code
+authentication and downgrade prevention.  In almost two years, there was one
+incident where a committer mistakenly pushed a commit signed with an
+unauthorized key, which was immediately detected by anyone who attempted
+to run ,(tt [guix pull]); the offending commit was removed in
+minutes (with a ,(emph [hard reset]) to its parent commit, in Git
+parlance).  Such mistakes can be avoided by having a server-side hook
+running ,(tt [guix git authenticate]), but we did not have the ability
+to run such hooks at the time.])
+        
+        (p [Downgrade prevention has had a more visible impact, at
+least for advanced users with rather unusual workflows.  As an example,
+we have had reports of users who would pull to development or
+work-in-progress branches, using ,(tt [guix pull --branch=,(it
+[devel])]), where ,(tt [devel]) is the name of the branch.  When trying
+to pull back to the main branch, ,(tt [guix pull]) would report an
+error saying that the target commit is “unrelated” to the source
+commit.  Indeed, because the development branch has not been merged into
+the main branch, the latest commit on the main branch is not a
+descendant of the latest commit on the development branch.  Since this
+mechanism has been in production, we have had only two reports from advanced users
+“surprised” that switching branches in such a way would trigger the
+downgrade prevention mechanism; these users were familiar with Git and
+understood that the mechanism rightfully protected them from a
+potential downgrade.])
+        
+        (p [System downgrade prevention has demonstrated its value.
+Since ,(tt [guix system reconfigure]) and ,(tt [guix deploy]) prevent
+downgrades, a system administrator cannot mistakenly reconfigure the system to an
+older or unrelated commit; this is particularly useful on systems
+administered by several people, where an administrator cannot
+“undo” the upgrade performed by another administrator.])
+        
+        (p [More importantly, checkout authentication together with
+system downgrade prevention enabled us to provide a trustworthy ,(emph
+[unattended upgrade]) service.  This functionality is typically depended
+on by server administrators.  The service periodically pulls and
+reconfigures the system.  With the guarantees Guix provides, the worst
+that can happen is that an upgrade does not take place.])
+        
+        (p [Since it became available, authors of Guix channels have
+been adopting authentication support without hassle.  Outside Guix, generalized
+authentication support offered by ,(tt [guix git authenticate]) has seen
+use in a few repositories.  We have yet to see
+broader adoption but we reckon that simplifying the interface may be a
+precondition, as explained above.])))
+      
+   (chapter :title [Related Work]
+      :ident "related"
+      
+      (p [A lot of work has gone into securing the software supply
+chain, often in the context of binary distributions, sometimes in a more
+general context; recent work also looks into Git authentication and
+related issues.  This section attempts to summarize how Guix relates to
+similar work that we are aware of in these two areas.])
+      
+      (p [,(bold [Package manager updates.])
+The Update Framework ,(ref :bib 'samuel2010:survivable) (TUF)
+is a reference for secure update systems, with a well-structured
+specification ,(ref :bib 'cappos2020:tuf-spec) and a number of
+implementations.  Many of its goals are shared by Guix.  Among the
+attacks TUF aims to protect against (Section 1.5.2 of the spec), the
+downgrade-prevention mechanism described in ,(numref :text [Section]
+:ident "downgrade") does not, ,(it [per se]), address ,(it [indefinite
+freeze attacks]) (more on that below).])
+      
+      (p [Mercury is a variant of TUF that intends to protect against
+downgrade attacks even in the face of compromised repositories ,(ref
+:bib 'kuppusamy2017:mercury).  Mercury focuses on package version
+strings to determine what constitutes a downgrade.  This is a
+restrictive definition of downgrade that relies on presumed conventions
+used by repository maintainers.  In contrast, looking at the package
+commit graph as described in ,(numref :text [Section] :ident
+"downgrade") allows us to capture the evolution of packages and of the
+distribution ,(emph [as a whole]).  However, while our approach requires
+users to download a complete copy of the Git repository, Mercury has
+much lower bandwidth requirements.])
+
+      (p [However, both in its goals and system descriptions, TUF is
+biased towards systems that distribute binaries as plain files with
+associated metadata.  That creates a fundamental impedance mismatch with
+the functional deployment model we described in ,(numref :text [Section]
+:ident "background").  As an example, attacks such as ,(emph
+[fast-forward attacks]) or ,(emph [mix-and-match attacks]) do not apply
+in the context of Guix; likewise, the ,(emph [repository]) depicted in
+Section 3 of the spec has little in common with a Git repository.  The
+spec also defines a notion of ,(emph [role]), but those roles do not
+match our distribution model.  With the authentication model described
+in ,(numref :text [Section] :ident "authenticating"), any authorized
+committer can touch any file; the model and the ,(tt
+[.guix-authorizations]) format leave room for per-file authorizations,
+which could be a way to define fine-grain roles in this context.])
+
+      (p [,(bold [Updates for source-based distributions.])
+The Nix package manager is “source-based” like Guix and
+distributes its package definitions as a Git repository.  It does not
+currently implement Git checkout authentication and secure updates.  A
+proposal requiring committers to sign commits was rejected, mainly for
+two reasons: (1) it would make it impossible to perform Git merges
+(accepting “pull requests”) from the GitHub web interface, and (2)
+GitHub is effectively considered a trusted third party ,(ref :bib
+'nixos2021:signed-commits).  Nix also features a newer and more
+decentralized mechanism to distribute packages called ,(emph [flakes]);
+flake authentication has been discussed but those discussions have not
+come to fruition yet ,(ref :bib 'dolstra2019:flake-auth).])
+      
+      (p [Other package managers have a similar setup: Brew updates its
+package repositories from GitHub, using Git, but without any particular
+mechanism to ensure checkout authenticity ,(ref :bib 'brew2022:github);
+likewise, CONDA-Forge, a set of Git repositories hosting package recipes
+for the CONDA package manager, does not offer any authentication
+mechanism ,(ref :bib 'condaforge2022:web), and FreeBSD Ports are in a
+similar situation ,(ref :bib 'freebsd2022:handbook).  The source-based
+pkgsrc tool, used on NetBSD, updates its set of package recipes using
+the CVS version control system, which allows neither for authentication
+nor for integrity checks ,(ref :bib 'pkgsrc2022:guide).])
+      
+      (p [Gentoo, a source-based GNU/Linux distribution, stores package
+definitions in a Git repository where commits are required to be signed
+by developers; the project maintains a separate list of currently
+authorized developer keys ,(ref :bib 'gentoo2022:portage-security).
+Because the list of authorized keys is separate, it is not clear how to
+verify whether a given commit is signed by a key that was authorized at
+the time of signature.  Another shortcoming is that the recommended
+method to update one’s local copy of the package repository is ,(emph
+[not]) Git but instead the rsync file synchronization protocol together
+with OpenPGP signatures of the files made with a special-purpose release
+key.])
+
+      (p [Developers of OPAM, the package manager for the OCaml
+language, adapted TUF for use with their Git-based package repository,
+later updated to write Conex ,(ref :bib 'mehnert2016:conex), a separate
+tool to authenticate OPAM repositories.  OPAM, like Guix, is a
+source-based distribution and its package repository is a Git repository
+containing “build recipes”.  To date, it appears that ,(tt [opam update])
+itself does not authenticate repositories though; it is up to users and
+developers to run Conex.])
+      
+      (p [,(bold [Supply chain integrity.])
+The in-toto framework ,(ref :bib 'torresarias2019:intoto) can be thought of as a
+generalization of TUF; it aims at ensuring the integrity of complete
+software supply chains, taking into account the different steps that
+comprise software supply chains in widespread use such as Debian’s.  In
+particular, it focuses on ,(emph [artifact flow integrity])—that
+artifacts created by a step cannot be altered before the next step.])
+
+      (p [Thanks to the functional deployment model, Guix has end-to-end
+control over artifact flow, from source code to binaries delivered to
+users.  Complete provenance tracking gives anyone the ability to ,(emph
+[verify]) the source-to-binary mapping, or to simply not use the
+project’s official binaries, as discussed in ,(numref :text [Section]
+:ident "background").  Conversely, in-toto’s approach to artifact flow
+integrity assumes a relative disconnect between steps that makes
+verification hard in the first place.  In a sense, in-toto addresses
+non-verifiability through attestation.  SLSA ,(ref :bib
+'google2021:slsa) and sigstore ,(ref :bib 'sigstore2021:web) take a
+similar approach, insisting on certification rather than allowing
+independent verification of each step.])
+
+      (figure :legend [GitHub’s Web interface showing commit
+verification statuses.]
+        :ident "fig-github-verification"
+        (image :file "images/github-verification-statuses.png"))
+
+      (p [,(bold [Git authentication.])
+While signed Git commits (and tags) are becoming more common
+and generally seen as good practice, we are not aware of other tools or
+protocols to support off-line Git checkout authentication.  Recently,
+as illustrated in ,(numref :ident "fig-github-verification" :text [Figure]),
+hosting platforms such as GitHub and GitLab started displaying a
+“verified” tag next to commits signed with the OpenPGP key of the person
+who pushed them or that of their author—a very limited verification that
+may give a false sense of security
+,(ref :bib '(github2021:verify-commits gitlab2021:verify-commits)).
+This mechanism depends on out-of-band data (keys associated with user
+accounts) and does not permit off-line checks; it also lacks a notion of
+authorization.  Furthermore, commits made ,(it [via]) the Web interface
+are signed by the platform itself, which makes it a single point of
+trust of every hosted project.])
+
+      (p [Earlier work focuses on the impact of malicious modifications
+to Git repository meta-data ,(ref :bib "torresarias2016:omitting").  An
+attacker with access to the repository can modify, for instance, branch
+references, to cause a rollback attack or a “teleport” attack, causing
+users to pull an older commit or an unrelated commit.  As written above,
+,(tt [guix pull]) would detect such attacks.  However, ,(tt [guix pull])
+would fail to detect cases where metadata modification does not yield a
+rollback or teleport, yet gives users a different view than the intended
+one—for instance, a user is directed to an authentic but different
+branch rather than the intended one ,(ref :bib 'devos2021:diverted).  This potentially allows for ,(it
+[indefinite freeze attacks]), though these would likely be quickly
+detected.  The “secure push” operation and the associated ,(emph
+[reference state log]) (RSL) the authors propose would be an
+improvement.]))
+
+   (chapter :title [Conclusion]
+      :ident "conclusion"
+      
+      (p [The update authentication mechanism described in this article
+was deployed more than a year ago.  Users updating with ,(tt [guix
+pull]) may have noticed a new progress bar while commits are being
+authenticated.  Apart from that, the change was transparent and our
+experience so far has been positive.  The authentication mechanism is
+built around the Git commit graph; it is a mechanism to ,(emph
+[authenticate Git checkouts]) and in that sense it is not tied to Guix
+and its application domain.  To our knowledge, this is the first
+client-side-only Git update authentication mechanism in use.])
+      
+      (p [Guix records the commits of channels used to deploy a set of
+packages or even a complete operating system.  We took advantage of
+that, together with knowledge of the commit graph of these channels, to
+prevent downgrade attacks—both when running ,(tt [guix pull]) and when
+deploying the operating system, which is another distinguishing
+feature.])
+
+      (p [The security of the software supply chain as managed by Guix
+relies on: auditability (every piece of software is built from source),
+verifiability (the functional model and reproducible builds make it easy
+to (re)build binaries and check whether they match the source), and
+secure updates (users updating Guix can only get genuine code vetted by
+the project).  We think this is a solid foundation that addresses common
+software supply chain issues at their core.  This is a radical, novel
+approach in a time where most related work focuses on certifying each
+link of the supply chain as opposed to ensuring verifiability.])
+
+      (p [The security of free operating systems of course also depends
+on the security of the upstream software packages being distributed.  We
+hope our Git authentication model and/or tool can find its way into
+upstream development workflows.  This would address one of the
+weakest points in today’s development practices.])
+
+      #;(p [To bootstrap trust, we added the notion of ,(it [channel
+introductions]).  These are now visible in the user interface, in
+particular in the output of ,(tt [guix describe]) and in the
+configuration file of ,(tt [guix pull]) and ,(tt [guix time-machine]).
+While channel configuration remains a few lines of code that users
+typically paste, this extra bit of configuration might be intimidating.
+This gives us an incentive to facilitate the handling of channels and
+channel introductions, be it through a compact representation of these
+or ,(it [via]) improvements to the user interface.])
+
+      #;(p [The solution here is built around the assumption that Guix is
+fundamentally a source-based distribution, and is thus completely
+orthogonal to the public key infrastructure (PKI) Guix uses for the
+signature of substitutes.  Yet, the substitute PKI could probably
+benefit from the fact that we now have a secure update mechanism for the
+Guix source code: since ,(tt [guix pull]) can securely retrieve a new
+substitute signing key, perhaps it could somehow handle substitute
+signing key revocation and delegation automatically?  Related to that,
+channels could perhaps advertise a substitute URL and its signing key,
+possibly allowing users to register those when they first pull from the
+channel.  All this requires more thought, but it looks like there are
+new opportunities here.]))
+   
+   (references))
+   
+;;; Local Variables:
+;;; coding: utf-8
+;;; ispell-local-dictionary: "american"
+;;; compile-command: "make supply-chain.pdf"
+;;; comment-start: ";;"
+;;; End:



reply via email to

[Prev in Thread] Current Thread [Next in Thread]