guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

39/127: gnu: rust-ring-0.16: Generate more bundled files.


From: guix-commits
Subject: 39/127: gnu: rust-ring-0.16: Generate more bundled files.
Date: Tue, 26 Dec 2023 06:42:37 -0500 (EST)

efraim pushed a commit to branch rust-team
in repository guix.

commit 12d566046180eb8c06bb9bdd1c76cc3c427fded2
Author: Efraim Flashner <efraim@flashner.co.il>
AuthorDate: Sun Dec 24 12:56:49 2023 +0200

    gnu: rust-ring-0.16: Generate more bundled files.
    
    * gnu/packages/crates-io.scm (rust-ring-0.16-sources): New variable.
    (rust-ring-0.16)[source]: Use rust-ring-0.16-sources.  Drop patches and
    snippet.
    [arguments]: Remove custom phase generating curve25519 tables.
    [native-inputs]: Remove field.
    (rust-rustls-0.20)[native-inputs]: Remove field.
    * gnu/packages/rust-apps.scm (agate, alfis, maturin, rust-cargo-edit,
    sniffglue, spotifyd, tealdeer)[native-inputs]: Remove perl.
    * gnu/packages/patches/rust-ring-0.16-missing-files.patch,
    gnu/packages/patches/rust-ring-0.16-test-files.patch: Remove files.
    * gnu/local.mk (dist_patch_DATA): Remove them.
    
    Change-Id: I919207b6aacab78602ae18123ab345a34b00863f
---
 gnu/local.mk                                       |    2 -
 gnu/packages/crates-io.scm                         |  213 +-
 .../patches/rust-ring-0.16-missing-files.patch     | 2293 --------------------
 .../patches/rust-ring-0.16-test-files.patch        |   54 -
 gnu/packages/rust-apps.scm                         |   15 +-
 5 files changed, 183 insertions(+), 2394 deletions(-)

diff --git a/gnu/local.mk b/gnu/local.mk
index 900e3b8189..4842f9b49f 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1968,8 +1968,6 @@ dist_patch_DATA =                                         
\
   %D%/packages/patches/rustc-1.54.0-src.patch                  \
   %D%/packages/patches/rust-1.64-fix-riscv64-bootstrap.patch   \
   %D%/packages/patches/rust-1.70-fix-rustix-build.patch        \
-  %D%/packages/patches/rust-ring-0.16-missing-files.patch      \
-  %D%/packages/patches/rust-ring-0.16-test-files.patch         \
   %D%/packages/patches/rust-ring-0.17-ring-core.patch          \
   %D%/packages/patches/i3status-rust-enable-unstable-features.patch    \
   %D%/packages/patches/rust-ndarray-remove-blas-src-dep.patch  \
diff --git a/gnu/packages/crates-io.scm b/gnu/packages/crates-io.scm
index 4bfd90734d..5c2aa03d13 100644
--- a/gnu/packages/crates-io.scm
+++ b/gnu/packages/crates-io.scm
@@ -64132,33 +64132,187 @@ Digital Signature Algorithm} (ECDSA).")
     (description "This package provided safe, fast, small crypto using Rust.")
     (license (list license:isc license:openssl))))
 
+(define rust-ring-0.16-sources
+  (let* ((version "0.16.20")
+         (upstream-source
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                    (url "https://github.com/briansmith/ring")
+                    (commit "9cc0d45f4d8521f467bb3a621e74b1535e118188")))
+             (file-name (git-file-name "rust-ring" version))
+             (sha256
+              (base32 "1aps05i5308ka03968glnnqr4kdkk2x4ghlg5vrqhl78jm6ivvby")))))
+    (origin
+      (method computed-origin-method)
+      (file-name (string-append "rust-ring-" version ".tar.gz"))
+      (sha256 #f)
+      (uri
+        (delay
+          (with-imported-modules '((guix build utils))
+            #~(begin
+                (use-modules (guix build utils))
+                (set-path-environment-variable
+                  "PATH" '("bin")
+                  (list #+(canonical-package gzip)
+                        #+(canonical-package tar)
+                        #+perl
+                        #+nasm
+                        #+go
+                        #+clang             ; clang-format
+                        #+python2-minimal))
+                (setenv "HOME" (getcwd))
+                (copy-recursively #+upstream-source
+                                  (string-append "ring-" #$version))
+                (with-directory-excursion (string-append "ring-" #$version)
+                  (begin
+                    ;; It turns out Guix's nasm works just fine here.
+                    (substitute* "build.rs"
+                      (("./target/tools/nasm") "nasm"))
+                    ;; Files which would be deleted in a snippet:
+                    (delete-file "crypto/curve25519/curve25519_tables.h")
+                    (delete-file "crypto/fipsmodule/ec/ecp_nistz256_table.inl")
+                    ;; Files to be generated in the sources:
+                    (format #t "Generating the missing files ...~%")
+                    (force-output)
+                    (with-directory-excursion "crypto/curve25519"
+                      (with-output-to-file "curve25519_tables.h"
+                        (lambda _ (invoke "python" "make_curve25519_tables.py"))))
+                    (with-directory-excursion "crypto/fipsmodule/ec"
+                      (with-output-to-file "ecp_nistz256_table.inl"
+                        (lambda _ (invoke "go" "run" "make_p256-x86_64-table.go"))))
+                    (format #t "Generating the pregenerated files ...~%")
+                    (force-output)
+                    (mkdir-p "pregenerated/tmp")
+
+                    ;; We generate all the files which upstream would normally be
+                    ;; generate by using '(cd pregenerate_asm && cargo clean &&
+                    ;; cargo build) ./pregenerate_asm/target/debug/pregenerate_asm'
+                    ;; in order to not include a dependency on cargo when
+                    ;; generating the sources.
+                    (define (prefix script)
+                      (string-append
+                        "pregenerated/"
+                        (string-drop-right
+                          (string-drop script
+                                       (string-index-right script #\/)) 3)))
+
+                    (for-each
+                      (lambda (script)
+                        (invoke "perl" script "elf"
+                                (string-append (prefix script) "-elf.S"))
+                        (invoke "perl" script "macosx"
+                                (string-append (prefix script) "-macosx.S"))
+                        (invoke "perl" script "nasm"
+                                (string-append
+                                  "pregenerated/tmp/"
+                                  (string-drop (prefix script) 13) "-nasm.asm")))
+                      '("crypto/fipsmodule/aes/asm/aesni-x86_64.pl"
+                        "crypto/fipsmodule/aes/asm/vpaes-x86_64.pl"
+                        "crypto/fipsmodule/bn/asm/x86_64-mont.pl"
+                        "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"
+                        "crypto/chacha/asm/chacha-x86_64.pl"
+                        "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"
+                        "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"
+                        "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"
+                        "crypto/fipsmodule/sha/asm/sha512-x86_64.pl"
+                        "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"))
+
+                    (invoke "perl" "crypto/fipsmodule/sha/asm/sha512-x86_64.pl"
+                            "elf" "pregenerated/sha256-x86_64-elf.S")
+
+                    (invoke "perl" "crypto/fipsmodule/sha/asm/sha512-x86_64.pl"
+                            "macosx" "pregenerated/sha256-x86_64-macosx.S")
+
+                    (invoke "perl" "crypto/fipsmodule/sha/asm/sha512-x86_64.pl"
+                            "nasm" "pregenerated/tmp/sha256-x86_64-nasm.asm")
+
+                    (for-each
+                      (lambda (script)
+                        (invoke "nasm" "-o" (string-append (prefix script) "obj")
+                                "-f" "win64" "-Xgnu" "-gcv8" script))
+                    (find-files "pregenerated/tmp" "\\.asm"))
+
+                    (for-each
+                      (lambda (script)
+                        (invoke "perl" script "ios64"
+                                (string-append (prefix script) "-ios64.S"))
+                        (invoke "perl" script "linux64"
+                                (string-append (prefix script) "-linux64.S")))
+                      '("crypto/fipsmodule/aes/asm/aesv8-armx.pl"
+                        "crypto/fipsmodule/modes/asm/ghashv8-armx.pl"
+                        "crypto/fipsmodule/aes/asm/vpaes-armv8.pl"
+                        "crypto/fipsmodule/bn/asm/armv8-mont.pl"
+                        "crypto/chacha/asm/chacha-armv8.pl"
+                        "crypto/fipsmodule/ec/asm/ecp_nistz256-armv8.pl"
+                        "crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl"
+                        "crypto/fipsmodule/sha/asm/sha512-armv8.pl"))
+
+                    (invoke "perl" "crypto/fipsmodule/sha/asm/sha512-armv8.pl"
+                            "ios64" "pregenerated/sha256-armv8-ios64.S")
+
+                    (invoke "perl" "crypto/fipsmodule/sha/asm/sha512-armv8.pl"
+                            "linux64" "pregenerated/sha256-armv8-linux64.S")
+
+                    (for-each
+                      (lambda (script)
+                        (invoke "perl" script "elf"
+                                "-fPIC" "-DOPENSSL_IA32_SSE2"
+                                (string-append (prefix script) "-elf.S"))
+                        (invoke "perl" script "macosx"
+                                "-fPIC" "-DOPENSSL_IA32_SSE2"
+                                (string-append (prefix script) "-macosx.S"))
+                        (invoke "perl" script "win32n"
+                                "-fPIC" "-DOPENSSL_IA32_SSE2"
+                                (string-append
+                                  "pregenerated/tmp/"
+                                  (string-drop (prefix script) 13) "-win32n.asm")))
+                      '("crypto/fipsmodule/aes/asm/aesni-x86.pl"
+                        "crypto/fipsmodule/aes/asm/vpaes-x86.pl"
+                        "crypto/fipsmodule/bn/asm/x86-mont.pl"
+                        "crypto/chacha/asm/chacha-x86.pl"
+                        "crypto/fipsmodule/ec/asm/ecp_nistz256-x86.pl"
+                        "crypto/fipsmodule/modes/asm/ghash-x86.pl"))
+
+                    (for-each
+                      (lambda (script)
+                        (invoke "nasm" "-o" (string-append (prefix script) "obj")
+                                "-f" "win32" "-Xgnu" "-gcv8" script))
+                    (find-files "pregenerated/tmp" "-win32n\\.asm"))
+
+                    (for-each
+                      (lambda (script)
+                        (invoke "perl" script "ios32"
+                                (string-append (prefix script) "-ios32.S"))
+                        (invoke "perl" script "linux32"
+                                (string-append (prefix script) "-linux32.S")))
+                      '("crypto/fipsmodule/aes/asm/aesv8-armx.pl"
+                        "crypto/fipsmodule/modes/asm/ghashv8-armx.pl"
+                        "crypto/fipsmodule/aes/asm/bsaes-armv7.pl"
+                        "crypto/fipsmodule/aes/asm/vpaes-armv7.pl"
+                        "crypto/fipsmodule/bn/asm/armv4-mont.pl"
+                        "crypto/chacha/asm/chacha-armv4.pl"
+                        "crypto/fipsmodule/ec/asm/ecp_nistz256-armv4.pl"
+                        "crypto/fipsmodule/modes/asm/ghash-armv4.pl"
+                        "crypto/fipsmodule/sha/asm/sha256-armv4.pl"
+                        "crypto/fipsmodule/sha/asm/sha512-armv4.pl"))
+
+                    (format #t "Creating the tarball ...~%")
+                    (force-output)
+                    ;; The other option is to use cargo package --allow-dirty
+                    (with-directory-excursion "../"
+                      (invoke "tar" "czf" #$output
+                              ;; avoid non-determinism in the archive
+                              "--sort=name" "--mtime=@0"
+                              "--owner=root:0" "--group=root:0"
+                              (string-append "ring-" #$version))))))))))))
+
 (define-public rust-ring-0.16
   (package
     (inherit rust-ring-0.17)
     (name "rust-ring")
     (version "0.16.20")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (crate-uri "ring" version))
-       (file-name (string-append name "-" version ".tar.gz"))
-       (sha256
-        (base32 "1z682xp7v38ayq9g9nkbhhfpj6ygralmlx7wdmsfv8rnw99cylrh"))
-       (patches (search-patches "rust-ring-0.16-missing-files.patch"
-                                "rust-ring-0.16-test-files.patch"))
-       (modules '((guix build utils)))
-       (snippet
-        '(begin
-           (delete-file-recursively "pregenerated")
-           ;; Regenerating the curve25519_tables requires python2 and clang-format.
-           ;; Luckily we've added the script back in the patch.
-           ;; Rust doesn't provide a clear way to regenerate files located in
-           ;; source directories, so for now we don't remove the file here.
-           ;(delete-file "crypto/curve25519/curve25519_tables.h")
-           ;; Pretend this isn't a relase tarball.
-           (with-output-to-file ".git"
-             (lambda _
-                (format #t "")))))))
+    (source rust-ring-0.16-sources)
     (arguments
      `(#:cargo-inputs
        (("rust-libc" ,rust-libc-0.2)
@@ -64171,17 +64325,7 @@ Digital Signature Algorithm} (ECDSA).")
         ("rust-cc" ,rust-cc-1))
        #:cargo-development-inputs
        (("rust-libc" ,rust-libc-0.2)
-        ("rust-wasm-bindgen-test" ,rust-wasm-bindgen-test-0.3))
-       #:phases
-       (modify-phases %standard-phases
-         (add-after 'unpack 'generate-curve25519-tables
-           (lambda _
-             (with-directory-excursion "crypto/curve25519"
-               (with-output-to-file "curve25519_tables.h"
-                 (lambda _
-                   (invoke "python" "make_curve25519_tables.py")))))))))
-    (native-inputs
-     (list clang perl python-2))
+        ("rust-wasm-bindgen-test" ,rust-wasm-bindgen-test-0.3))))
     ;; For a mostly complete list of supported systems see:
     ;; https://github.com/briansmith/ring/blob/main/.github/workflows/ci.yml#L170
     (supported-systems (list "aarch64-linux" "armhf-linux"
@@ -67028,8 +67172,7 @@ rustc compiler.")
         ("rust-env-logger" ,rust-env-logger-0.9)
         ("rust-log" ,rust-log-0.4)
         ("rust-rustls-pemfile" ,rust-rustls-pemfile-1)
-        ("rust-webpki-roots" ,rust-webpki-roots-0.22))))
-    (native-inputs (list perl))))
+        ("rust-webpki-roots" ,rust-webpki-roots-0.22))))))
 
 (define-public rust-rustls-0.19
   (package
diff --git a/gnu/packages/patches/rust-ring-0.16-missing-files.patch b/gnu/packages/patches/rust-ring-0.16-missing-files.patch
deleted file mode 100644
index fa2f94a801..0000000000
--- a/gnu/packages/patches/rust-ring-0.16-missing-files.patch
+++ /dev/null
@@ -1,2293 +0,0 @@
-These 4 files exist in the git repository for rust-ring, and are from
-the same commit where 0.16.20 is taken from. They were not added to the
-include list in Cargo.toml, so they were not added to the tarball.
-
----
- crypto/curve25519/make_curve25519_tables.py   | 222 +++++
- crypto/fipsmodule/aes/asm/vpaes-armv7.pl      | 896 ++++++++++++++++++
- crypto/fipsmodule/aes/asm/vpaes-armv8.pl      | 837 ++++++++++++++++
- .../fipsmodule/modes/asm/ghash-neon-armv8.pl  | 294 ++++++
- 4 files changed, 2249 insertions(+)
- create mode 100755 crypto/curve25519/make_curve25519_tables.py
- create mode 100644 crypto/fipsmodule/aes/asm/vpaes-armv7.pl
- create mode 100755 crypto/fipsmodule/aes/asm/vpaes-armv8.pl
- create mode 100644 crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
-
-diff --git a/crypto/curve25519/make_curve25519_tables.py b/crypto/curve25519/make_curve25519_tables.py
-new file mode 100755
-index 0000000..50dee2a
---- /dev/null
-+++ b/crypto/curve25519/make_curve25519_tables.py
-@@ -0,0 +1,222 @@
-+#!/usr/bin/env python
-+# coding=utf-8
-+# Copyright (c) 2020, Google Inc.
-+#
-+# Permission to use, copy, modify, and/or distribute this software for any
-+# purpose with or without fee is hereby granted, provided that the above
-+# copyright notice and this permission notice appear in all copies.
-+#
-+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-+
-+import StringIO
-+import subprocess
-+
-+# Base field Z_p
-+p = 2**255 - 19
-+
-+def modp_inv(x):
-+    return pow(x, p-2, p)
-+
-+# Square root of -1
-+modp_sqrt_m1 = pow(2, (p-1) // 4, p)
-+
-+# Compute corresponding x-coordinate, with low bit corresponding to
-+# sign, or return None on failure
-+def recover_x(y, sign):
-+    if y >= p:
-+        return None
-+    x2 = (y*y-1) * modp_inv(d*y*y+1)
-+    if x2 == 0:
-+        if sign:
-+            return None
-+        else:
-+            return 0
-+
-+    # Compute square root of x2
-+    x = pow(x2, (p+3) // 8, p)
-+    if (x*x - x2) % p != 0:
-+        x = x * modp_sqrt_m1 % p
-+    if (x*x - x2) % p != 0:
-+        return None
-+
-+    if (x & 1) != sign:
-+        x = p - x
-+    return x
-+
-+# Curve constant
-+d = -121665 * modp_inv(121666) % p
-+
-+# Base point
-+g_y = 4 * modp_inv(5) % p
-+g_x = recover_x(g_y, 0)
-+
-+# Points are represented as affine tuples (x, y).
-+
-+def point_add(P, Q):
-+    x1, y1 = P
-+    x2, y2 = Q
-+    x3 = ((x1*y2 + y1*x2) * modp_inv(1 + d*x1*x2*y1*y2)) % p
-+    y3 = ((y1*y2 + x1*x2) * modp_inv(1 - d*x1*x2*y1*y2)) % p
-+    return (x3, y3)
-+
-+# Computes Q = s * P
-+def point_mul(s, P):
-+    Q = (0, 1)  # Neutral element
-+    while s > 0:
-+        if s & 1:
-+            Q = point_add(Q, P)
-+        P = point_add(P, P)
-+        s >>= 1
-+    return Q
-+
-+def to_bytes(x):
-+    ret = bytearray(32)
-+    for i in range(len(ret)):
-+        ret[i] = x % 256
-+        x >>= 8
-+    assert x == 0
-+    return ret
-+
-+def to_ge_precomp(P):
-+    # typedef struct {
-+    #   fe_loose yplusx;
-+    #   fe_loose yminusx;
-+    #   fe_loose xy2d;
-+    # } ge_precomp;
-+    x, y = P
-+    return ((y + x) % p, (y - x) % p, (x * y * 2 * d) % p)
-+
-+def to_base_25_5(x):
-+    limbs = (26, 25, 26, 25, 26, 25, 26, 25, 26, 25)
-+    ret = []
-+    for l in limbs:
-+        ret.append(x & ((1<<l) - 1))
-+        x >>= l
-+    assert x == 0
-+    return ret
-+
-+def to_base_51(x):
-+    ret = []
-+    for _ in range(5):
-+        ret.append(x & ((1<<51) - 1))
-+        x >>= 51
-+    assert x == 0
-+    return ret
-+
-+def to_literal(x):
-+    ret = "{{\n#if defined(BORINGSSL_CURVE25519_64BIT)\n"
-+    ret += ", ".join(map(str, to_base_51(x)))
-+    ret += "\n#else\n"
-+    ret += ", ".join(map(str, to_base_25_5(x)))
-+    ret += "\n#endif\n}}"
-+    return ret
-+
-+def main():
-+    d2 = (2 * d) % p
-+
-+    small_precomp = bytearray()
-+    for i in range(1, 16):
-+        s = (i&1) | ((i&2) << (64-1)) | ((i&4) << (128-2)) | ((i&8) << (192-3))
-+        P = point_mul(s, (g_x, g_y))
-+        small_precomp += to_bytes(P[0])
-+        small_precomp += to_bytes(P[1])
-+
-+    large_precomp = []
-+    for i in range(32):
-+        large_precomp.append([])
-+        for j in range(8):
-+            P = point_mul((j + 1) << (i * 8), (g_x, g_y))
-+            large_precomp[-1].append(to_ge_precomp(P))
-+
-+    bi_precomp = []
-+    for i in range(8):
-+        P = point_mul(2*i + 1, (g_x, g_y))
-+        bi_precomp.append(to_ge_precomp(P))
-+
-+
-+    buf = StringIO.StringIO()
-+    buf.write("""/* Copyright (c) 2020, Google Inc.
-+ *
-+ * Permission to use, copy, modify, and/or distribute this software for any
-+ * purpose with or without fee is hereby granted, provided that the above
-+ * copyright notice and this permission notice appear in all copies.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
ACTION
-+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-+
-+// This file is generated from
-+//    ./make_curve25519_tables.py > curve25519_tables.h
-+
-+
-+static const fe d = """)
-+    buf.write(to_literal(d))
-+    buf.write(""";
-+
-+static const fe sqrtm1 = """)
-+    buf.write(to_literal(modp_sqrt_m1))
-+    buf.write(""";
-+
-+static const fe d2 = """)
-+    buf.write(to_literal(d2))
-+    buf.write(""";
-+
-+#if defined(OPENSSL_SMALL)
-+
-+// This block of code replaces the standard base-point table with a much 
smaller
-+// one. The standard table is 30,720 bytes while this one is just 960.
-+//
-+// This table contains 15 pairs of group elements, (x, y), where each field
-+// element is serialised with |fe_tobytes|. If |i| is the index of the group
-+// element then consider i+1 as a four-bit number: (i₀, i₁, i₂, i₃) (where i₀
-+// is the most significant bit). The value of the group element is then:
-+// (i₀×2^192 + i₁×2^128 + i₂×2^64 + i₃)G, where G is the generator.
-+static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = {""")
-+    for i, b in enumerate(small_precomp):
-+        buf.write("0x%02x, " % b)
-+    buf.write("""
-+};
-+
-+#else
-+
-+// k25519Precomp[i][j] = (j+1)*256^i*B
-+static const ge_precomp k25519Precomp[32][8] = {
-+""")
-+    for child in large_precomp:
-+        buf.write("{\n")
-+        for val in child:
-+            buf.write("{\n")
-+            for term in val:
-+                buf.write(to_literal(term) + ",\n")
-+            buf.write("},\n")
-+        buf.write("},\n")
-+    buf.write("""};
-+
-+#endif  // OPENSSL_SMALL
-+
-+// Bi[i] = (2*i+1)*B
-+static const ge_precomp Bi[8] = {
-+""")
-+    for val in bi_precomp:
-+        buf.write("{\n")
-+        for term in val:
-+                buf.write(to_literal(term) + ",\n")
-+        buf.write("},\n")
-+    buf.write("""};
-+""")
-+
-+    proc = subprocess.Popen(["clang-format"], stdin=subprocess.PIPE)
-+    proc.communicate(buf.getvalue())
-+
-+if __name__ == "__main__":
-+    main()
-diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv7.pl 
b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl
-new file mode 100644
-index 0000000..d36a97a
---- /dev/null
-+++ b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl
-@@ -0,0 +1,896 @@
-+#! /usr/bin/env perl
-+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
-+#
-+# Licensed under the OpenSSL license (the "License").  You may not use
-+# this file except in compliance with the License.  You can obtain a copy
-+# in the file LICENSE in the source distribution or at
-+# https://www.openssl.org/source/license.html
-+
-+
-+######################################################################
-+## Constant-time SSSE3 AES core implementation.
-+## version 0.1
-+##
-+## By Mike Hamburg (Stanford University), 2009
-+## Public domain.
-+##
-+## For details see http://shiftleft.org/papers/vector_aes/ and
-+## http://crypto.stanford.edu/vpaes/.
-+##
-+######################################################################
-+# Adapted from the original x86_64 version and <appro@openssl.org>'s ARMv8
-+# version.
-+#
-+# armv7, aarch64, and x86_64 differ in several ways:
-+#
-+# * x86_64 SSSE3 instructions are two-address (destination operand is also a
-+#   source), while NEON is three-address (destination operand is separate from
-+#   two sources).
-+#
-+# * aarch64 has 32 SIMD registers available, while x86_64 and armv7 have 16.
-+#
-+# * x86_64 instructions can take memory references, while ARM is a load/store
-+#   architecture. This means we sometimes need a spare register.
-+#
-+# * aarch64 and x86_64 have 128-bit byte shuffle instructions (tbl and 
pshufb),
-+#   while armv7 only has a 64-bit byte shuffle (vtbl).
-+#
-+# This means this armv7 version must be a mix of both aarch64 and x86_64
-+# implementations. armv7 and aarch64 have analogous SIMD instructions, so we
-+# base the instructions on aarch64. However, we cannot use aarch64's register
-+# allocation. x86_64's register count matches, but x86_64 is two-address.
-+# vpaes-armv8.pl already accounts for this in the comments, which use
-+# three-address AVX instructions instead of the original SSSE3 ones. We base
-+# register usage on these comments, which are preserved in this file.
-+#
-+# This means we do not use separate input and output registers as in aarch64 
and
-+# cannot pin as many constants in the preheat functions. However, the 
load/store
-+# architecture means we must still deviate from x86_64 in places.
-+#
-+# Next, we account for the byte shuffle instructions. vtbl takes 64-bit source
-+# and destination and 128-bit table. Fortunately, armv7 also allows addressing
-+# upper and lower halves of each 128-bit register. The lower half of q{N} is
-+# d{2*N}. The upper half is d{2*N+1}. Instead of the following non-existent
-+# instruction,
-+#
-+#     vtbl.8 q0, q1, q2   @ Index each of q2's 16 bytes into q1. Store in q0.
-+#
-+# we write:
-+#
-+#     vtbl.8 d0, q1, d4   @ Index each of d4's 8 bytes into q1. Store in d0.
-+#     vtbl.8 d1, q1, d5   @ Index each of d5's 8 bytes into q1. Store in d1.
-+#
-+# For readability, we write d0 and d1 as q0#lo and q0#hi, respectively and
-+# post-process before outputting. (This is adapted from ghash-armv4.pl.) Note,
-+# however, that destination (q0) and table (q1) registers may no longer match.
-+# We adjust the register usage from x86_64 to avoid this. (Unfortunately, the
-+# two-address pshufb always matched these operands, so this is common.)
-+#
-+# This file also runs against the limit of ARMv7's ADR pseudo-instruction. ADR
-+# expands to an ADD or SUB of the pc register to find an address. That 
immediate
-+# must fit in ARM's encoding scheme: 8 bits of constant and 4 bits of 
rotation.
-+# This means larger values must be more aligned.
-+#
-+# ARM additionally has two encodings, ARM and Thumb mode. Our assembly files 
may
-+# use either encoding (do we actually need to support this?). In ARM mode, the
-+# distances get large enough to require 16-byte alignment. Moving constants
-+# closer to their use resolves most of this, but common constants in
-+# _vpaes_consts are used by the whole file. Affected ADR instructions must be
-+# placed at 8 mod 16 (the pc register is 8 ahead). Instructions with this
-+# constraint have been commented.
-+#
-+# For details on ARM's immediate value encoding scheme, see
-+# https://alisdair.mcdiarmid.org/arm-immediate-value-encoding/
-+#
-+# Finally, a summary of armv7 and aarch64 SIMD syntax differences:
-+#
-+# * armv7 prefixes SIMD instructions with 'v', while aarch64 does not.
-+#
-+# * armv7 SIMD registers are named like q0 (and d0 for the half-width ones).
-+#   aarch64 names registers like v0, and denotes half-width operations in an
-+#   instruction suffix (see below).
-+#
-+# * aarch64 embeds size and lane information in register suffixes. v0.16b is
-+#   16 bytes, v0.8h is eight u16s, v0.4s is four u32s, and v0.2d is two u64s.
-+#   armv7 embeds the total size in the register name (see above) and the size 
of
-+#   each element in an instruction suffix, which may look like vmov.i8,
-+#   vshr.u8, or vtbl.8, depending on instruction.
-+
-+use strict;
-+
-+my $flavour = shift;
-+my $output;
-+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
-+
-+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
-+my $dir=$1;
-+my $xlate;
-+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-+( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
-+die "can't locate arm-xlate.pl";
-+
-+open OUT,"| \"$^X\" $xlate $flavour $output";
-+*STDOUT=*OUT;
-+
-+my $code = "";
-+
-+$code.=<<___;
-+.syntax       unified
-+
-+.arch armv7-a
-+.fpu  neon
-+
-+#if defined(__thumb2__)
-+.thumb
-+#else
-+.code 32
-+#endif
-+
-+.text
-+
-+.type _vpaes_consts,%object
-+.align        7       @ totally strategic alignment
-+_vpaes_consts:
-+.Lk_mc_forward:       @ mc_forward
-+      .quad   0x0407060500030201, 0x0C0F0E0D080B0A09
-+      .quad   0x080B0A0904070605, 0x000302010C0F0E0D
-+      .quad   0x0C0F0E0D080B0A09, 0x0407060500030201
-+      .quad   0x000302010C0F0E0D, 0x080B0A0904070605
-+.Lk_mc_backward:@ mc_backward
-+      .quad   0x0605040702010003, 0x0E0D0C0F0A09080B
-+      .quad   0x020100030E0D0C0F, 0x0A09080B06050407
-+      .quad   0x0E0D0C0F0A09080B, 0x0605040702010003
-+      .quad   0x0A09080B06050407, 0x020100030E0D0C0F
-+.Lk_sr:               @ sr
-+      .quad   0x0706050403020100, 0x0F0E0D0C0B0A0908
-+      .quad   0x030E09040F0A0500, 0x0B06010C07020D08
-+      .quad   0x0F060D040B020900, 0x070E050C030A0108
-+      .quad   0x0B0E0104070A0D00, 0x0306090C0F020508
-+
-+@
-+@ "Hot" constants
-+@
-+.Lk_inv:      @ inv, inva
-+      .quad   0x0E05060F0D080180, 0x040703090A0B0C02
-+      .quad   0x01040A060F0B0780, 0x030D0E0C02050809
-+.Lk_ipt:      @ input transform (lo, hi)
-+      .quad   0xC2B2E8985A2A7000, 0xCABAE09052227808
-+      .quad   0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
-+.Lk_sbo:      @ sbou, sbot
-+      .quad   0xD0D26D176FBDC700, 0x15AABF7AC502A878
-+      .quad   0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
-+.Lk_sb1:      @ sb1u, sb1t
-+      .quad   0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
-+      .quad   0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
-+.Lk_sb2:      @ sb2u, sb2t
-+      .quad   0x69EB88400AE12900, 0xC2A163C8AB82234A
-+      .quad   0xE27A93C60B712400, 0x5EB7E955BC982FCD
-+
-+.asciz  "Vector Permutation AES for ARMv7 NEON, Mike Hamburg (Stanford 
University)"
-+.size _vpaes_consts,.-_vpaes_consts
-+.align        6
-+___
-+
-+{
-+my ($inp,$out,$key) = map("r$_", (0..2));
-+
-+my ($invlo,$invhi) = map("q$_", (10..11));
-+my ($sb1u,$sb1t,$sb2u,$sb2t) = map("q$_", (12..15));
-+
-+$code.=<<___;
-+@@
-+@@  _aes_preheat
-+@@
-+@@  Fills q9-q15 as specified below.
-+@@
-+.type _vpaes_preheat,%function
-+.align        4
-+_vpaes_preheat:
-+      adr     r10, .Lk_inv
-+      vmov.i8 q9, #0x0f               @ .Lk_s0F
-+      vld1.64 {q10,q11}, [r10]!       @ .Lk_inv
-+      add     r10, r10, #64           @ Skip .Lk_ipt, .Lk_sbo
-+      vld1.64 {q12,q13}, [r10]!       @ .Lk_sb1
-+      vld1.64 {q14,q15}, [r10]        @ .Lk_sb2
-+      bx      lr
-+
-+@@
-+@@  _aes_encrypt_core
-+@@
-+@@  AES-encrypt q0.
-+@@
-+@@  Inputs:
-+@@     q0 = input
-+@@     q9-q15 as in _vpaes_preheat
-+@@    [$key] = scheduled keys
-+@@
-+@@  Output in q0
-+@@  Clobbers  q1-q5, r8-r11
-+@@  Preserves q6-q8 so you get some local vectors
-+@@
-+@@
-+.type _vpaes_encrypt_core,%function
-+.align 4
-+_vpaes_encrypt_core:
-+      mov     r9, $key
-+      ldr     r8, [$key,#240]         @ pull rounds
-+      adr     r11, .Lk_ipt
-+      @ vmovdqa       .Lk_ipt(%rip),  %xmm2   # iptlo
-+      @ vmovdqa       .Lk_ipt+16(%rip), %xmm3 # ipthi
-+      vld1.64 {q2, q3}, [r11]
-+      adr     r11, .Lk_mc_forward+16
-+      vld1.64 {q5}, [r9]!             @ vmovdqu       (%r9),  %xmm5           
# round0 key
-+      vand    q1, q0, q9              @ vpand %xmm9,  %xmm0,  %xmm1
-+      vshr.u8 q0, q0, #4              @ vpsrlb        \$4,    %xmm0,  %xmm0
-+      vtbl.8  q1#lo, {q2}, q1#lo      @ vpshufb       %xmm1,  %xmm2,  %xmm1
-+      vtbl.8  q1#hi, {q2}, q1#hi
-+      vtbl.8  q2#lo, {q3}, q0#lo      @ vpshufb       %xmm0,  %xmm3,  %xmm2
-+      vtbl.8  q2#hi, {q3}, q0#hi
-+      veor    q0, q1, q5              @ vpxor %xmm5,  %xmm1,  %xmm0
-+      veor    q0, q0, q2              @ vpxor %xmm2,  %xmm0,  %xmm0
-+
-+      @ .Lenc_entry ends with a bne instruction, which is normally paired with
-+      @ the subs in .Lenc_loop; tst sets the flags for this first entry.
-+      tst     r8, r8
-+      b       .Lenc_entry
-+
-+.align 4
-+.Lenc_loop:
-+      @ middle of middle round
-+      add     r10, r11, #0x40
-+      vtbl.8  q4#lo, {$sb1t}, q2#lo   @ vpshufb       %xmm2,  %xmm13, %xmm4   
# 4 = sb1u
-+      vtbl.8  q4#hi, {$sb1t}, q2#hi
-+      vld1.64 {q1}, [r11]!            @ vmovdqa       -0x40(%r11,%r10), %xmm1 
# .Lk_mc_forward[]
-+      vtbl.8  q0#lo, {$sb1u}, q3#lo   @ vpshufb       %xmm3,  %xmm12, %xmm0   
# 0 = sb1t
-+      vtbl.8  q0#hi, {$sb1u}, q3#hi
-+      veor    q4, q4, q5              @ vpxor         %xmm5,  %xmm4,  %xmm4   
# 4 = sb1u + k
-+      vtbl.8  q5#lo, {$sb2t}, q2#lo   @ vpshufb       %xmm2,  %xmm15, %xmm5   
# 4 = sb2u
-+      vtbl.8  q5#hi, {$sb2t}, q2#hi
-+      veor    q0, q0, q4              @ vpxor         %xmm4,  %xmm0,  %xmm0   
# 0 = A
-+      vtbl.8  q2#lo, {$sb2u}, q3#lo   @ vpshufb       %xmm3,  %xmm14, %xmm2   
# 2 = sb2t
-+      vtbl.8  q2#hi, {$sb2u}, q3#hi
-+      vld1.64 {q4}, [r10]             @ vmovdqa       (%r11,%r10), %xmm4      
# .Lk_mc_backward[]
-+      vtbl.8  q3#lo, {q0}, q1#lo      @ vpshufb       %xmm1,  %xmm0,  %xmm3   
# 0 = B
-+      vtbl.8  q3#hi, {q0}, q1#hi
-+      veor    q2, q2, q5              @ vpxor         %xmm5,  %xmm2,  %xmm2   
# 2 = 2A
-+      @ Write to q5 instead of q0, so the table and destination registers do
-+      @ not overlap.
-+      vtbl.8  q5#lo, {q0}, q4#lo      @ vpshufb       %xmm4,  %xmm0,  %xmm0   
# 3 = D
-+      vtbl.8  q5#hi, {q0}, q4#hi
-+      veor    q3, q3, q2              @ vpxor         %xmm2,  %xmm3,  %xmm3   
# 0 = 2A+B
-+      vtbl.8  q4#lo, {q3}, q1#lo      @ vpshufb       %xmm1,  %xmm3,  %xmm4   
# 0 = 2B+C
-+      vtbl.8  q4#hi, {q3}, q1#hi
-+      @ Here we restore the original q0/q5 usage.
-+      veor    q0, q5, q3              @ vpxor         %xmm3,  %xmm0,  %xmm0   
# 3 = 2A+B+D
-+      and     r11, r11, #~(1<<6)      @ and           \$0x30, %r11            
# ... mod 4
-+      veor    q0, q0, q4              @ vpxor         %xmm4,  %xmm0, %xmm0    
# 0 = 2A+3B+C+D
-+      subs    r8, r8, #1              @ nr--
-+
-+.Lenc_entry:
-+      @ top of round
-+      vand    q1, q0, q9              @ vpand         %xmm0,  %xmm9,  %xmm1   
# 0 = k
-+      vshr.u8 q0, q0, #4              @ vpsrlb        \$4,    %xmm0,  %xmm0   
# 1 = i
-+      vtbl.8  q5#lo, {$invhi}, q1#lo  @ vpshufb       %xmm1,  %xmm11, %xmm5   
# 2 = a/k
-+      vtbl.8  q5#hi, {$invhi}, q1#hi
-+      veor    q1, q1, q0              @ vpxor         %xmm0,  %xmm1,  %xmm1   
# 0 = j
-+      vtbl.8  q3#lo, {$invlo}, q0#lo  @ vpshufb       %xmm0,  %xmm10, %xmm3   
# 3 = 1/i
-+      vtbl.8  q3#hi, {$invlo}, q0#hi
-+      vtbl.8  q4#lo, {$invlo}, q1#lo  @ vpshufb       %xmm1,  %xmm10, %xmm4   
# 4 = 1/j
-+      vtbl.8  q4#hi, {$invlo}, q1#hi
-+      veor    q3, q3, q5              @ vpxor         %xmm5,  %xmm3,  %xmm3   
# 3 = iak = 1/i + a/k
-+      veor    q4, q4, q5              @ vpxor         %xmm5,  %xmm4,  %xmm4   
# 4 = jak = 1/j + a/k
-+      vtbl.8  q2#lo, {$invlo}, q3#lo  @ vpshufb       %xmm3,  %xmm10, %xmm2   
# 2 = 1/iak
-+      vtbl.8  q2#hi, {$invlo}, q3#hi
-+      vtbl.8  q3#lo, {$invlo}, q4#lo  @ vpshufb       %xmm4,  %xmm10, %xmm3   
# 3 = 1/jak
-+      vtbl.8  q3#hi, {$invlo}, q4#hi
-+      veor    q2, q2, q1              @ vpxor         %xmm1,  %xmm2,  %xmm2   
# 2 = io
-+      veor    q3, q3, q0              @ vpxor         %xmm0,  %xmm3,  %xmm3   
# 3 = jo
-+      vld1.64 {q5}, [r9]!             @ vmovdqu       (%r9),  %xmm5
-+      bne     .Lenc_loop
-+
-+      @ middle of last round
-+      add     r10, r11, #0x80
-+
-+      adr     r11, .Lk_sbo
-+      @ Read to q1 instead of q4, so the vtbl.8 instruction below does not
-+      @ overlap table and destination registers.
-+      vld1.64 {q1}, [r11]!            @ vmovdqa       -0x60(%r10), %xmm4      
# 3 : sbou
-+      vld1.64 {q0}, [r11]             @ vmovdqa       -0x50(%r10), %xmm0      
# 0 : sbot      .Lk_sbo+16
-+      vtbl.8  q4#lo, {q1}, q2#lo      @ vpshufb       %xmm2,  %xmm4,  %xmm4   
# 4 = sbou
-+      vtbl.8  q4#hi, {q1}, q2#hi
-+      vld1.64 {q1}, [r10]             @ vmovdqa       0x40(%r11,%r10), %xmm1  
# .Lk_sr[]
-+      @ Write to q2 instead of q0 below, to avoid overlapping table and
-+      @ destination registers.
-+      vtbl.8  q2#lo, {q0}, q3#lo      @ vpshufb       %xmm3,  %xmm0,  %xmm0   
# 0 = sb1t
-+      vtbl.8  q2#hi, {q0}, q3#hi
-+      veor    q4, q4, q5              @ vpxor %xmm5,  %xmm4,  %xmm4   # 4 = 
sb1u + k
-+      veor    q2, q2, q4              @ vpxor %xmm4,  %xmm0,  %xmm0   # 0 = A
-+      @ Here we restore the original q0/q2 usage.
-+      vtbl.8  q0#lo, {q2}, q1#lo      @ vpshufb       %xmm1,  %xmm0,  %xmm0
-+      vtbl.8  q0#hi, {q2}, q1#hi
-+      bx      lr
-+.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
-+
-+.globl        GFp_vpaes_encrypt
-+.type GFp_vpaes_encrypt,%function
-+.align        4
-+GFp_vpaes_encrypt:
-+      @ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack
-+      @ alignment.
-+      stmdb   sp!, {r7-r11,lr}
-+      @ _vpaes_encrypt_core uses q4-q5 (d8-d11), which are callee-saved.
-+      vstmdb  sp!, {d8-d11}
-+
-+      vld1.64 {q0}, [$inp]
-+      bl      _vpaes_preheat
-+      bl      _vpaes_encrypt_core
-+      vst1.64 {q0}, [$out]
-+
-+      vldmia  sp!, {d8-d11}
-+      ldmia   sp!, {r7-r11, pc}       @ return
-+.size GFp_vpaes_encrypt,.-GFp_vpaes_encrypt
-+___
-+}
-+{
-+my ($inp,$bits,$out,$dir)=("r0","r1","r2","r3");
-+my ($rcon,$s0F,$invlo,$invhi,$s63) = map("q$_",(8..12));
-+
-+$code.=<<___;
-+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-+@@                                                    @@
-+@@                  AES key schedule                  @@
-+@@                                                    @@
-+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-+
-+@ This function diverges from both x86_64 and aarch64 in which constants are
-+@ pinned. x86_64 has a common preheat function for all operations. aarch64
-+@ separates them because it has enough registers to pin nearly all constants.
-+@ armv7 does not have enough registers, but needing explicit loads and stores
-+@ also complicates using x86_64's register allocation directly.
-+@
-+@ We pin some constants for convenience and leave q14 and q15 free to load
-+@ others on demand.
-+
-+@
-+@  Key schedule constants
-+@
-+.type _vpaes_key_consts,%object
-+.align        4
-+_vpaes_key_consts:
-+.Lk_rcon:     @ rcon
-+      .quad   0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
-+
-+.Lk_opt:      @ output transform
-+      .quad   0xFF9F4929D6B66000, 0xF7974121DEBE6808
-+      .quad   0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
-+.Lk_deskew:   @ deskew tables: inverts the sbox's "skew"
-+      .quad   0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
-+      .quad   0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
-+.size _vpaes_key_consts,.-_vpaes_key_consts
-+
-+.type _vpaes_key_preheat,%function
-+.align        4
-+_vpaes_key_preheat:
-+      adr     r11, .Lk_rcon
-+      vmov.i8 $s63, #0x5b                     @ .Lk_s63
-+      adr     r10, .Lk_inv                    @ Must be aligned to 8 mod 16.
-+      vmov.i8 $s0F, #0x0f                     @ .Lk_s0F
-+      vld1.64 {$invlo,$invhi}, [r10]          @ .Lk_inv
-+      vld1.64 {$rcon}, [r11]                  @ .Lk_rcon
-+      bx      lr
-+.size _vpaes_key_preheat,.-_vpaes_key_preheat
-+
-+.type _vpaes_schedule_core,%function
-+.align        4
-+_vpaes_schedule_core:
-+      @ We only need to save lr, but ARM requires an 8-byte stack alignment,
-+      @ so save an extra register.
-+      stmdb   sp!, {r3,lr}
-+
-+      bl      _vpaes_key_preheat      @ load the tables
-+
-+      adr     r11, .Lk_ipt            @ Must be aligned to 8 mod 16.
-+      vld1.64 {q0}, [$inp]!           @ vmovdqu       (%rdi), %xmm0           
# load key (unaligned)
-+
-+      @ input transform
-+      @ Use q4 here rather than q3 so .Lschedule_am_decrypting does not
-+      @ overlap table and destination.
-+      vmov    q4, q0                  @ vmovdqa       %xmm0,  %xmm3
-+      bl      _vpaes_schedule_transform
-+      adr     r10, .Lk_sr             @ Must be aligned to 8 mod 16.
-+      vmov    q7, q0                  @ vmovdqa       %xmm0,  %xmm7
-+
-+      add     r8, r8, r10
-+
-+      @ encrypting, output zeroth round key after transform
-+      vst1.64 {q0}, [$out]            @ vmovdqu       %xmm0,  (%rdx)
-+
-+      @ *ring*: Decryption removed.
-+
-+.Lschedule_go:
-+      cmp     $bits, #192             @ cmp   \$192,  %esi
-+      bhi     .Lschedule_256
-+      @ 128: fall through
-+
-+@@
-+@@  .schedule_128
-+@@
-+@@  128-bit specific part of key schedule.
-+@@
-+@@  This schedule is really simple, because all its parts
-+@@  are accomplished by the subroutines.
-+@@
-+.Lschedule_128:
-+      mov     $inp, #10               @ mov   \$10, %esi
-+
-+.Loop_schedule_128:
-+      bl      _vpaes_schedule_round
-+      subs    $inp, $inp, #1          @ dec   %esi
-+      beq     .Lschedule_mangle_last
-+      bl      _vpaes_schedule_mangle  @ write output
-+      b       .Loop_schedule_128
-+
-+@@
-+@@  .aes_schedule_256
-+@@
-+@@  256-bit specific part of key schedule.
-+@@
-+@@  The structure here is very similar to the 128-bit
-+@@  schedule, but with an additional "low side" in
-+@@  q6.  The low side's rounds are the same as the
-+@@  high side's, except no rcon and no rotation.
-+@@
-+.align        4
-+.Lschedule_256:
-+      vld1.64 {q0}, [$inp]                    @ vmovdqu       16(%rdi),%xmm0  
        # load key part 2 (unaligned)
-+      bl      _vpaes_schedule_transform       @ input transform
-+      mov     $inp, #7                        @ mov   \$7, %esi
-+
-+.Loop_schedule_256:
-+      bl      _vpaes_schedule_mangle          @ output low result
-+      vmov    q6, q0                          @ vmovdqa       %xmm0,  %xmm6   
        # save cur_lo in xmm6
-+
-+      @ high round
-+      bl      _vpaes_schedule_round
-+      subs    $inp, $inp, #1                  @ dec   %esi
-+      beq     .Lschedule_mangle_last
-+      bl      _vpaes_schedule_mangle
-+
-+      @ low round. swap xmm7 and xmm6
-+      vdup.32 q0, q0#hi[1]            @ vpshufd       \$0xFF, %xmm0,  %xmm0
-+      vmov.i8 q4, #0
-+      vmov    q5, q7                  @ vmovdqa       %xmm7,  %xmm5
-+      vmov    q7, q6                  @ vmovdqa       %xmm6,  %xmm7
-+      bl      _vpaes_schedule_low_round
-+      vmov    q7, q5                  @ vmovdqa       %xmm5,  %xmm7
-+
-+      b       .Loop_schedule_256
-+
-+@@
-+@@  .aes_schedule_mangle_last
-+@@
-+@@  Mangler for last round of key schedule
-+@@  Mangles q0
-+@@    when encrypting, outputs out(q0) ^ 63
-+@@    when decrypting, outputs unskew(q0)
-+@@
-+@@  Always called right before return... jumps to cleanup and exits
-+@@
-+.align        4
-+.Lschedule_mangle_last:
-+      @ schedule last round key from xmm0
-+      adr     r11, .Lk_deskew                 @ lea   .Lk_deskew(%rip),%r11   
# prepare to deskew
-+
-+      @ encrypting
-+      vld1.64 {q1}, [r8]              @ vmovdqa       (%r8,%r10),%xmm1
-+      adr     r11, .Lk_opt            @ lea           .Lk_opt(%rip),  %r11    
        # prepare to output transform
-+      add     $out, $out, #32         @ add           \$32,   %rdx
-+      vmov    q2, q0
-+      vtbl.8  q0#lo, {q2}, q1#lo      @ vpshufb       %xmm1,  %xmm0,  %xmm0   
        # output permute
-+      vtbl.8  q0#hi, {q2}, q1#hi
-+
-+.Lschedule_mangle_last_dec:
-+      sub     $out, $out, #16                 @ add   \$-16,  %rdx
-+      veor    q0, q0, $s63                    @ vpxor .Lk_s63(%rip),  %xmm0,  
%xmm0
-+      bl      _vpaes_schedule_transform       @ output transform
-+      vst1.64 {q0}, [$out]                    @ vmovdqu       %xmm0,  (%rdx)  
        # save last key
-+
-+      @ cleanup
-+      veor    q0, q0, q0              @ vpxor %xmm0,  %xmm0,  %xmm0
-+      veor    q1, q1, q1              @ vpxor %xmm1,  %xmm1,  %xmm1
-+      veor    q2, q2, q2              @ vpxor %xmm2,  %xmm2,  %xmm2
-+      veor    q3, q3, q3              @ vpxor %xmm3,  %xmm3,  %xmm3
-+      veor    q4, q4, q4              @ vpxor %xmm4,  %xmm4,  %xmm4
-+      veor    q5, q5, q5              @ vpxor %xmm5,  %xmm5,  %xmm5
-+      veor    q6, q6, q6              @ vpxor %xmm6,  %xmm6,  %xmm6
-+      veor    q7, q7, q7              @ vpxor %xmm7,  %xmm7,  %xmm7
-+      ldmia   sp!, {r3,pc}            @ return
-+.size _vpaes_schedule_core,.-_vpaes_schedule_core
-+
-+@@
-+@@  .aes_schedule_round
-+@@
-+@@  Runs one main round of the key schedule on q0, q7
-+@@
-+@@  Specifically, runs subbytes on the high dword of q0
-+@@  then rotates it by one byte and xors into the low dword of
-+@@  q7.
-+@@
-+@@  Adds rcon from low byte of q8, then rotates q8 for
-+@@  next rcon.
-+@@
-+@@  Smears the dwords of q7 by xoring the low into the
-+@@  second low, result into third, result into highest.
-+@@
-+@@  Returns results in q7 = q0.
-+@@  Clobbers q1-q4, q14-q15, r11.
-+@@
-+.type _vpaes_schedule_round,%function
-+.align        4
-+_vpaes_schedule_round:
-+      @ extract rcon from xmm8
-+      vmov.i8 q4, #0                          @ vpxor         %xmm4,  %xmm4,  
%xmm4
-+      vext.8  q1, $rcon, q4, #15              @ vpalignr      \$15,   %xmm8,  
%xmm4,  %xmm1
-+      vext.8  $rcon, $rcon, $rcon, #15        @ vpalignr      \$15,   %xmm8,  
%xmm8,  %xmm8
-+      veor    q7, q7, q1                      @ vpxor         %xmm1,  %xmm7,  
%xmm7
-+
-+      @ rotate
-+      vdup.32 q0, q0#hi[1]                    @ vpshufd       \$0xFF, %xmm0,  
%xmm0
-+      vext.8  q0, q0, q0, #1                  @ vpalignr      \$1,    %xmm0,  
%xmm0,  %xmm0
-+
-+      @ fall through...
-+
-+      @ low round: same as high round, but no rotation and no rcon.
-+_vpaes_schedule_low_round:
-+      @ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12.
-+      @ We pin other values in _vpaes_key_preheat, so load them now.
-+      adr     r11, .Lk_sb1
-+      vld1.64 {q14,q15}, [r11]
-+
-+      @ smear xmm7
-+      vext.8  q1, q4, q7, #12                 @ vpslldq       \$4,    %xmm7,  
%xmm1
-+      veor    q7, q7, q1                      @ vpxor %xmm1,  %xmm7,  %xmm7
-+      vext.8  q4, q4, q7, #8                  @ vpslldq       \$8,    %xmm7,  
%xmm4
-+
-+      @ subbytes
-+      vand    q1, q0, $s0F                    @ vpand         %xmm9,  %xmm0,  
%xmm1           # 0 = k
-+      vshr.u8 q0, q0, #4                      @ vpsrlb        \$4,    %xmm0,  
%xmm0           # 1 = i
-+       veor   q7, q7, q4                      @ vpxor         %xmm4,  %xmm7,  
%xmm7
-+      vtbl.8  q2#lo, {$invhi}, q1#lo          @ vpshufb       %xmm1,  %xmm11, 
%xmm2           # 2 = a/k
-+      vtbl.8  q2#hi, {$invhi}, q1#hi
-+      veor    q1, q1, q0                      @ vpxor         %xmm0,  %xmm1,  
%xmm1           # 0 = j
-+      vtbl.8  q3#lo, {$invlo}, q0#lo          @ vpshufb       %xmm0,  %xmm10, 
%xmm3           # 3 = 1/i
-+      vtbl.8  q3#hi, {$invlo}, q0#hi
-+      veor    q3, q3, q2                      @ vpxor         %xmm2,  %xmm3,  
%xmm3           # 3 = iak = 1/i + a/k
-+      vtbl.8  q4#lo, {$invlo}, q1#lo          @ vpshufb       %xmm1,  %xmm10, 
%xmm4           # 4 = 1/j
-+      vtbl.8  q4#hi, {$invlo}, q1#hi
-+       veor   q7, q7, $s63                    @ vpxor         .Lk_s63(%rip),  
%xmm7,  %xmm7
-+      vtbl.8  q3#lo, {$invlo}, q3#lo          @ vpshufb       %xmm3,  %xmm10, 
%xmm3           # 2 = 1/iak
-+      vtbl.8  q3#hi, {$invlo}, q3#hi
-+      veor    q4, q4, q2                      @ vpxor         %xmm2,  %xmm4,  
%xmm4           # 4 = jak = 1/j + a/k
-+      vtbl.8  q2#lo, {$invlo}, q4#lo          @ vpshufb       %xmm4,  %xmm10, 
%xmm2           # 3 = 1/jak
-+      vtbl.8  q2#hi, {$invlo}, q4#hi
-+      veor    q3, q3, q1                      @ vpxor         %xmm1,  %xmm3,  
%xmm3           # 2 = io
-+      veor    q2, q2, q0                      @ vpxor         %xmm0,  %xmm2,  
%xmm2           # 3 = jo
-+      vtbl.8  q4#lo, {q15}, q3#lo             @ vpshufb       %xmm3,  %xmm13, 
%xmm4           # 4 = sbou
-+      vtbl.8  q4#hi, {q15}, q3#hi
-+      vtbl.8  q1#lo, {q14}, q2#lo             @ vpshufb       %xmm2,  %xmm12, 
%xmm1           # 0 = sb1t
-+      vtbl.8  q1#hi, {q14}, q2#hi
-+      veor    q1, q1, q4                      @ vpxor         %xmm4,  %xmm1,  
%xmm1           # 0 = sbox output
-+
-+      @ add in smeared stuff
-+      veor    q0, q1, q7                      @ vpxor %xmm7,  %xmm1,  %xmm0
-+      veor    q7, q1, q7                      @ vmovdqa       %xmm0,  %xmm7
-+      bx      lr
-+.size _vpaes_schedule_round,.-_vpaes_schedule_round
-+
-+@@
-+@@  .aes_schedule_transform
-+@@
-+@@  Linear-transform q0 according to tables at [r11]
-+@@
-+@@  Requires that q9 = 0x0F0F... as in preheat
-+@@  Output in q0
-+@@  Clobbers q1, q2, q14, q15
-+@@
-+.type _vpaes_schedule_transform,%function
-+.align        4
-+_vpaes_schedule_transform:
-+      vld1.64 {q14,q15}, [r11]        @ vmovdqa       (%r11), %xmm2   # lo
-+                                      @ vmovdqa       16(%r11),       %xmm1 # 
hi
-+      vand    q1, q0, $s0F            @ vpand %xmm9,  %xmm0,  %xmm1
-+      vshr.u8 q0, q0, #4              @ vpsrlb        \$4,    %xmm0,  %xmm0
-+      vtbl.8  q2#lo, {q14}, q1#lo     @ vpshufb       %xmm1,  %xmm2,  %xmm2
-+      vtbl.8  q2#hi, {q14}, q1#hi
-+      vtbl.8  q0#lo, {q15}, q0#lo     @ vpshufb       %xmm0,  %xmm1,  %xmm0
-+      vtbl.8  q0#hi, {q15}, q0#hi
-+      veor    q0, q0, q2              @ vpxor %xmm2,  %xmm0,  %xmm0
-+      bx      lr
-+.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
-+
-+@@
-+@@  .aes_schedule_mangle
-+@@
-+@@  Mangles q0 from (basis-transformed) standard version
-+@@  to our version.
-+@@
-+@@  On encrypt,
-+@@    xor with 0x63
-+@@    multiply by circulant 0,1,1,1
-+@@    apply shiftrows transform
-+@@
-+@@  On decrypt,
-+@@    xor with 0x63
-+@@    multiply by "inverse mixcolumns" circulant E,B,D,9
-+@@    deskew
-+@@    apply shiftrows transform
-+@@
-+@@
-+@@  Writes out to [r2], and increments or decrements it
-+@@  Keeps track of round number mod 4 in r8
-+@@  Preserves q0
-+@@  Clobbers q1-q5, r11
-+@@
-+.type _vpaes_schedule_mangle,%function
-+.align        4
-+_vpaes_schedule_mangle:
-+      tst     $dir, $dir
-+      vmov    q4, q0                  @ vmovdqa       %xmm0,  %xmm4   # save 
xmm0 for later
-+      adr     r11, .Lk_mc_forward     @ Must be aligned to 8 mod 16.
-+      vld1.64 {q5}, [r11]             @ vmovdqa       
.Lk_mc_forward(%rip),%xmm5
-+
-+      @ encrypting
-+      @ Write to q2 so we do not overlap table and destination below.
-+      veor    q2, q0, $s63            @ vpxor         .Lk_s63(%rip),  %xmm0,  
%xmm4
-+      add     $out, $out, #16         @ add           \$16,   %rdx
-+      vtbl.8  q4#lo, {q2}, q5#lo      @ vpshufb       %xmm5,  %xmm4,  %xmm4
-+      vtbl.8  q4#hi, {q2}, q5#hi
-+      vtbl.8  q1#lo, {q4}, q5#lo      @ vpshufb       %xmm5,  %xmm4,  %xmm1
-+      vtbl.8  q1#hi, {q4}, q5#hi
-+      vtbl.8  q3#lo, {q1}, q5#lo      @ vpshufb       %xmm5,  %xmm1,  %xmm3
-+      vtbl.8  q3#hi, {q1}, q5#hi
-+      veor    q4, q4, q1              @ vpxor         %xmm1,  %xmm4,  %xmm4
-+      vld1.64 {q1}, [r8]              @ vmovdqa       (%r8,%r10),     %xmm1
-+      veor    q3, q3, q4              @ vpxor         %xmm4,  %xmm3,  %xmm3
-+
-+.Lschedule_mangle_both:
-+      @ Write to q2 so table and destination do not overlap.
-+      vtbl.8  q2#lo, {q3}, q1#lo      @ vpshufb       %xmm1,  %xmm3,  %xmm3
-+      vtbl.8  q2#hi, {q3}, q1#hi
-+      add     r8, r8, #64-16          @ add   \$-16,  %r8
-+      and     r8, r8, #~(1<<6)        @ and   \$0x30, %r8
-+      vst1.64 {q2}, [$out]            @ vmovdqu       %xmm3,  (%rdx)
-+      bx      lr
-+.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
-+
-+.globl        GFp_vpaes_set_encrypt_key
-+.type GFp_vpaes_set_encrypt_key,%function
-+.align        4
-+GFp_vpaes_set_encrypt_key:
-+      stmdb   sp!, {r7-r11, lr}
-+      vstmdb  sp!, {d8-d15}
-+
-+      lsr     r9, $bits, #5           @ shr   \$5,%eax
-+      add     r9, r9, #5              @ add   \$5,%eax
-+      str     r9, [$out,#240]         @ mov   %eax,240(%rdx)  # AES_KEY->rounds = nbits/32+5;
-+
-+      mov     $dir, #0                @ mov   \$0,%ecx
-+      mov     r8, #0x30               @ mov   \$0x30,%r8d
-+      bl      _vpaes_schedule_core
-+      eor     r0, r0, r0
-+
-+      vldmia  sp!, {d8-d15}
-+      ldmia   sp!, {r7-r11, pc}       @ return
-+.size GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key
-+___
-+}
-+
-+{
-+my ($out, $inp) = map("r$_", (0..1));
-+my ($s0F, $s63, $s63_raw, $mc_forward) = map("q$_", (9..12));
-+
-+$code .= <<___;
-+
-+@ Additional constants for converting to bsaes.
-+.type _vpaes_convert_consts,%object
-+.align        4
-+_vpaes_convert_consts:
-+@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear
-+@ transform in the AES S-box. 0x63 is incorporated into the low half of the
-+@ table. This was computed with the following script:
-+@
-+@   def u64s_to_u128(x, y):
-+@       return x | (y << 64)
-+@   def u128_to_u64s(w):
-+@       return w & ((1<<64)-1), w >> 64
-+@   def get_byte(w, i):
-+@       return (w >> (i*8)) & 0xff
-+@   def apply_table(table, b):
-+@       lo = b & 0xf
-+@       hi = b >> 4
-+@       return get_byte(table[0], lo) ^ get_byte(table[1], hi)
-+@   def opt(b):
-+@       table = [
-+@           u64s_to_u128(0xFF9F4929D6B66000, 0xF7974121DEBE6808),
-+@           u64s_to_u128(0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0),
-+@       ]
-+@       return apply_table(table, b)
-+@   def rot_byte(b, n):
-+@       return 0xff & ((b << n) | (b >> (8-n)))
-+@   def skew(x):
-+@       return (x ^ rot_byte(x, 1) ^ rot_byte(x, 2) ^ rot_byte(x, 3) ^
-+@               rot_byte(x, 4))
-+@   table = [0, 0]
-+@   for i in range(16):
-+@       table[0] |= (skew(opt(i)) ^ 0x63) << (i*8)
-+@       table[1] |= skew(opt(i<<4)) << (i*8)
-+@   print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[0]))
-+@   print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[1]))
-+.Lk_opt_then_skew:
-+      .quad   0x9cb8436798bc4763, 0x6440bb9f6044bf9b
-+      .quad   0x1f30062936192f00, 0xb49bad829db284ab
-+
-+@ void GFp_vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes);
-+.globl        GFp_vpaes_encrypt_key_to_bsaes
-+.type GFp_vpaes_encrypt_key_to_bsaes,%function
-+.align        4
-+GFp_vpaes_encrypt_key_to_bsaes:
-+      stmdb   sp!, {r11, lr}
-+
-+      @ See _vpaes_schedule_core for the key schedule logic. In particular,
-+      @ _vpaes_schedule_transform(.Lk_ipt) (section 2.2 of the paper),
-+      @ _vpaes_schedule_mangle (section 4.3), and .Lschedule_mangle_last
-+      @ contain the transformations not in the bsaes representation. This
-+      @ function inverts those transforms.
-+      @
-+      @ Note also that bsaes-armv7.pl expects aes-armv4.pl's key
-+      @ representation, which does not match the other aes_nohw_*
-+      @ implementations. The ARM aes_nohw_* stores each 32-bit word
-+      @ byteswapped, as a convenience for (unsupported) big-endian ARM, at the
-+      @ cost of extra REV and VREV32 operations in little-endian ARM.
-+
-+      vmov.i8 $s0F, #0x0f             @ Required by _vpaes_schedule_transform
-+      adr     r2, .Lk_mc_forward      @ Must be aligned to 8 mod 16.
-+      add     r3, r2, 0x90            @ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
-+
-+      vld1.64 {$mc_forward}, [r2]
-+      vmov.i8 $s63, #0x5b             @ .Lk_s63 from vpaes-x86_64
-+      adr     r11, .Lk_opt            @ Must be aligned to 8 mod 16.
-+      vmov.i8 $s63_raw, #0x63         @ .LK_s63 without .Lk_ipt applied
-+
-+      @ vpaes stores one fewer round count than bsaes, but the number of keys
-+      @ is the same.
-+      ldr     r2, [$inp,#240]
-+      add     r2, r2, #1
-+      str     r2, [$out,#240]
-+
-+      @ The first key is transformed with _vpaes_schedule_transform(.Lk_ipt).
-+      @ Invert this with .Lk_opt.
-+      vld1.64 {q0}, [$inp]!
-+      bl      _vpaes_schedule_transform
-+      vrev32.8        q0, q0
-+      vst1.64 {q0}, [$out]!
-+
-+      @ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied,
-+      @ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63,
-+      @ multiplies by the circulant 0,1,1,1, then applies ShiftRows.
-+.Loop_enc_key_to_bsaes:
-+      vld1.64 {q0}, [$inp]!
-+
-+      @ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle
-+      @ r3 in the opposite direction and start at .Lk_sr+0x10 instead of 0x30.
-+      @ We use r3 rather than r8 to avoid a callee-saved register.
-+      vld1.64 {q1}, [r3]
-+      vtbl.8  q2#lo, {q0}, q1#lo
-+      vtbl.8  q2#hi, {q0}, q1#hi
-+      add     r3, r3, #16
-+      and     r3, r3, #~(1<<6)
-+      vmov    q0, q2
-+
-+      @ Handle the last key differently.
-+      subs    r2, r2, #1
-+      beq     .Loop_enc_key_to_bsaes_last
-+
-+      @ Multiply by the circulant. This is its own inverse.
-+      vtbl.8  q1#lo, {q0}, $mc_forward#lo
-+      vtbl.8  q1#hi, {q0}, $mc_forward#hi
-+      vmov    q0, q1
-+      vtbl.8  q2#lo, {q1}, $mc_forward#lo
-+      vtbl.8  q2#hi, {q1}, $mc_forward#hi
-+      veor    q0, q0, q2
-+      vtbl.8  q1#lo, {q2}, $mc_forward#lo
-+      vtbl.8  q1#hi, {q2}, $mc_forward#hi
-+      veor    q0, q0, q1
-+
-+      @ XOR and finish.
-+      veor    q0, q0, $s63
-+      bl      _vpaes_schedule_transform
-+      vrev32.8        q0, q0
-+      vst1.64 {q0}, [$out]!
-+      b       .Loop_enc_key_to_bsaes
-+
-+.Loop_enc_key_to_bsaes_last:
-+      @ The final key does not have a basis transform (note
-+      @ .Lschedule_mangle_last inverts the original transform). It only XORs
-+      @ 0x63 and applies ShiftRows. The latter was already inverted in the
-+      @ loop. Note that, because we act on the original representation, we use
-+      @ $s63_raw, not $s63.
-+      veor    q0, q0, $s63_raw
-+      vrev32.8        q0, q0
-+      vst1.64 {q0}, [$out]
-+
-+      @ Wipe registers which contained key material.
-+      veor    q0, q0, q0
-+      veor    q1, q1, q1
-+      veor    q2, q2, q2
-+
-+      ldmia   sp!, {r11, pc}  @ return
-+.size GFp_vpaes_encrypt_key_to_bsaes,.-GFp_vpaes_encrypt_key_to_bsaes
-+___
-+}
-+
-+{
-+# Register-passed parameters.
-+my ($inp, $out, $len, $key) = map("r$_", 0..3);
-+# Temporaries. _vpaes_encrypt_core already uses r8..r11, so overlap $ivec and
-+# $tmp. $ctr is r7 because it must be preserved across calls.
-+my ($ctr, $ivec, $tmp) = map("r$_", 7..9);
-+
-+# void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
-+#                                 const AES_KEY *key, const uint8_t ivec[16]);
-+$code .= <<___;
-+.globl        GFp_vpaes_ctr32_encrypt_blocks
-+.type GFp_vpaes_ctr32_encrypt_blocks,%function
-+.align        4
-+GFp_vpaes_ctr32_encrypt_blocks:
-+      mov     ip, sp
-+      stmdb   sp!, {r7-r11, lr}
-+      @ This function uses q4-q7 (d8-d15), which are callee-saved.
-+      vstmdb  sp!, {d8-d15}
-+
-+      cmp     $len, #0
-+      @ $ivec is passed on the stack.
-+      ldr     $ivec, [ip]
-+      beq     .Lctr32_done
-+
-+      @ _vpaes_encrypt_core expects the key in r2, so swap $len and $key.
-+      mov     $tmp, $key
-+      mov     $key, $len
-+      mov     $len, $tmp
-+___
-+my ($len, $key) = ($key, $len);
-+$code .= <<___;
-+
-+      @ Load the IV and counter portion.
-+      ldr     $ctr, [$ivec, #12]
-+      vld1.8  {q7}, [$ivec]
-+
-+      bl      _vpaes_preheat
-+      rev     $ctr, $ctr              @ The counter is big-endian.
-+
-+.Lctr32_loop:
-+      vmov    q0, q7
-+      vld1.8  {q6}, [$inp]!           @ Load input ahead of time
-+      bl      _vpaes_encrypt_core
-+      veor    q0, q0, q6              @ XOR input and result
-+      vst1.8  {q0}, [$out]!
-+      subs    $len, $len, #1
-+      @ Update the counter.
-+      add     $ctr, $ctr, #1
-+      rev     $tmp, $ctr
-+      vmov.32 q7#hi[1], $tmp
-+      bne     .Lctr32_loop
-+
-+.Lctr32_done:
-+      vldmia  sp!, {d8-d15}
-+      ldmia   sp!, {r7-r11, pc}       @ return
-+.size GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks
-+___
-+}
-+
-+foreach (split("\n",$code)) {          # post-process the generated assembly one line at a time
-+      s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo;  # qN#lo -> d(2N), qN#hi -> d(2N+1)
-+      print $_,"\n";
-+}
-+
-+close STDOUT or die "error closing STDOUT: $!";  # do not silently ignore a failed flush/close of the output
-diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv8.pl b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
-new file mode 100755
-index 0000000..b31bbb8
---- /dev/null
-+++ b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
-@@ -0,0 +1,837 @@
-+#! /usr/bin/env perl
-+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
-+#
-+# Licensed under the OpenSSL license (the "License").  You may not use
-+# this file except in compliance with the License.  You can obtain a copy
-+# in the file LICENSE in the source distribution or at
-+# https://www.openssl.org/source/license.html
-+
-+
-+######################################################################
-+## Constant-time SSSE3 AES core implementation.
-+## version 0.1
-+##
-+## By Mike Hamburg (Stanford University), 2009
-+## Public domain.
-+##
-+## For details see http://shiftleft.org/papers/vector_aes/ and
-+## http://crypto.stanford.edu/vpaes/.
-+##
-+######################################################################
-+# ARMv8 NEON adaptation by <appro@openssl.org>
-+#
-+# The reason for this effort is that there is at least one popular
-+# SoC based on Cortex-A53 that doesn't have crypto extensions.
-+#
-+#                   CBC enc     ECB enc/dec(*)   [bit-sliced enc/dec]
-+# Cortex-A53        21.5        18.1/20.6        [17.5/19.8         ]
-+# Cortex-A57        36.0(**)    20.4/24.9(**)    [14.4/16.6         ]
-+# X-Gene            45.9(**)    45.8/57.7(**)    [33.1/37.6(**)     ]
-+# Denver(***)       16.6(**)    15.1/17.8(**)    [8.80/9.93         ]
-+# Apple A7(***)     22.7(**)    10.9/14.3        [8.45/10.0         ]
-+# Mongoose(***)     26.3(**)    21.0/25.0(**)    [13.3/16.8         ]
-+#
-+# (*) ECB denotes approximate result for parallelizable modes
-+#     such as CBC decrypt, CTR, etc.;
-+# (**)        these results are worse than scalar compiler-generated
-+#     code, but it's constant-time and therefore preferred;
-+# (***)       presented for reference/comparison purposes;
-+
-+$flavour = shift;                       # first argument: assembler flavour handed to arm-xlate.pl
-+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}  # take the first argument that ends in name.ext
-+
-+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;  # directory part of this script's own path
-+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-+( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
-+die "can't locate arm-xlate.pl";
-+
-+open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";  # fail loudly if the translator cannot be spawned
-+*STDOUT=*OUT;                           # everything printed from here on is piped through arm-xlate.pl
-+
-+$code.=<<___;
-+#include <GFp/arm_arch.h>
-+
-+.section      .rodata
-+
-+.type _vpaes_consts,%object
-+.align        7       // totally strategic alignment
-+_vpaes_consts:
-+.Lk_mc_forward:       // mc_forward
-+      .quad   0x0407060500030201, 0x0C0F0E0D080B0A09
-+      .quad   0x080B0A0904070605, 0x000302010C0F0E0D
-+      .quad   0x0C0F0E0D080B0A09, 0x0407060500030201
-+      .quad   0x000302010C0F0E0D, 0x080B0A0904070605
-+.Lk_mc_backward:// mc_backward
-+      .quad   0x0605040702010003, 0x0E0D0C0F0A09080B
-+      .quad   0x020100030E0D0C0F, 0x0A09080B06050407
-+      .quad   0x0E0D0C0F0A09080B, 0x0605040702010003
-+      .quad   0x0A09080B06050407, 0x020100030E0D0C0F
-+.Lk_sr:               // sr
-+      .quad   0x0706050403020100, 0x0F0E0D0C0B0A0908
-+      .quad   0x030E09040F0A0500, 0x0B06010C07020D08
-+      .quad   0x0F060D040B020900, 0x070E050C030A0108
-+      .quad   0x0B0E0104070A0D00, 0x0306090C0F020508
-+
-+//
-+// "Hot" constants
-+//
-+.Lk_inv:      // inv, inva
-+      .quad   0x0E05060F0D080180, 0x040703090A0B0C02
-+      .quad   0x01040A060F0B0780, 0x030D0E0C02050809
-+.Lk_ipt:      // input transform (lo, hi)
-+      .quad   0xC2B2E8985A2A7000, 0xCABAE09052227808
-+      .quad   0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
-+.Lk_sbo:      // sbou, sbot
-+      .quad   0xD0D26D176FBDC700, 0x15AABF7AC502A878
-+      .quad   0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
-+.Lk_sb1:      // sb1u, sb1t
-+      .quad   0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
-+      .quad   0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
-+.Lk_sb2:      // sb2u, sb2t
-+      .quad   0x69EB88400AE12900, 0xC2A163C8AB82234A
-+      .quad   0xE27A93C60B712400, 0x5EB7E955BC982FCD
-+
-+//
-+//  Key schedule constants
-+//
-+.Lk_dksd:     // decryption key schedule: invskew x*D
-+      .quad   0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
-+      .quad   0x41C277F4B5368300, 0x5FDC69EAAB289D1E
-+.Lk_dksb:     // decryption key schedule: invskew x*B
-+      .quad   0x9A4FCA1F8550D500, 0x03D653861CC94C99
-+      .quad   0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
-+.Lk_dkse:     // decryption key schedule: invskew x*E + 0x63
-+      .quad   0xD5031CCA1FC9D600, 0x53859A4C994F5086
-+      .quad   0xA23196054FDC7BE8, 0xCD5EF96A20B31487
-+.Lk_dks9:     // decryption key schedule: invskew x*9
-+      .quad   0xB6116FC87ED9A700, 0x4AED933482255BFC
-+      .quad   0x4576516227143300, 0x8BB89FACE9DAFDCE
-+
-+.Lk_rcon:     // rcon
-+      .quad   0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
-+
-+.Lk_opt:      // output transform
-+      .quad   0xFF9F4929D6B66000, 0xF7974121DEBE6808
-+      .quad   0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
-+.Lk_deskew:   // deskew tables: inverts the sbox's "skew"
-+      .quad   0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
-+      .quad   0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
-+
-+.asciz  "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
-+.size _vpaes_consts,.-_vpaes_consts
-+.align        6
-+
-+.text
-+___
-+
-+{
-+my ($inp,$out,$key) = map("x$_",(0..2));
-+
-+my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_.16b",(18..23));
-+my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_.16b",(24..27));
-+my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_.16b",(24..31));
-+
-+$code.=<<___;
-+##
-+##  _aes_preheat
-+##
-+##  Fills register %r10 -> .aes_consts (so you can -fPIC)
-+##  and %xmm9-%xmm15 as specified below.
-+##
-+.type _vpaes_encrypt_preheat,%function
-+.align        4
-+_vpaes_encrypt_preheat:
-+      adrp    x10, :pg_hi21:.Lk_inv
-+      add     x10, x10, :lo12:.Lk_inv
-+      movi    v17.16b, #0x0f
-+      ld1     {v18.2d-v19.2d}, [x10],#32      // .Lk_inv
-+      ld1     {v20.2d-v23.2d}, [x10],#64      // .Lk_ipt, .Lk_sbo
-+      ld1     {v24.2d-v27.2d}, [x10]          // .Lk_sb1, .Lk_sb2
-+      ret
-+.size _vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat
-+
-+##
-+##  _aes_encrypt_core
-+##
-+##  AES-encrypt %xmm0.
-+##
-+##  Inputs:
-+##     %xmm0 = input
-+##     %xmm9-%xmm15 as in _vpaes_preheat
-+##    (%rdx) = scheduled keys
-+##
-+##  Output in %xmm0
-+##  Clobbers  %xmm1-%xmm5, %r9, %r10, %r11, %rax
-+##  Preserves %xmm6 - %xmm8 so you get some local vectors
-+##
-+##
-+.type _vpaes_encrypt_core,%function
-+.align 4
-+_vpaes_encrypt_core:
-+      mov     x9, $key
-+      ldr     w8, [$key,#240]                 // pull rounds
-+      adrp    x11, :pg_hi21:.Lk_mc_forward+16
-+      add     x11, x11, :lo12:.Lk_mc_forward+16
-+                                              // vmovdqa      .Lk_ipt(%rip),  %xmm2   # iptlo
-+      ld1     {v16.2d}, [x9], #16             // vmovdqu      (%r9),  %xmm5           # round0 key
-+      and     v1.16b, v7.16b, v17.16b         // vpand        %xmm9,  %xmm0,  %xmm1
-+      ushr    v0.16b, v7.16b, #4              // vpsrlb       \$4,    %xmm0,  %xmm0
-+      tbl     v1.16b, {$iptlo}, v1.16b        // vpshufb      %xmm1,  %xmm2,  %xmm1
-+                                              // vmovdqa      .Lk_ipt+16(%rip), %xmm3 # ipthi
-+      tbl     v2.16b, {$ipthi}, v0.16b        // vpshufb      %xmm0,  %xmm3,  %xmm2
-+      eor     v0.16b, v1.16b, v16.16b         // vpxor        %xmm5,  %xmm1,  %xmm0
-+      eor     v0.16b, v0.16b, v2.16b          // vpxor        %xmm2,  %xmm0,  %xmm0
-+      b       .Lenc_entry
-+
-+.align 4
-+.Lenc_loop:
-+      // middle of middle round
-+      add     x10, x11, #0x40
-+      tbl     v4.16b, {$sb1t}, v2.16b         // vpshufb      %xmm2,  %xmm13, %xmm4   # 4 = sb1u
-+      ld1     {v1.2d}, [x11], #16             // vmovdqa      -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
-+      tbl     v0.16b, {$sb1u}, v3.16b         // vpshufb      %xmm3,  %xmm12, %xmm0   # 0 = sb1t
-+      eor     v4.16b, v4.16b, v16.16b         // vpxor        %xmm5,  %xmm4,  %xmm4   # 4 = sb1u + k
-+      tbl     v5.16b, {$sb2t}, v2.16b         // vpshufb      %xmm2,  %xmm15, %xmm5   # 4 = sb2u
-+      eor     v0.16b, v0.16b, v4.16b          // vpxor        %xmm4,  %xmm0,  %xmm0   # 0 = A
-+      tbl     v2.16b, {$sb2u}, v3.16b         // vpshufb      %xmm3,  %xmm14, %xmm2   # 2 = sb2t
-+      ld1     {v4.2d}, [x10]                  // vmovdqa      (%r11,%r10), %xmm4      # .Lk_mc_backward[]
-+      tbl     v3.16b, {v0.16b}, v1.16b        // vpshufb      %xmm1,  %xmm0,  %xmm3   # 0 = B
-+      eor     v2.16b, v2.16b, v5.16b          // vpxor        %xmm5,  %xmm2,  %xmm2   # 2 = 2A
-+      tbl     v0.16b, {v0.16b}, v4.16b        // vpshufb      %xmm4,  %xmm0,  %xmm0   # 3 = D
-+      eor     v3.16b, v3.16b, v2.16b          // vpxor        %xmm2,  %xmm3,  %xmm3   # 0 = 2A+B
-+      tbl     v4.16b, {v3.16b}, v1.16b        // vpshufb      %xmm1,  %xmm3,  %xmm4   # 0 = 2B+C
-+      eor     v0.16b, v0.16b, v3.16b          // vpxor        %xmm3,  %xmm0,  %xmm0   # 3 = 2A+B+D
-+      and     x11, x11, #~(1<<6)              // and          \$0x30, %r11            # ... mod 4
-+      eor     v0.16b, v0.16b, v4.16b          // vpxor        %xmm4,  %xmm0, %xmm0    # 0 = 2A+3B+C+D
-+      sub     w8, w8, #1                      // nr--
-+
-+.Lenc_entry:
-+      // top of round
-+      and     v1.16b, v0.16b, v17.16b         // vpand        %xmm0,  %xmm9,  %xmm1   # 0 = k
-+      ushr    v0.16b, v0.16b, #4              // vpsrlb       \$4,    %xmm0,  %xmm0   # 1 = i
-+      tbl     v5.16b, {$invhi}, v1.16b        // vpshufb      %xmm1,  %xmm11, %xmm5   # 2 = a/k
-+      eor     v1.16b, v1.16b, v0.16b          // vpxor        %xmm0,  %xmm1,  %xmm1   # 0 = j
-+      tbl     v3.16b, {$invlo}, v0.16b        // vpshufb      %xmm0,  %xmm10, %xmm3   # 3 = 1/i
-+      tbl     v4.16b, {$invlo}, v1.16b        // vpshufb      %xmm1,  %xmm10, %xmm4   # 4 = 1/j
-+      eor     v3.16b, v3.16b, v5.16b          // vpxor        %xmm5,  %xmm3,  %xmm3   # 3 = iak = 1/i + a/k
-+      eor     v4.16b, v4.16b, v5.16b          // vpxor        %xmm5,  %xmm4,  %xmm4   # 4 = jak = 1/j + a/k
-+      tbl     v2.16b, {$invlo}, v3.16b        // vpshufb      %xmm3,  %xmm10, %xmm2   # 2 = 1/iak
-+      tbl     v3.16b, {$invlo}, v4.16b        // vpshufb      %xmm4,  %xmm10, %xmm3   # 3 = 1/jak
-+      eor     v2.16b, v2.16b, v1.16b          // vpxor        %xmm1,  %xmm2,  %xmm2   # 2 = io
-+      eor     v3.16b, v3.16b, v0.16b          // vpxor        %xmm0,  %xmm3,  %xmm3   # 3 = jo
-+      ld1     {v16.2d}, [x9],#16              // vmovdqu      (%r9),  %xmm5
-+      cbnz    w8, .Lenc_loop
-+
-+      // middle of last round
-+      add     x10, x11, #0x80
-+                                              // vmovdqa      -0x60(%r10), %xmm4      # 3 : sbou      .Lk_sbo
-+                                              // vmovdqa      -0x50(%r10), %xmm0      # 0 : sbot      .Lk_sbo+16
-+      tbl     v4.16b, {$sbou}, v2.16b         // vpshufb      %xmm2,  %xmm4,  %xmm4   # 4 = sbou
-+      ld1     {v1.2d}, [x10]                  // vmovdqa      0x40(%r11,%r10), %xmm1  # .Lk_sr[]
-+      tbl     v0.16b, {$sbot}, v3.16b         // vpshufb      %xmm3,  %xmm0,  %xmm0   # 0 = sb1t
-+      eor     v4.16b, v4.16b, v16.16b         // vpxor        %xmm5,  %xmm4,  %xmm4   # 4 = sb1u + k
-+      eor     v0.16b, v0.16b, v4.16b          // vpxor        %xmm4,  %xmm0,  %xmm0   # 0 = A
-+      tbl     v0.16b, {v0.16b}, v1.16b        // vpshufb      %xmm1,  %xmm0,  %xmm0
-+      ret
-+.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
-+
-+.globl        GFp_vpaes_encrypt
-+.type GFp_vpaes_encrypt,%function
-+.align        4
-+GFp_vpaes_encrypt:
-+      AARCH64_SIGN_LINK_REGISTER
-+      stp     x29,x30,[sp,#-16]!
-+      add     x29,sp,#0
-+
-+      ld1     {v7.16b}, [$inp]
-+      bl      _vpaes_encrypt_preheat
-+      bl      _vpaes_encrypt_core
-+      st1     {v0.16b}, [$out]
-+
-+      ldp     x29,x30,[sp],#16
-+      AARCH64_VALIDATE_LINK_REGISTER
-+      ret
-+.size GFp_vpaes_encrypt,.-GFp_vpaes_encrypt
-+
-+.type _vpaes_encrypt_2x,%function
-+.align 4
-+_vpaes_encrypt_2x:
-+      mov     x9, $key
-+      ldr     w8, [$key,#240]                 // pull rounds
-+      adrp    x11, :pg_hi21:.Lk_mc_forward+16
-+      add     x11, x11, :lo12:.Lk_mc_forward+16
-+                                              // vmovdqa      .Lk_ipt(%rip),  %xmm2   # iptlo
-+      ld1     {v16.2d}, [x9], #16             // vmovdqu      (%r9),  %xmm5           # round0 key
-+      and     v1.16b,  v14.16b,  v17.16b      // vpand        %xmm9,  %xmm0,  %xmm1
-+      ushr    v0.16b,  v14.16b,  #4           // vpsrlb       \$4,    %xmm0,  %xmm0
-+       and    v9.16b,  v15.16b,  v17.16b
-+       ushr   v8.16b,  v15.16b,  #4
-+      tbl     v1.16b,  {$iptlo}, v1.16b       // vpshufb      %xmm1,  %xmm2,  %xmm1
-+       tbl    v9.16b,  {$iptlo}, v9.16b
-+                                              // vmovdqa      .Lk_ipt+16(%rip), %xmm3 # ipthi
-+      tbl     v2.16b,  {$ipthi}, v0.16b       // vpshufb      %xmm0,  %xmm3,  %xmm2
-+       tbl    v10.16b, {$ipthi}, v8.16b
-+      eor     v0.16b,  v1.16b,   v16.16b      // vpxor        %xmm5,  %xmm1,  %xmm0
-+       eor    v8.16b,  v9.16b,   v16.16b
-+      eor     v0.16b,  v0.16b,   v2.16b       // vpxor        %xmm2,  %xmm0,  %xmm0
-+       eor    v8.16b,  v8.16b,   v10.16b
-+      b       .Lenc_2x_entry
-+
-+.align 4
-+.Lenc_2x_loop:
-+      // middle of middle round
-+      add     x10, x11, #0x40
-+      tbl     v4.16b,  {$sb1t}, v2.16b        // vpshufb      %xmm2,  %xmm13, %xmm4   # 4 = sb1u
-+       tbl    v12.16b, {$sb1t}, v10.16b
-+      ld1     {v1.2d}, [x11], #16             // vmovdqa      -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
-+      tbl     v0.16b,  {$sb1u}, v3.16b        // vpshufb      %xmm3,  %xmm12, %xmm0   # 0 = sb1t
-+       tbl    v8.16b,  {$sb1u}, v11.16b
-+      eor     v4.16b,  v4.16b,  v16.16b       // vpxor        %xmm5,  %xmm4,  %xmm4   # 4 = sb1u + k
-+       eor    v12.16b, v12.16b, v16.16b
-+      tbl     v5.16b,  {$sb2t}, v2.16b        // vpshufb      %xmm2,  %xmm15, %xmm5   # 4 = sb2u
-+       tbl    v13.16b, {$sb2t}, v10.16b
-+      eor     v0.16b,  v0.16b,  v4.16b        // vpxor        %xmm4,  %xmm0,  %xmm0   # 0 = A
-+       eor    v8.16b,  v8.16b,  v12.16b
-+      tbl     v2.16b,  {$sb2u}, v3.16b        // vpshufb      %xmm3,  %xmm14, %xmm2   # 2 = sb2t
-+       tbl    v10.16b, {$sb2u}, v11.16b
-+      ld1     {v4.2d}, [x10]                  // vmovdqa      (%r11,%r10), %xmm4      # .Lk_mc_backward[]
-+      tbl     v3.16b,  {v0.16b}, v1.16b       // vpshufb      %xmm1,  %xmm0,  %xmm3   # 0 = B
-+       tbl    v11.16b, {v8.16b}, v1.16b
-+      eor     v2.16b,  v2.16b,  v5.16b        // vpxor        %xmm5,  %xmm2,  %xmm2   # 2 = 2A
-+       eor    v10.16b, v10.16b, v13.16b
-+      tbl     v0.16b,  {v0.16b}, v4.16b       // vpshufb      %xmm4,  %xmm0,  %xmm0   # 3 = D
-+       tbl    v8.16b,  {v8.16b}, v4.16b
-+      eor     v3.16b,  v3.16b,  v2.16b        // vpxor        %xmm2,  %xmm3,  %xmm3   # 0 = 2A+B
-+       eor    v11.16b, v11.16b, v10.16b
-+      tbl     v4.16b,  {v3.16b}, v1.16b       // vpshufb      %xmm1,  %xmm3,  %xmm4   # 0 = 2B+C
-+       tbl    v12.16b, {v11.16b},v1.16b
-+      eor     v0.16b,  v0.16b,  v3.16b        // vpxor        %xmm3,  %xmm0,  %xmm0   # 3 = 2A+B+D
-+       eor    v8.16b,  v8.16b,  v11.16b
-+      and     x11, x11, #~(1<<6)              // and          \$0x30, %r11            # ... mod 4
-+      eor     v0.16b,  v0.16b,  v4.16b        // vpxor        %xmm4,  %xmm0, %xmm0    # 0 = 2A+3B+C+D
-+       eor    v8.16b,  v8.16b,  v12.16b
-+      sub     w8, w8, #1                      // nr--
-+
-+.Lenc_2x_entry:
-+      // top of round
-+      and     v1.16b,  v0.16b, v17.16b        // vpand        %xmm0,  %xmm9,  %xmm1   # 0 = k
-+      ushr    v0.16b,  v0.16b, #4             // vpsrlb       \$4,    %xmm0,  %xmm0   # 1 = i
-+       and    v9.16b,  v8.16b, v17.16b
-+       ushr   v8.16b,  v8.16b, #4
-+      tbl     v5.16b,  {$invhi},v1.16b        // vpshufb      %xmm1,  %xmm11, %xmm5   # 2 = a/k
-+       tbl    v13.16b, {$invhi},v9.16b
-+      eor     v1.16b,  v1.16b,  v0.16b        // vpxor        %xmm0,  %xmm1,  %xmm1   # 0 = j
-+       eor    v9.16b,  v9.16b,  v8.16b
-+      tbl     v3.16b,  {$invlo},v0.16b        // vpshufb      %xmm0,  %xmm10, %xmm3   # 3 = 1/i
-+       tbl    v11.16b, {$invlo},v8.16b
-+      tbl     v4.16b,  {$invlo},v1.16b        // vpshufb      %xmm1,  %xmm10, %xmm4   # 4 = 1/j
-+       tbl    v12.16b, {$invlo},v9.16b
-+      eor     v3.16b,  v3.16b,  v5.16b        // vpxor        %xmm5,  %xmm3,  %xmm3   # 3 = iak = 1/i + a/k
-+       eor    v11.16b, v11.16b, v13.16b
-+      eor     v4.16b,  v4.16b,  v5.16b        // vpxor        %xmm5,  %xmm4,  %xmm4   # 4 = jak = 1/j + a/k
-+       eor    v12.16b, v12.16b, v13.16b
-+      tbl     v2.16b,  {$invlo},v3.16b        // vpshufb      %xmm3,  %xmm10, %xmm2   # 2 = 1/iak
-+       tbl    v10.16b, {$invlo},v11.16b
-+      tbl     v3.16b,  {$invlo},v4.16b        // vpshufb      %xmm4,  %xmm10, %xmm3   # 3 = 1/jak
-+       tbl    v11.16b, {$invlo},v12.16b
-+      eor     v2.16b,  v2.16b,  v1.16b        // vpxor        %xmm1,  %xmm2,  %xmm2   # 2 = io
-+       eor    v10.16b, v10.16b, v9.16b
-+      eor     v3.16b,  v3.16b,  v0.16b        // vpxor        %xmm0,  %xmm3,  %xmm3   # 3 = jo
-+       eor    v11.16b, v11.16b, v8.16b
-+      ld1     {v16.2d}, [x9],#16              // vmovdqu      (%r9),  %xmm5
-+      cbnz    w8, .Lenc_2x_loop
-+
-+      // middle of last round
-+      add     x10, x11, #0x80
-+                                              // vmovdqa      -0x60(%r10), %xmm4      # 3 : sbou      .Lk_sbo
-+                                              // vmovdqa      -0x50(%r10), %xmm0      # 0 : sbot      .Lk_sbo+16
-+      tbl     v4.16b,  {$sbou}, v2.16b        // vpshufb      %xmm2,  %xmm4,  %xmm4   # 4 = sbou
-+       tbl    v12.16b, {$sbou}, v10.16b
-+      ld1     {v1.2d}, [x10]                  // vmovdqa      0x40(%r11,%r10), %xmm1  # .Lk_sr[]
-+      tbl     v0.16b,  {$sbot}, v3.16b        // vpshufb      %xmm3,  %xmm0,  %xmm0   # 0 = sb1t
-+       tbl    v8.16b,  {$sbot}, v11.16b
-+      eor     v4.16b,  v4.16b,  v16.16b       // vpxor        %xmm5,  %xmm4,  %xmm4   # 4 = sb1u + k
-+       eor    v12.16b, v12.16b, v16.16b
-+      eor     v0.16b,  v0.16b,  v4.16b        // vpxor        %xmm4,  %xmm0,  %xmm0   # 0 = A
-+       eor    v8.16b,  v8.16b,  v12.16b
-+      tbl     v0.16b,  {v0.16b},v1.16b        // vpshufb      %xmm1,  %xmm0,  %xmm0
-+       tbl    v1.16b,  {v8.16b},v1.16b
-+      ret
-+.size _vpaes_encrypt_2x,.-_vpaes_encrypt_2x
-+___
-+}
-+{
-+my ($inp,$bits,$out,$dir)=("x0","w1","x2","w3");
-+my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_.16b",(18..21,8));
-+
-+$code.=<<___;
-+########################################################
-+##                                                    ##
-+##                  AES key schedule                  ##
-+##                                                    ##
-+########################################################
-+.type _vpaes_key_preheat,%function
-+.align        4
-+_vpaes_key_preheat:
-+      adrp    x10, :pg_hi21:.Lk_inv
-+      add     x10, x10, :lo12:.Lk_inv
-+      movi    v16.16b, #0x5b                  // .Lk_s63
-+      adrp    x11, :pg_hi21:.Lk_sb1
-+      add     x11, x11, :lo12:.Lk_sb1
-+      movi    v17.16b, #0x0f                  // .Lk_s0F
-+      ld1     {v18.2d-v21.2d}, [x10]          // .Lk_inv, .Lk_ipt
-+      adrp    x10, :pg_hi21:.Lk_dksd
-+      add     x10, x10, :lo12:.Lk_dksd
-+      ld1     {v22.2d-v23.2d}, [x11]          // .Lk_sb1
-+      adrp    x11, :pg_hi21:.Lk_mc_forward
-+      add     x11, x11, :lo12:.Lk_mc_forward
-+      ld1     {v24.2d-v27.2d}, [x10],#64      // .Lk_dksd, .Lk_dksb
-+      ld1     {v28.2d-v31.2d}, [x10],#64      // .Lk_dkse, .Lk_dks9
-+      ld1     {v8.2d}, [x10]                  // .Lk_rcon
-+      ld1     {v9.2d}, [x11]                  // .Lk_mc_forward[0]
-+      ret
-+.size _vpaes_key_preheat,.-_vpaes_key_preheat
-+
-+.type _vpaes_schedule_core,%function
-+.align        4
-+_vpaes_schedule_core:
-+      AARCH64_SIGN_LINK_REGISTER
-+      stp     x29, x30, [sp,#-16]!
-+      add     x29,sp,#0
-+
-+      bl      _vpaes_key_preheat              // load the tables
-+
-+      ld1     {v0.16b}, [$inp],#16            // vmovdqu      (%rdi), %xmm0           # load key (unaligned)
-+
-+      // input transform
-+      mov     v3.16b, v0.16b                  // vmovdqa      %xmm0,  %xmm3
-+      bl      _vpaes_schedule_transform
-+      mov     v7.16b, v0.16b                  // vmovdqa      %xmm0,  %xmm7
-+
-+      adrp    x10, :pg_hi21:.Lk_sr            // lea  .Lk_sr(%rip),%r10
-+      add     x10, x10, :lo12:.Lk_sr
-+
-+      add     x8, x8, x10
-+
-+      // encrypting, output zeroth round key after transform
-+      st1     {v0.2d}, [$out]                 // vmovdqu      %xmm0,  (%rdx)
-+
-+      cmp     $bits, #192                     // cmp  \$192,  %esi
-+      b.hi    .Lschedule_256
-+      b.eq    .Lschedule_192
-+      // 128: fall though
-+
-+##
-+##  .schedule_128
-+##
-+##  128-bit specific part of key schedule.
-+##
-+##  This schedule is really simple, because all its parts
-+##  are accomplished by the subroutines.
-+##
-+.Lschedule_128:
-+      mov     $inp, #10                       // mov  \$10, %esi
-+
-+.Loop_schedule_128:
-+      sub     $inp, $inp, #1                  // dec  %esi
-+      bl      _vpaes_schedule_round
-+      cbz     $inp, .Lschedule_mangle_last
-+      bl      _vpaes_schedule_mangle          // write output
-+      b       .Loop_schedule_128
-+
-+##
-+##  .aes_schedule_192
-+##
-+##  192-bit specific part of key schedule.
-+##
-+##  The main body of this schedule is the same as the 128-bit
-+##  schedule, but with more smearing.  The long, high side is
-+##  stored in %xmm7 as before, and the short, low side is in
-+##  the high bits of %xmm6.
-+##
-+##  This schedule is somewhat nastier, however, because each
-+##  round produces 192 bits of key material, or 1.5 round keys.
-+##  Therefore, on each cycle we do 2 rounds and produce 3 round
-+##  keys.
-+##
-+.align        4
-+.Lschedule_192:
-+      sub     $inp, $inp, #8
-+      ld1     {v0.16b}, [$inp]                // vmovdqu      8(%rdi),%xmm0           # load key part 2 (very unaligned)
-+      bl      _vpaes_schedule_transform       // input transform
-+      mov     v6.16b, v0.16b                  // vmovdqa      %xmm0,  %xmm6           # save short part
-+      eor     v4.16b, v4.16b, v4.16b          // vpxor        %xmm4,  %xmm4, %xmm4    # clear 4
-+      ins     v6.d[0], v4.d[0]                // vmovhlps     %xmm4,  %xmm6,  %xmm6           # clobber low side with zeros
-+      mov     $inp, #4                        // mov  \$4,    %esi
-+
-+.Loop_schedule_192:
-+      sub     $inp, $inp, #1                  // dec  %esi
-+      bl      _vpaes_schedule_round
-+      ext     v0.16b, v6.16b, v0.16b, #8      // vpalignr     \$8,%xmm6,%xmm0,%xmm0
-+      bl      _vpaes_schedule_mangle          // save key n
-+      bl      _vpaes_schedule_192_smear
-+      bl      _vpaes_schedule_mangle          // save key n+1
-+      bl      _vpaes_schedule_round
-+      cbz     $inp, .Lschedule_mangle_last
-+      bl      _vpaes_schedule_mangle          // save key n+2
-+      bl      _vpaes_schedule_192_smear
-+      b       .Loop_schedule_192
-+
-+##
-+##  .aes_schedule_256
-+##
-+##  256-bit specific part of key schedule.
-+##
-+##  The structure here is very similar to the 128-bit
-+##  schedule, but with an additional "low side" in
-+##  %xmm6.  The low side's rounds are the same as the
-+##  high side's, except no rcon and no rotation.
-+##
-+.align        4
-+.Lschedule_256:
-+      ld1     {v0.16b}, [$inp]                // vmovdqu      16(%rdi),%xmm0          # load key part 2 (unaligned)
-+      bl      _vpaes_schedule_transform       // input transform
-+      mov     $inp, #7                        // mov  \$7, %esi
-+
-+.Loop_schedule_256:
-+      sub     $inp, $inp, #1                  // dec  %esi
-+      bl      _vpaes_schedule_mangle          // output low result
-+      mov     v6.16b, v0.16b                  // vmovdqa      %xmm0,  %xmm6           # save cur_lo in xmm6
-+
-+      // high round
-+      bl      _vpaes_schedule_round
-+      cbz     $inp, .Lschedule_mangle_last
-+      bl      _vpaes_schedule_mangle
-+
-+      // low round. swap xmm7 and xmm6
-+      dup     v0.4s, v0.s[3]                  // vpshufd      \$0xFF, %xmm0,  %xmm0
-+      movi    v4.16b, #0
-+      mov     v5.16b, v7.16b                  // vmovdqa      %xmm7,  %xmm5
-+      mov     v7.16b, v6.16b                  // vmovdqa      %xmm6,  %xmm7
-+      bl      _vpaes_schedule_low_round
-+      mov     v7.16b, v5.16b                  // vmovdqa      %xmm5,  %xmm7
-+
-+      b       .Loop_schedule_256
-+
-+##
-+##  .aes_schedule_mangle_last
-+##
-+##  Mangler for last round of key schedule
-+##  Mangles %xmm0
-+##    when encrypting, outputs out(%xmm0) ^ 63
-+##    when decrypting, outputs unskew(%xmm0)
-+##
-+##  Always called right before return... jumps to cleanup and exits
-+##
-+.align        4
-+.Lschedule_mangle_last:
-+      // schedule last round key from xmm0
-+      adrp    x11, :pg_hi21:.Lk_deskew        // lea  .Lk_deskew(%rip),%r11   # prepare to deskew
-+      add     x11, x11, :lo12:.Lk_deskew
-+
-+      cbnz    $dir, .Lschedule_mangle_last_dec
-+
-+      // encrypting
-+      ld1     {v1.2d}, [x8]                   // vmovdqa      (%r8,%r10),%xmm1
-+      adrp    x11, :pg_hi21:.Lk_opt           // lea  .Lk_opt(%rip),  %r11            # prepare to output transform
-+      add     x11, x11, :lo12:.Lk_opt
-+      add     $out, $out, #32                 // add  \$32,   %rdx
-+      tbl     v0.16b, {v0.16b}, v1.16b        // vpshufb      %xmm1,  %xmm0,  %xmm0           # output permute
-+
-+.Lschedule_mangle_last_dec:
-+      ld1     {v20.2d-v21.2d}, [x11]          // reload constants
-+      sub     $out, $out, #16                 // add  \$-16,  %rdx
-+      eor     v0.16b, v0.16b, v16.16b         // vpxor        .Lk_s63(%rip),  %xmm0,  %xmm0
-+      bl      _vpaes_schedule_transform       // output transform
-+      st1     {v0.2d}, [$out]                 // vmovdqu      %xmm0,  (%rdx)          # save last key
-+
-+      // cleanup
-+      eor     v0.16b, v0.16b, v0.16b          // vpxor        %xmm0,  %xmm0,  %xmm0
-+      eor     v1.16b, v1.16b, v1.16b          // vpxor        %xmm1,  %xmm1,  %xmm1
-+      eor     v2.16b, v2.16b, v2.16b          // vpxor        %xmm2,  %xmm2,  %xmm2
-+      eor     v3.16b, v3.16b, v3.16b          // vpxor        %xmm3,  %xmm3,  %xmm3
-+      eor     v4.16b, v4.16b, v4.16b          // vpxor        %xmm4,  %xmm4,  %xmm4
-+      eor     v5.16b, v5.16b, v5.16b          // vpxor        %xmm5,  %xmm5,  %xmm5
-+      eor     v6.16b, v6.16b, v6.16b          // vpxor        %xmm6,  %xmm6,  %xmm6
-+      eor     v7.16b, v7.16b, v7.16b          // vpxor        %xmm7,  %xmm7,  %xmm7
-+      ldp     x29, x30, [sp],#16
-+      AARCH64_VALIDATE_LINK_REGISTER
-+      ret
-+.size _vpaes_schedule_core,.-_vpaes_schedule_core
-+
-+##
-+##  .aes_schedule_192_smear
-+##
-+##  Smear the short, low side in the 192-bit key schedule.
-+##
-+##  Inputs:
-+##    %xmm7: high side, b  a  x  y
-+##    %xmm6:  low side, d  c  0  0
-+##    %xmm13: 0
-+##
-+##  Outputs:
-+##    %xmm6: b+c+d  b+c  0  0
-+##    %xmm0: b+c+d  b+c  b  a
-+##
-+.type _vpaes_schedule_192_smear,%function
-+.align        4
-+_vpaes_schedule_192_smear:
-+      movi    v1.16b, #0
-+      dup     v0.4s, v7.s[3]
-+      ins     v1.s[3], v6.s[2]        // vpshufd      \$0x80, %xmm6,  %xmm1   # d c 0 0 -> c 0 0 0
-+      ins     v0.s[0], v7.s[2]        // vpshufd      \$0xFE, %xmm7,  %xmm0   # b a _ _ -> b b b a
-+      eor     v6.16b, v6.16b, v1.16b  // vpxor        %xmm1,  %xmm6,  %xmm6   # -> c+d c 0 0
-+      eor     v1.16b, v1.16b, v1.16b  // vpxor        %xmm1,  %xmm1,  %xmm1
-+      eor     v6.16b, v6.16b, v0.16b  // vpxor        %xmm0,  %xmm6,  %xmm6   # -> b+c+d b+c b a
-+      mov     v0.16b, v6.16b          // vmovdqa      %xmm6,  %xmm0
-+      ins     v6.d[0], v1.d[0]        // vmovhlps     %xmm1,  %xmm6,  %xmm6   # clobber low side with zeros
-+      ret
-+.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
-+
-+##
-+##  .aes_schedule_round
-+##
-+##  Runs one main round of the key schedule on %xmm0, %xmm7
-+##
-+##  Specifically, runs subbytes on the high dword of %xmm0
-+##  then rotates it by one byte and xors into the low dword of
-+##  %xmm7.
-+##
-+##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
-+##  next rcon.
-+##
-+##  Smears the dwords of %xmm7 by xoring the low into the
-+##  second low, result into third, result into highest.
-+##
-+##  Returns results in %xmm7 = %xmm0.
-+##  Clobbers %xmm1-%xmm4, %r11.
-+##
-+.type _vpaes_schedule_round,%function
-+.align        4
-+_vpaes_schedule_round:
-+      // extract rcon from xmm8
-+      movi    v4.16b, #0                      // vpxor        %xmm4,  %xmm4,  %xmm4
-+      ext     v1.16b, $rcon, v4.16b, #15      // vpalignr     \$15,   %xmm8,  %xmm4,  %xmm1
-+      ext     $rcon, $rcon, $rcon, #15        // vpalignr     \$15,   %xmm8,  %xmm8,  %xmm8
-+      eor     v7.16b, v7.16b, v1.16b          // vpxor        %xmm1,  %xmm7,  %xmm7
-+
-+      // rotate
-+      dup     v0.4s, v0.s[3]                  // vpshufd      \$0xFF, %xmm0,  %xmm0
-+      ext     v0.16b, v0.16b, v0.16b, #1      // vpalignr     \$1,    %xmm0,  %xmm0,  %xmm0
-+
-+      // fall through...
-+
-+      // low round: same as high round, but no rotation and no rcon.
-+_vpaes_schedule_low_round:
-+      // smear xmm7
-+      ext     v1.16b, v4.16b, v7.16b, #12     // vpslldq      \$4,    %xmm7,  %xmm1
-+      eor     v7.16b, v7.16b, v1.16b          // vpxor        %xmm1,  %xmm7,  %xmm7
-+      ext     v4.16b, v4.16b, v7.16b, #8      // vpslldq      \$8,    %xmm7,  %xmm4
-+
-+      // subbytes
-+      and     v1.16b, v0.16b, v17.16b         // vpand        %xmm9,  %xmm0,  %xmm1           # 0 = k
-+      ushr    v0.16b, v0.16b, #4              // vpsrlb       \$4,    %xmm0,  %xmm0           # 1 = i
-+       eor    v7.16b, v7.16b, v4.16b          // vpxor        %xmm4,  %xmm7,  %xmm7
-+      tbl     v2.16b, {$invhi}, v1.16b        // vpshufb      %xmm1,  %xmm11, %xmm2           # 2 = a/k
-+      eor     v1.16b, v1.16b, v0.16b          // vpxor        %xmm0,  %xmm1,  %xmm1           # 0 = j
-+      tbl     v3.16b, {$invlo}, v0.16b        // vpshufb      %xmm0,  %xmm10, %xmm3           # 3 = 1/i
-+      eor     v3.16b, v3.16b, v2.16b          // vpxor        %xmm2,  %xmm3,  %xmm3           # 3 = iak = 1/i + a/k
-+      tbl     v4.16b, {$invlo}, v1.16b        // vpshufb      %xmm1,  %xmm10, %xmm4           # 4 = 1/j
-+       eor    v7.16b, v7.16b, v16.16b         // vpxor        .Lk_s63(%rip),  %xmm7,  %xmm7
-+      tbl     v3.16b, {$invlo}, v3.16b        // vpshufb      %xmm3,  %xmm10, %xmm3           # 2 = 1/iak
-+      eor     v4.16b, v4.16b, v2.16b          // vpxor        %xmm2,  %xmm4,  %xmm4           # 4 = jak = 1/j + a/k
-+      tbl     v2.16b, {$invlo}, v4.16b        // vpshufb      %xmm4,  %xmm10, %xmm2           # 3 = 1/jak
-+      eor     v3.16b, v3.16b, v1.16b          // vpxor        %xmm1,  %xmm3,  %xmm3           # 2 = io
-+      eor     v2.16b, v2.16b, v0.16b          // vpxor        %xmm0,  %xmm2,  %xmm2           # 3 = jo
-+      tbl     v4.16b, {v23.16b}, v3.16b       // vpshufb      %xmm3,  %xmm13, %xmm4           # 4 = sbou
-+      tbl     v1.16b, {v22.16b}, v2.16b       // vpshufb      %xmm2,  %xmm12, %xmm1           # 0 = sb1t
-+      eor     v1.16b, v1.16b, v4.16b          // vpxor        %xmm4,  %xmm1,  %xmm1           # 0 = sbox output
-+
-+      // add in smeared stuff
-+      eor     v0.16b, v1.16b, v7.16b          // vpxor        %xmm7,  %xmm1,  %xmm0
-+      eor     v7.16b, v1.16b, v7.16b          // vmovdqa      %xmm0,  %xmm7
-+      ret
-+.size _vpaes_schedule_round,.-_vpaes_schedule_round
-+
-+##
-+##  .aes_schedule_transform
-+##
-+##  Linear-transform %xmm0 according to tables at (%r11)
-+##
-+##  Requires that %xmm9 = 0x0F0F... as in preheat
-+##  Output in %xmm0
-+##  Clobbers %xmm1, %xmm2
-+##
-+.type _vpaes_schedule_transform,%function
-+.align        4
-+_vpaes_schedule_transform:
-+      and     v1.16b, v0.16b, v17.16b         // vpand        %xmm9,  %xmm0,  %xmm1
-+      ushr    v0.16b, v0.16b, #4              // vpsrlb       \$4,    %xmm0,  %xmm0
-+                                              // vmovdqa      (%r11), %xmm2   # lo
-+      tbl     v2.16b, {$iptlo}, v1.16b        // vpshufb      %xmm1,  %xmm2,  %xmm2
-+                                              // vmovdqa      16(%r11),       %xmm1 # hi
-+      tbl     v0.16b, {$ipthi}, v0.16b        // vpshufb      %xmm0,  %xmm1,  %xmm0
-+      eor     v0.16b, v0.16b, v2.16b          // vpxor        %xmm2,  %xmm0,  %xmm0
-+      ret
-+.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
-+
-+##
-+##  .aes_schedule_mangle
-+##
-+##  Mangle xmm0 from (basis-transformed) standard version
-+##  to our version.
-+##
-+##  On encrypt,
-+##    xor with 0x63
-+##    multiply by circulant 0,1,1,1
-+##    apply shiftrows transform
-+##
-+##  On decrypt,
-+##    xor with 0x63
-+##    multiply by "inverse mixcolumns" circulant E,B,D,9
-+##    deskew
-+##    apply shiftrows transform
-+##
-+##
-+##  Writes out to (%rdx), and increments or decrements it
-+##  Keeps track of round number mod 4 in %r8
-+##  Preserves xmm0
-+##  Clobbers xmm1-xmm5
-+##
-+.type _vpaes_schedule_mangle,%function
-+.align        4
-+_vpaes_schedule_mangle:
-+      mov     v4.16b, v0.16b                  // vmovdqa      %xmm0,  %xmm4   # save xmm0 for later
-+                                              // vmovdqa      .Lk_mc_forward(%rip),%xmm5
-+
-+      // encrypting
-+      eor     v4.16b, v0.16b, v16.16b         // vpxor        .Lk_s63(%rip),  %xmm0,  %xmm4
-+      add     $out, $out, #16                 // add  \$16,   %rdx
-+      tbl     v4.16b, {v4.16b}, v9.16b        // vpshufb      %xmm5,  %xmm4,  %xmm4
-+      tbl     v1.16b, {v4.16b}, v9.16b        // vpshufb      %xmm5,  %xmm4,  %xmm1
-+      tbl     v3.16b, {v1.16b}, v9.16b        // vpshufb      %xmm5,  %xmm1,  %xmm3
-+      eor     v4.16b, v4.16b, v1.16b          // vpxor        %xmm1,  %xmm4,  %xmm4
-+      ld1     {v1.2d}, [x8]                   // vmovdqa      (%r8,%r10),     %xmm1
-+      eor     v3.16b, v3.16b, v4.16b          // vpxor        %xmm4,  %xmm3,  %xmm3
-+
-+.Lschedule_mangle_both:
-+      tbl     v3.16b, {v3.16b}, v1.16b        // vpshufb      %xmm1,  %xmm3,  %xmm3
-+      add     x8, x8, #64-16                  // add  \$-16,  %r8
-+      and     x8, x8, #~(1<<6)                // and  \$0x30, %r8
-+      st1     {v3.2d}, [$out]                 // vmovdqu      %xmm3,  (%rdx)
-+      ret
-+.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
-+
-+.globl        GFp_vpaes_set_encrypt_key
-+.type GFp_vpaes_set_encrypt_key,%function
-+.align        4
-+GFp_vpaes_set_encrypt_key:
-+      AARCH64_SIGN_LINK_REGISTER
-+      stp     x29,x30,[sp,#-16]!
-+      add     x29,sp,#0
-+      stp     d8,d9,[sp,#-16]!        // ABI spec says so
-+
-+      lsr     w9, $bits, #5           // shr  \$5,%eax
-+      add     w9, w9, #5              // \$5,%eax
-+      str     w9, [$out,#240]         // mov  %eax,240(%rdx)  # AES_KEY->rounds = nbits/32+5;
-+
-+      mov     $dir, #0                // mov  \$0,%ecx
-+      mov     x8, #0x30               // mov  \$0x30,%r8d
-+      bl      _vpaes_schedule_core
-+      eor     x0, x0, x0
-+
-+      ldp     d8,d9,[sp],#16
-+      ldp     x29,x30,[sp],#16
-+      AARCH64_VALIDATE_LINK_REGISTER
-+      ret
-+.size GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key
-+___
-+}
-+{
-+my ($inp,$out,$len,$key,$ivec) = map("x$_",(0..4));
-+my ($ctr, $ctr_tmp) = ("w6", "w7");
-+
-+# void GFp_vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
-+#                                     const AES_KEY *key, const uint8_t ivec[16]);
-+$code.=<<___;
-+.globl        GFp_vpaes_ctr32_encrypt_blocks
-+.type GFp_vpaes_ctr32_encrypt_blocks,%function
-+.align        4
-+GFp_vpaes_ctr32_encrypt_blocks:
-+      AARCH64_SIGN_LINK_REGISTER
-+      stp     x29,x30,[sp,#-16]!
-+      add     x29,sp,#0
-+      stp     d8,d9,[sp,#-16]!        // ABI spec says so
-+      stp     d10,d11,[sp,#-16]!
-+      stp     d12,d13,[sp,#-16]!
-+      stp     d14,d15,[sp,#-16]!
-+
-+      cbz     $len, .Lctr32_done
-+
-+      // Note, unlike the other functions, $len here is measured in blocks,
-+      // not bytes.
-+      mov     x17, $len
-+      mov     x2,  $key
-+
-+      // Load the IV and counter portion.
-+      ldr     $ctr, [$ivec, #12]
-+      ld1     {v7.16b}, [$ivec]
-+
-+      bl      _vpaes_encrypt_preheat
-+      tst     x17, #1
-+      rev     $ctr, $ctr              // The counter is big-endian.
-+      b.eq    .Lctr32_prep_loop
-+
-+      // Handle one block so the remaining block count is even for
-+      // _vpaes_encrypt_2x.
-+      ld1     {v6.16b}, [$inp], #16   // Load input ahead of time
-+      bl      _vpaes_encrypt_core
-+      eor     v0.16b, v0.16b, v6.16b  // XOR input and result
-+      st1     {v0.16b}, [$out], #16
-+      subs    x17, x17, #1
-+      // Update the counter.
-+      add     $ctr, $ctr, #1
-+      rev     $ctr_tmp, $ctr
-+      mov     v7.s[3], $ctr_tmp
-+      b.ls    .Lctr32_done
-+
-+.Lctr32_prep_loop:
-+      // _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x
-+      // uses v14 and v15.
-+      mov     v15.16b, v7.16b
-+      mov     v14.16b, v7.16b
-+      add     $ctr, $ctr, #1
-+      rev     $ctr_tmp, $ctr
-+      mov     v15.s[3], $ctr_tmp
-+
-+.Lctr32_loop:
-+      ld1     {v6.16b,v7.16b}, [$inp], #32    // Load input ahead of time
-+      bl      _vpaes_encrypt_2x
-+      eor     v0.16b, v0.16b, v6.16b          // XOR input and result
-+      eor     v1.16b, v1.16b, v7.16b          // XOR input and result (#2)
-+      st1     {v0.16b,v1.16b}, [$out], #32
-+      subs    x17, x17, #2
-+      // Update the counter.
-+      add     $ctr_tmp, $ctr, #1
-+      add     $ctr, $ctr, #2
-+      rev     $ctr_tmp, $ctr_tmp
-+      mov     v14.s[3], $ctr_tmp
-+      rev     $ctr_tmp, $ctr
-+      mov     v15.s[3], $ctr_tmp
-+      b.hi    .Lctr32_loop
-+
-+.Lctr32_done:
-+      ldp     d14,d15,[sp],#16
-+      ldp     d12,d13,[sp],#16
-+      ldp     d10,d11,[sp],#16
-+      ldp     d8,d9,[sp],#16
-+      ldp     x29,x30,[sp],#16
-+      AARCH64_VALIDATE_LINK_REGISTER
-+      ret
-+.size GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks
-+___
-+}
-+
-+print $code;
-+
-+close STDOUT or die "error closing STDOUT";
-diff --git a/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
-new file mode 100644
-index 0000000..7e52ad6
---- /dev/null
-+++ b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
-@@ -0,0 +1,294 @@
-+#! /usr/bin/env perl
-+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
-+#
-+# Licensed under the OpenSSL license (the "License").  You may not use
-+# this file except in compliance with the License.  You can obtain a copy
-+# in the file LICENSE in the source distribution or at
-+# https://www.openssl.org/source/license.html
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# This file was adapted to AArch64 from the 32-bit version in ghash-armv4.pl. It
-+# implements the multiplication algorithm described in:
-+#
-+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
-+# Polynomial Multiplication on ARM Processors using the NEON Engine.
-+#
-+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
-+#
-+# The main distinction to keep in mind between 32-bit NEON and AArch64 SIMD is
-+# AArch64 cannot compute over the upper halves of SIMD registers. In 32-bit
-+# NEON, the low and high halves of the 128-bit register q0 are accessible as
-+# 64-bit registers d0 and d1, respectively. In AArch64, dN is the lower half of
-+# vN. Where the 32-bit version would use the upper half, this file must keep
-+# halves in separate registers.
-+#
-+# The other distinction is in syntax. 32-bit NEON embeds lane information in the
-+# instruction name, while AArch64 uses suffixes on the registers. For instance,
-+# left-shifting 64-bit lanes of a SIMD register in 32-bit would be written:
-+#
-+#     vshl.i64 q0, q0, #1
-+#
-+# in 64-bit, it would be written:
-+#
-+#     shl v0.2d, v0.2d, #1
-+#
-+# See Programmer's Guide for ARMv8-A, section 7 for details.
-+# http://infocenter.arm.com/help/topic/com.arm.doc.den0024a/DEN0024A_v8_architecture_PG.pdf
-+#
-+# Finally, note the 8-bit and 64-bit polynomial multipliers in AArch64 differ
-+# only by suffix. pmull vR.8h, vA.8b, vB.8b multiplies eight 8-bit polynomials
-+# and is always available. pmull vR.1q, vA.1d, vB.1d multiplies a 64-bit
-+# polynomial and is conditioned on the PMULL extension. This file emulates the
-+# latter with the former.
-+
-+use strict;
-+
-+my $flavour = shift;
-+my $output;
-+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
-+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
-+
-+if ($flavour && $flavour ne "void") {
-+    $0 =~ m/(.*[\/\\])[^\/\\]+$/;
-+    my $dir = $1;
-+    my $xlate;
-+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-+    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
-+    die "can't locate arm-xlate.pl";
-+
-+    open OUT,"| \"$^X\" $xlate $flavour $output";
-+    *STDOUT=*OUT;
-+} else {
-+    open OUT,">$output";
-+    *STDOUT=*OUT;
-+}
-+
-+my ($Xi, $Htbl, $inp, $len) = map("x$_", (0..3));     # argument block
-+my ($Xl, $Xm, $Xh, $INlo, $INhi) = map("v$_", (0..4));
-+my ($Hlo, $Hhi, $Hhl) = map("v$_", (5..7));
-+# d8-d15 are callee-saved, so avoid v8-v15. AArch64 SIMD has plenty of registers
-+# to spare.
-+my ($t0, $t1, $t2, $t3) = map("v$_", (16..19));
-+my ($t0l_t1l, $t0h_t1h, $t2l_t3l, $t2h_t3h) = map("v$_", (20..23));
-+my ($k48_k32, $k16_k0) = map("v$_", (24..25));
-+
-+my $code = "";
-+
-+# clmul64x64 emits code which emulates pmull $r.1q, $a.1d, $b.1d. $r, $a, and $b
-+# must be distinct from $t* and $k*. $t* are clobbered by the emitted code.
-+sub clmul64x64 {
-+my ($r, $a, $b) = @_;
-+$code .= <<___;
-+      ext     $t0.8b, $a.8b, $a.8b, #1        // A1
-+      pmull   $t0.8h, $t0.8b, $b.8b           // F = A1*B
-+      ext     $r.8b, $b.8b, $b.8b, #1         // B1
-+      pmull   $r.8h, $a.8b, $r.8b             // E = A*B1
-+      ext     $t1.8b, $a.8b, $a.8b, #2        // A2
-+      pmull   $t1.8h, $t1.8b, $b.8b           // H = A2*B
-+      ext     $t3.8b, $b.8b, $b.8b, #2        // B2
-+      pmull   $t3.8h, $a.8b, $t3.8b           // G = A*B2
-+      ext     $t2.8b, $a.8b, $a.8b, #3        // A3
-+      eor     $t0.16b, $t0.16b, $r.16b        // L = E + F
-+      pmull   $t2.8h, $t2.8b, $b.8b           // J = A3*B
-+      ext     $r.8b, $b.8b, $b.8b, #3         // B3
-+      eor     $t1.16b, $t1.16b, $t3.16b       // M = G + H
-+      pmull   $r.8h, $a.8b, $r.8b             // I = A*B3
-+
-+      // Here we diverge from the 32-bit version. It computes the following
-+      // (instructions reordered for clarity):
-+      //
-+      //     veor     \$t0#lo, \$t0#lo, \$t0#hi       @ t0 = P0 + P1 (L)
-+      //     vand     \$t0#hi, \$t0#hi, \$k48
-+      //     veor     \$t0#lo, \$t0#lo, \$t0#hi
-+      //
-+      //     veor     \$t1#lo, \$t1#lo, \$t1#hi       @ t1 = P2 + P3 (M)
-+      //     vand     \$t1#hi, \$t1#hi, \$k32
-+      //     veor     \$t1#lo, \$t1#lo, \$t1#hi
-+      //
-+      //     veor     \$t2#lo, \$t2#lo, \$t2#hi       @ t2 = P4 + P5 (N)
-+      //     vand     \$t2#hi, \$t2#hi, \$k16
-+      //     veor     \$t2#lo, \$t2#lo, \$t2#hi
-+      //
-+      //     veor     \$t3#lo, \$t3#lo, \$t3#hi       @ t3 = P6 + P7 (K)
-+      //     vmov.i64 \$t3#hi, #0
-+      //
-+      // \$kN is a mask with the bottom N bits set. AArch64 cannot compute on
-+      // upper halves of SIMD registers, so we must split each half into
-+      // separate registers. To compensate, we pair computations up and
-+      // parallelize.
-+
-+      ext     $t3.8b, $b.8b, $b.8b, #4        // B4
-+      eor     $t2.16b, $t2.16b, $r.16b        // N = I + J
-+      pmull   $t3.8h, $a.8b, $t3.8b           // K = A*B4
-+
-+      // This can probably be scheduled more efficiently. For now, we just
-+      // pair up independent instructions.
-+      zip1    $t0l_t1l.2d, $t0.2d, $t1.2d
-+      zip1    $t2l_t3l.2d, $t2.2d, $t3.2d
-+      zip2    $t0h_t1h.2d, $t0.2d, $t1.2d
-+      zip2    $t2h_t3h.2d, $t2.2d, $t3.2d
-+      eor     $t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b
-+      eor     $t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b
-+      and     $t0h_t1h.16b, $t0h_t1h.16b, $k48_k32.16b
-+      and     $t2h_t3h.16b, $t2h_t3h.16b, $k16_k0.16b
-+      eor     $t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b
-+      eor     $t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b
-+      zip1    $t0.2d, $t0l_t1l.2d, $t0h_t1h.2d
-+      zip1    $t2.2d, $t2l_t3l.2d, $t2h_t3h.2d
-+      zip2    $t1.2d, $t0l_t1l.2d, $t0h_t1h.2d
-+      zip2    $t3.2d, $t2l_t3l.2d, $t2h_t3h.2d
-+
-+      ext     $t0.16b, $t0.16b, $t0.16b, #15  // t0 = t0 << 8
-+      ext     $t1.16b, $t1.16b, $t1.16b, #14  // t1 = t1 << 16
-+      pmull   $r.8h, $a.8b, $b.8b             // D = A*B
-+      ext     $t3.16b, $t3.16b, $t3.16b, #12  // t3 = t3 << 32
-+      ext     $t2.16b, $t2.16b, $t2.16b, #13  // t2 = t2 << 24
-+      eor     $t0.16b, $t0.16b, $t1.16b
-+      eor     $t2.16b, $t2.16b, $t3.16b
-+      eor     $r.16b, $r.16b, $t0.16b
-+      eor     $r.16b, $r.16b, $t2.16b
-+___
-+}
-+
-+$code .= <<___;
-+#include <GFp/arm_arch.h>
-+
-+.text
-+
-+.global       GFp_gcm_init_neon
-+.type GFp_gcm_init_neon,%function
-+.align        4
-+GFp_gcm_init_neon:
-+      AARCH64_VALID_CALL_TARGET
-+      // This function is adapted from gcm_init_v8. xC2 is t3.
-+      ld1     {$t1.2d}, [x1]                  // load H
-+      movi    $t3.16b, #0xe1
-+      shl     $t3.2d, $t3.2d, #57             // 0xc2.0
-+      ext     $INlo.16b, $t1.16b, $t1.16b, #8
-+      ushr    $t2.2d, $t3.2d, #63
-+      dup     $t1.4s, $t1.s[1]
-+      ext     $t0.16b, $t2.16b, $t3.16b, #8   // t0=0xc2....01
-+      ushr    $t2.2d, $INlo.2d, #63
-+      sshr    $t1.4s, $t1.4s, #31             // broadcast carry bit
-+      and     $t2.16b, $t2.16b, $t0.16b
-+      shl     $INlo.2d, $INlo.2d, #1
-+      ext     $t2.16b, $t2.16b, $t2.16b, #8
-+      and     $t0.16b, $t0.16b, $t1.16b
-+      orr     $INlo.16b, $INlo.16b, $t2.16b   // H<<<=1
-+      eor     $Hlo.16b, $INlo.16b, $t0.16b    // twisted H
-+      st1     {$Hlo.2d}, [x0]                 // store Htable[0]
-+      ret
-+.size GFp_gcm_init_neon,.-GFp_gcm_init_neon
-+
-+.global       GFp_gcm_gmult_neon
-+.type GFp_gcm_gmult_neon,%function
-+.align        4
-+GFp_gcm_gmult_neon:
-+      AARCH64_VALID_CALL_TARGET
-+      ld1     {$INlo.16b}, [$Xi]              // load Xi
-+      ld1     {$Hlo.1d}, [$Htbl], #8          // load twisted H
-+      ld1     {$Hhi.1d}, [$Htbl]
-+      adrp    x9, :pg_hi21:.Lmasks            // load constants
-+      add     x9, x9, :lo12:.Lmasks
-+      ld1     {$k48_k32.2d, $k16_k0.2d}, [x9]
-+      rev64   $INlo.16b, $INlo.16b            // byteswap Xi
-+      ext     $INlo.16b, $INlo.16b, $INlo.16b, #8
-+      eor     $Hhl.8b, $Hlo.8b, $Hhi.8b       // Karatsuba pre-processing
-+
-+      mov     $len, #16
-+      b       .Lgmult_neon
-+.size GFp_gcm_gmult_neon,.-GFp_gcm_gmult_neon
-+
-+.global       GFp_gcm_ghash_neon
-+.type GFp_gcm_ghash_neon,%function
-+.align        4
-+GFp_gcm_ghash_neon:
-+      AARCH64_VALID_CALL_TARGET
-+      ld1     {$Xl.16b}, [$Xi]                // load Xi
-+      ld1     {$Hlo.1d}, [$Htbl], #8          // load twisted H
-+      ld1     {$Hhi.1d}, [$Htbl]
-+      adrp    x9, :pg_hi21:.Lmasks            // load constants
-+      add     x9, x9, :lo12:.Lmasks
-+      ld1     {$k48_k32.2d, $k16_k0.2d}, [x9]
-+      rev64   $Xl.16b, $Xl.16b                // byteswap Xi
-+      ext     $Xl.16b, $Xl.16b, $Xl.16b, #8
-+      eor     $Hhl.8b, $Hlo.8b, $Hhi.8b       // Karatsuba pre-processing
-+
-+.Loop_neon:
-+      ld1     {$INlo.16b}, [$inp], #16        // load inp
-+      rev64   $INlo.16b, $INlo.16b            // byteswap inp
-+      ext     $INlo.16b, $INlo.16b, $INlo.16b, #8
-+      eor     $INlo.16b, $INlo.16b, $Xl.16b   // inp ^= Xi
-+
-+.Lgmult_neon:
-+      // Split the input into $INlo and $INhi. (The upper halves are unused,
-+      // so it is okay to leave them alone.)
-+      ins     $INhi.d[0], $INlo.d[1]
-+___
-+&clmul64x64   ($Xl, $Hlo, $INlo);             # H.lo·Xi.lo
-+$code .= <<___;
-+      eor     $INlo.8b, $INlo.8b, $INhi.8b    // Karatsuba pre-processing
-+___
-+&clmul64x64   ($Xm, $Hhl, $INlo);             # (H.lo+H.hi)·(Xi.lo+Xi.hi)
-+&clmul64x64   ($Xh, $Hhi, $INhi);             # H.hi·Xi.hi
-+$code .= <<___;
-+      ext     $t0.16b, $Xl.16b, $Xh.16b, #8
-+      eor     $Xm.16b, $Xm.16b, $Xl.16b       // Karatsuba post-processing
-+      eor     $Xm.16b, $Xm.16b, $Xh.16b
-+      eor     $Xm.16b, $Xm.16b, $t0.16b       // Xm overlaps Xh.lo and Xl.hi
-+      ins     $Xl.d[1], $Xm.d[0]              // Xh|Xl - 256-bit result
-+      // This is a no-op due to the ins instruction below.
-+      // ins  $Xh.d[0], $Xm.d[1]
-+
-+      // equivalent of reduction_avx from ghash-x86_64.pl
-+      shl     $t1.2d, $Xl.2d, #57             // 1st phase
-+      shl     $t2.2d, $Xl.2d, #62
-+      eor     $t2.16b, $t2.16b, $t1.16b       //
-+      shl     $t1.2d, $Xl.2d, #63
-+      eor     $t2.16b, $t2.16b, $t1.16b       //
-+      // Note Xm contains {Xl.d[1], Xh.d[0]}.
-+      eor     $t2.16b, $t2.16b, $Xm.16b
-+      ins     $Xl.d[1], $t2.d[0]              // Xl.d[1] ^= t2.d[0]
-+      ins     $Xh.d[0], $t2.d[1]              // Xh.d[0] ^= t2.d[1]
-+
-+      ushr    $t2.2d, $Xl.2d, #1              // 2nd phase
-+      eor     $Xh.16b, $Xh.16b,$Xl.16b
-+      eor     $Xl.16b, $Xl.16b,$t2.16b        //
-+      ushr    $t2.2d, $t2.2d, #6
-+      ushr    $Xl.2d, $Xl.2d, #1              //
-+      eor     $Xl.16b, $Xl.16b, $Xh.16b       //
-+      eor     $Xl.16b, $Xl.16b, $t2.16b       //
-+
-+      subs    $len, $len, #16
-+      bne     .Loop_neon
-+
-+      rev64   $Xl.16b, $Xl.16b                // byteswap Xi and write
-+      ext     $Xl.16b, $Xl.16b, $Xl.16b, #8
-+      st1     {$Xl.16b}, [$Xi]
-+
-+      ret
-+.size GFp_gcm_ghash_neon,.-GFp_gcm_ghash_neon
-+
-+.section      .rodata
-+.align        4
-+.Lmasks:
-+.quad 0x0000ffffffffffff      // k48
-+.quad 0x00000000ffffffff      // k32
-+.quad 0x000000000000ffff      // k16
-+.quad 0x0000000000000000      // k0
-+.asciz  "GHASH for ARMv8, derived from ARMv4 version by <appro\@openssl.org>"
-+.align  2
-+___
-+
-+foreach (split("\n",$code)) {
-+      s/\`([^\`]*)\`/eval $1/geo;
-+
-+      print $_,"\n";
-+}
-+close STDOUT or die "error closing STDOUT"; # enforce flush
--- 
-Efraim Flashner   <efraim@flashner.co.il>   רנשלפ םירפא
-GPG key = A28B F40C 3E55 1372 662D  14F7 41AA E7DC CA3D 8351
-Confidentiality cannot be guaranteed on emails sent or received unencrypted
-
diff --git a/gnu/packages/patches/rust-ring-0.16-test-files.patch b/gnu/packages/patches/rust-ring-0.16-test-files.patch
deleted file mode 100644
index dbe5c0f4ee..0000000000
--- a/gnu/packages/patches/rust-ring-0.16-test-files.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-This file exists in the upstream repository at the commit which
-corresponds to the ring-0.16.20 release, but was excluded from the
-release tarball.
-
----
- tests/ed25519_verify_tests.txt | 34 ++++++++++++++++++++++++++++++++++
- 1 file changed, 34 insertions(+)
- create mode 100644 tests/ed25519_verify_tests.txt
-
-diff --git a/tests/ed25519_verify_tests.txt b/tests/ed25519_verify_tests.txt
-new file mode 100644
-index 0000000..74c94b3
---- /dev/null
-+++ b/tests/ed25519_verify_tests.txt
-@@ -0,0 +1,34 @@
-+# BoringSSL TEST(Ed25519Test Malleability)
-+
-+# Control; S is in range.
-+MESSAGE = 54657374
-+SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab07a9155711ecfaf7f99f277bad0c6ae7e39d4eef676573336a5c51eb6f946b30d
-+PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa
-+Result = P
-+
-+# Same as above, but with the order L added to S so it is out of range.
-+# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21
-+MESSAGE = 54657374
-+SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab067654bce3832c2d76f8f6f5dafc08d9339d4eef676573336a5c51eb6f946b31d
-+PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa
-+Result = F
-+
-+
-+# BoringSSL commit 3094902fcdc2db2cc832fa854b9a6a8be383926c
-+MESSAGE = 124e583f8b8eca58bb29c271b41d36986bbc45541f8e51f9cb0133eca447601e
-+SIG = dac119d6ca87fc59ae611c157048f4d4fc932a149dbe20ec6effd1436abf83ea05c7df0fef06147241259113909bc71bd3c53ba4464ffcad3c0968f2ffffff0f
-+PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
-+Result = P
-+
-+# Control. Same key as above; same message and signature as below, except S is in range.
-+PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
-+MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0
-+SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4f58ac9d20ec563133cabc6230b1db8625f8446639ede46ad4df4053000000000
-+Result = P
-+
-+# Same key as above, but S is out of range.
-+# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21
-+PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
-+MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0
-+SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4e25ebf2f2928766b1248bec6e91697775f8446639ede46ad4df4053000000010
-+Result = F
--- 
-Efraim Flashner   <efraim@flashner.co.il>   רנשלפ םירפא
-GPG key = A28B F40C 3E55 1372 662D  14F7 41AA E7DC CA3D 8351
-Confidentiality cannot be guaranteed on emails sent or received unencrypted
-
diff --git a/gnu/packages/rust-apps.scm b/gnu/packages/rust-apps.scm
index 77e4f24f50..0675fee658 100644
--- a/gnu/packages/rust-apps.scm
+++ b/gnu/packages/rust-apps.scm
@@ -114,8 +114,6 @@
         ("rust-tokio" ,rust-tokio-1)
         ("rust-tokio-rustls" ,rust-tokio-rustls-0.22)
         ("rust-url" ,rust-url-2))))
-    (native-inputs
-     (list perl))
     (home-page "https://github.com/mbrubeck/agate")
     (synopsis "Very simple server for the Gemini hypertext protocol")
     (description
@@ -192,7 +190,7 @@ low-end hardware and serving many concurrent requests.")
        (("rust-serde-bytes" ,rust-serde-bytes-0.11)
         ("rust-serde-derive" ,rust-serde-derive-1))))
     (native-inputs
-     (list perl pkg-config))
+     (list pkg-config))
     (inputs
      (list at-spi2-core
            gtk
@@ -1193,8 +1191,7 @@ on the terminal in a visually appealing way.")
      (list python-tomli))
     (inputs (list bzip2))
     (native-inputs
-     (list perl
-           python-wheel
+     (list python-wheel
            python-wrapper
            python-setuptools-rust))
     (home-page "https://github.com/pyo3/maturin")
@@ -1504,7 +1501,7 @@ browsers.")
              (substitute* "Cargo.toml"
                ((".*\"vendored-libgit2\".*") "")))))))
     (native-inputs
-     (list perl pkg-config))
+     (list pkg-config))
     (inputs
      (list libgit2-1.4
            libssh2
@@ -1695,8 +1692,6 @@ rebase.")
        (("rust-boxxy" ,rust-boxxy-0.12))))
     (inputs
      (list libpcap libseccomp))
-    (native-inputs
-     (list perl))
     (home-page "https://github.com/kpcyrd/sniffglue")
     (synopsis "Secure multithreaded packet sniffer")
     (description
@@ -2366,7 +2361,7 @@ consecutive lines and since program start.")
                         ;"dbus_mpris"   ; Conflicts with rust-chrono-0.4 version.
                         "pulseaudio_backend"
                         "rodio_backend")))
-    (native-inputs (list perl pkg-config))
+    (native-inputs (list pkg-config))
     (inputs (list alsa-lib dbus pulseaudio))
     (home-page "https://github.com/Spotifyd/spotifyd")
     (synopsis "Spotify streaming daemon with Spotify Connect support")
@@ -2543,7 +2538,7 @@ daemon which executes them.")
         ("rust-predicates" ,rust-predicates-2)
         ("rust-tempfile" ,rust-tempfile-3))))
     (native-inputs
-     (list perl pkg-config))
+     (list pkg-config))
     (inputs
      (list openssl))
     (home-page "https://github.com/dbrgn/tealdeer/")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]